**Connect to Elasticsearch**

In [2]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")

client_info = es.info()

print("Succesfully connected to Elasticsearch!")

pprint(client_info.body)

Succesfully connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'T1HeaWnRTOqX_BBgREVVbA',
 'name': '64c49e436740',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-10-21T10:06:21.288851013Z',
             'build_flavor': 'default',
             'build_hash': '25d88452371273dd27356c98598287b669a03eae',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.1',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.0'}}


**Create Index with auto mapping**

In [3]:
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

**Inserting Documents**

In [4]:
import json 
from tqdm import tqdm 

documents = json.load(open("data/data_2.json"))

for _ in range(10):
    documents += documents

In [5]:
len(documents)

5120

In [6]:
operations = []

for document in tqdm(documents, total=len(documents)):
    operations.append({"index": {"_index": "my_index"}}) # action
    operations.append(document) # source 

es.bulk(operations=operations)


100%|██████████| 5120/5120 [00:00<00:00, 377201.51it/s]




ObjectApiResponse({'errors': False, 'took': 1203, 'items': [{'index': {'_index': 'my_index', '_id': 'KZIJQpoBsuuLZ2nETW5f', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'KpIJQpoBsuuLZ2nETW5f', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'K5IJQpoBsuuLZ2nETW5f', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'LJIJQpoBsuuLZ2nETW5f', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'LZIJQpoBsuuLZ2nETW5f', '_version': 1, 'result': 'created', '_shards': {'tot

### Searching

**1. Size + From**
#
In this example, we perform a search that retrieves 10 documents, starting from the 11th document (i.e., skipping the first 10 results). This demonstrates `pagination` using the size and from parameters.

In [7]:
response = es.search(
    index="my_index", 
    body={
        "query": {
            "match_all": {}
        },
        "size": 10,
        "from": 10
    }
)

In [8]:
for hit in response["hits"]["hits"]:
    print(hit["_source"])

{'message': 'This is an important keyword search result.', 'age': 25, 'price': 100.0}
{'message': 'Another search result with an important keyword.', 'age': 30, 'price': 150.0}
{'message': 'Keyword match in this result as well.', 'age': 40, 'price': 200.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'This is an important keyword search result.', 'age': 25, 'price': 100.0}
{'message': 'Another search result with an important keyword.', 'age': 30, 'price': 150.0}
{'message': 'Keyword match in this result as well.', 'age': 40, 'price': 200.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}


**2. Timeout**
#
This example shows how to set a timeout for the search query. If the query takes longer than the specified 10s (10 seconds), it will be aborted.

In [9]:
response = es.search(
    index="my_index", 
    body={
        "query": {
            "match": {
                "message": "search keyword"
            }
        },
        "timeout": "10s"
    }
)

pprint(response["hits"]["hits"])

[{'_id': 'KZIJQpoBsuuLZ2nETW5f',
  '_index': 'my_index',
  '_score': 0.8939354,
  '_source': {'age': 25,
              'message': 'This is an important keyword search result.',
              'price': 100.0}},
 {'_id': 'KpIJQpoBsuuLZ2nETW5f',
  '_index': 'my_index',
  '_score': 0.8939354,
  '_source': {'age': 30,
              'message': 'Another search result with an important keyword.',
              'price': 150.0}},
 {'_id': 'LpIJQpoBsuuLZ2nETW5f',
  '_index': 'my_index',
  '_score': 0.8939354,
  '_source': {'age': 25,
              'message': 'This is an important keyword search result.',
              'price': 100.0}},
 {'_id': 'L5IJQpoBsuuLZ2nETW5f',
  '_index': 'my_index',
  '_score': 0.8939354,
  '_source': {'age': 30,
              'message': 'Another search result with an important keyword.',
              'price': 150.0}},
 {'_id': 'M5IJQpoBsuuLZ2nETW5f',
  '_index': 'my_index',
  '_score': 0.8939354,
  '_source': {'age': 25,
              'message': 'This is an important ke

### 3.Aggregation
In this example, we perform an aggregation to calculate the average value of the `age` field across all documents that match the query. The result of the aggregation is stored in the `avg_age` key.

In [12]:
response = es.search(
    index="my_index",
    body={
        "query": {"match_all": {}},
        "aggs": {
            "avg_age":{
                "avg": {
                    "field": "age"
                }
            }
        }
    }
)

average_age = response["aggregations"]["avg_age"]["value"]
print(f"Average Age: {average_age}")

Average Age: 31.6


### 4. Combining size, from, timeout, and aggs
Here we combine multiple parameters: we limit the results to 5 documents (size), skip the first 20 documents (from), set a timeout of 5 seconds (timeout), and perform a maximum aggregation (aggs) on the price field. This demonstrates how to use multiple search parameters together.

In [14]:
response = es.search(
    index="my_index",
    body={
        "query": {
            "match": {
                "message": "important keyword"
            }
        },
        "aggs": {
            "max_price": {
                "max": {
                    "field": "price"
                }
            }
        },
        "size": 5,
        "from": 10,
        "timeout": "10s"
    }
)

max_price = response["aggregations"]["max_price"]["value"]
print(f"Max value is {max_price} BDT.")

for hit in response["hits"]["hits"]:
    print(hit["_source"])

Max value is 200.0 BDT.
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
