**Connected to Elasticsearch**

In [2]:
from pprint import pprint 
from elasticsearch import Elasticsearch 

es = Elasticsearch("http://localhost:9200")

client_info = es.info()

print("Connected to Elasticsearch!")

pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'T1HeaWnRTOqX_BBgREVVbA',
 'name': '64c49e436740',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-10-21T10:06:21.288851013Z',
             'build_flavor': 'default',
             'build_hash': '25d88452371273dd27356c98598287b669a03eae',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.1',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.0'}}


**Creae Index**

In [4]:
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

**Index data**

In [5]:
import json 

documents = json.load(open("data/clothes.json"))

operations = []

for document in documents:
    operations.append({"index": {"_index": "my_index"}}) # action 
    operations.append(document)

response = es.bulk(operations=operations)

pprint(response.body)


{'errors': False,
 'items': [{'index': {'_id': 'GbJGTpoBhG6nlJzo4KAN',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': 'GrJGTpoBhG6nlJzo4KAO',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': 'G7JGTpoBhG6nlJzo4KAO',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed': 0, 'successful': 1, '

In [8]:
count = es.count(index="my_index")
print(f"Number of documents in index: {count.body["count"]}")

Number of documents in index: 100


In [13]:
response = es.search(
    index="my_index", 
    body={
        "query": {
            "bool": {
                "filter": [
                    {
                        "term": {
                            "brand": "adidas"
                        }
                    }
                ]
            }
        },
        "size": 100,
    }
)

hits = response["hits"]["hits"]
print(f"Total {len(hits)} documents found.")

Total 23 documents found.


Here, we apply multiple filters using an AND operation to retain documents where the brand is Adidas and the color is yellow.

In [24]:
response = es.search(
    index="my_index", 
    body={
        "query": {
            "bool": {
                "filter": [
                    {
                        "term": {
                            "brand":  "adidas"
                        }
                    },
                    {
                        "term": {
                            "color": "yellow"
                        }
                    }
                ]
            }
        }
    }
)

hits = response["hits"]["hits"]

print(hits[0]["_source"])

print(f"Total {len(hits)} documents found.")

{'brand': 'adidas', 'color': 'yellow', 'model': 'model_15'}
Total 6 documents found.


**Post filters**
#
In this example, we'll explore the use of filters, aggregations, filtered aggregations, and post-filters.

We start by narrowing our search to documents where the brand is gucci. Next, we apply aggregations to determine the document count for each color. We then define a filtered aggregation, color_red, which counts the models in documents where the color is red.

Finally, a post_filter is used after performing the aggregations, refining the search results to include only documents with the color red.

In [29]:
response = es.search(
    index="my_index", 
    body={
        "query": {
            "bool": {
                "filter": {
                    "term": {
                        "brand": "gucci"
                    }
                }
            }
        },
        "aggs": {
            "colors": {
                "terms": {
                    "field": "color.keyword"
                }
            },
            "color_red": {
                "filter": {
                    "term": {
                        "color.keyword": "red"
                    }
                },
                "aggs": {
                    "models": {
                        "terms": {
                            "field": "model.keyword"
                        }
                    }
                }
            }
        }
    }
)

hits = response["hits"]["hits"]

pprint(response.body)

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'aggregations': {'color_red': {'doc_count': 12,
                                'models': {'buckets': [{'doc_count': 3,
                                                        'key': 'model_1'},
                                                       {'doc_count': 1,
                                                        'key': 'model_14'},
                                                       {'doc_count': 1,
                                                        'key': 'model_16'},
                                                       {'doc_count': 1,
                                                        'key': 'model_2'},
                                                       {'doc_count': 1,
                                                        'key': 'model_26'},
                                                       {'doc_count': 1,
                                                        'key': 'model_2

In [30]:
colors_aggregation = response.body['aggregations']['colors']['buckets']
pprint(colors_aggregation)

[{'doc_count': 12, 'key': 'red'},
 {'doc_count': 8, 'key': 'blue'},
 {'doc_count': 6, 'key': 'green'},
 {'doc_count': 4, 'key': 'yellow'}]


In [31]:
color_red_aggregation = response.body['aggregations']['color_red']['models']['buckets']
pprint(color_red_aggregation)

[{'doc_count': 3, 'key': 'model_1'},
 {'doc_count': 1, 'key': 'model_14'},
 {'doc_count': 1, 'key': 'model_16'},
 {'doc_count': 1, 'key': 'model_2'},
 {'doc_count': 1, 'key': 'model_26'},
 {'doc_count': 1, 'key': 'model_28'},
 {'doc_count': 1, 'key': 'model_3'},
 {'doc_count': 1, 'key': 'model_4'},
 {'doc_count': 1, 'key': 'model_6'},
 {'doc_count': 1, 'key': 'model_8'}]


In [33]:
hits = response.body['hits']['hits']
for hit in hits:
    print(f"""Shirt brand: {hit['_source']['brand']}, color: {
          hit['_source']['color']}, and model: {hit['_source']['model']}""")

Shirt brand: gucci, color: red, and model: model_1
Shirt brand: gucci, color: blue, and model: model_2
Shirt brand: gucci, color: red, and model: model_3
Shirt brand: gucci, color: green, and model: model_4
Shirt brand: gucci, color: red, and model: model_1
Shirt brand: gucci, color: yellow, and model: model_2
Shirt brand: gucci, color: blue, and model: model_3
Shirt brand: gucci, color: red, and model: model_4
Shirt brand: gucci, color: red, and model: model_1
Shirt brand: gucci, color: blue, and model: model_16
