# Elasticsearch DSL

In [5]:
from pprint import pprint
# ignore warning
import warnings
warnings.filterwarnings("ignore")

## Creating index and saving documents 

In [6]:
from elasticsearch_dsl.connections import connections
connections.create_connection(hosts='http://localhost:9200')

<Elasticsearch(['http://localhost:9200'])>

In [8]:
from elasticsearch_dsl import Document, Text, Keyword, Float, Integer, Nested, InnerDoc


class Filter(InnerDoc):
    name = Keyword(copy_to='all_filters')
    value = Keyword(copy_to='all_filters')


class Product(Document):
    title = Text(copy_to='all_filters')
    description = Text(copy_to='all_filters')
    price = Float()
    category = Keyword(copy_to='all_filters')
    brand = Keyword(copy_to='all_filters')
    quantity = Integer()
    filters = Nested(Filter)

    class Index:
        name = 'product'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0
        }


# Create the index in Elasticsearch
Product.init()

In [None]:
# save doc to index
product = Product(
    title="Apple iPhone 12",
    description="Previous generation Apple smartphone",
    price=799.99,
    category="electronics",
    brand="Apple",
    quantity=80,
    filters=[
        {'name': 'color', 'value': 'purple'},
        {'name': 'storage', 'value': '64GB'},
        {'name': 'network', 'value': '5G'}
    ]
)
product.save()

# Add more products similarly

## Query

### Making query using elasticsearch-py and elasticsearch_dsl

In [3]:
from elasticsearch_dsl import Search
from elasticsearch import Elasticsearch
client = Elasticsearch(hosts='http://localhost:9200')

s = Search(
    using=client,
    index="product"
).query("match_all")
# print(s.execute())

for product in s:
    print(product.title)

Apple iPhone 15 pro
Apple iPhone 15
Apple iPhone 14 pro
Apple iPhone 14
Apple iPhone 13 pro
Apple iPhone 13
Samsung Galaxy S22 Ultra
Samsung Galaxy S22
Samsung Galaxy S21 Ultra
Samsung Galaxy S21


In [7]:
# Define the raw query
es = Elasticsearch(hosts='http://localhost:9200')

query = {
    "size": 0,
    "aggs": {
        "facets": {
            "nested": {
                "path": "filters"
            },
            "aggs": {
                "names": {
                    "terms": {
                        "field": "filters.name"
                    },
                    "aggs": {
                        "values": {
                            "terms": {
                                "field": "filters.value"
                            }
                        }
                    }
                }
            }
        }
    }
}

# Execute the query using the Elasticsearch client
response = es.search(index="product", body=query)
# pprint(response)
pprint(response['aggregations']['facets']['names']['buckets'])

[{'doc_count': 52,
  'key': 'Display Size',
  'values': {'buckets': [{'doc_count': 9, 'key': '14 inch'},
                         {'doc_count': 7, 'key': '15.6 inch'},
                         {'doc_count': 6, 'key': '6.7 inch'},
                         {'doc_count': 4, 'key': '6.1 inch'},
                         {'doc_count': 3, 'key': '13.3 inch'},
                         {'doc_count': 2, 'key': '16 inch'},
                         {'doc_count': 2, 'key': '6.4 inch'},
                         {'doc_count': 2, 'key': '6.5 inch'},
                         {'doc_count': 2, 'key': '6.55 inch'},
                         {'doc_count': 2, 'key': '6.67 inch'}],
             'doc_count_error_upper_bound': 0,
             'sum_other_doc_count': 13}},
 {'doc_count': 52,
  'key': 'RAM',
  'values': {'buckets': [{'doc_count': 17, 'key': '8GB'},
                         {'doc_count': 14, 'key': '16GB'},
                         {'doc_count': 10, 'key': '12GB'},
                         {'doc_co

In [8]:
s = Search(
    # using=client
).query("match", title="iphone")
# print(s.execute())
for product in s:
    print(product.title)

Apple iPhone 15
Apple iPhone 14
Apple iPhone 13
Apple iPhone 15 pro
Apple iPhone 14 pro
Apple iPhone 13 pro


### `Q` object

In [25]:
from elasticsearch_dsl import Q

q = Q('bool',
      must=[Q('match', title='iphone')],
      should=[Q('match', title='apple'), Q('match', title='pro')],
      minimum_should_match=1
      )
s = Search(index="product").query(q)
for product in s:
    print(product.title)

Apple iPhone 14 Pro
Apple iPhone 12
Apple iPhone 14


### Filter

In [31]:
s = Search(index="product")
s = s.query('bool', filter=[Q('match', title='apple'), Q('match', category='smartphones')])
for product in s:
    print(product.title)

Apple iPhone 12
Apple iPhone 14
Apple iPhone 14 Pro


## Aggregations

```sh
GET index/_search
{
  "aggs": {
    "agg_name": {
      "agg_type": {
        "field": "field_name"
      }
    }
  }
}
```

### Metrics aggregations

In [48]:
from elasticsearch_dsl import Search

# Create a Search object
s = Search(index="product")

# Add the aggregation
s.aggs.metric(name='stats_price', agg_type='stats', field='price')

# Set the size to 0
s = s.extra(size=0)
# Execute the search
response = s.execute()
pprint(s.to_dict())

# Access the stats aggregation results
stats_price = response.aggregations.stats_price

# Print the stats
print("Count:", stats_price.count)
print("Min:", stats_price.min)
print("Max:", stats_price.max)
print("Avg:", stats_price.avg)
print("Sum:", stats_price.sum)

{'aggs': {'stats_price': {'stats': {'field': 'price'}}}, 'size': 0}
Count: 66
Min: 29.989999771118164
Max: 3899.0
Avg: 774.867570067897
Sum: 51141.2596244812


### Bucket aggregation

In [25]:
from elasticsearch_dsl import Search, A

# Create a Search object
s = Search(index="product")

# Define the terms aggregation
# agg = A('terms', field='category', min_doc_count=1)
# Add the aggregation to the search object
# s.aggs.bucket('groups_by_category', agg)

s.aggs.bucket(name='groups_by_category', agg_type='terms', field='category', min_doc_count=1)

# Set the size to 0
# s = s.extra(size=0)

pprint(s.to_dict())

# Execute the search
response = s.execute()

# Access the aggregation results
groups_by_category = response.aggregations.groups_by_category

# Print the category buckets
for bucket in groups_by_category.buckets:
    print(f"Category: {bucket.key}, Document Count: {bucket.doc_count}")

{'aggs': {'groups_by_category': {'terms': {'field': 'category',
                                           'min_doc_count': 1}}}}
Category: Smartphones, Document Count: 27
Category: Laptops, Document Count: 25
Category: Clothing, Document Count: 10
Category: Televisions, Document Count: 10
Category: Shoes, Document Count: 5


## Pagination


To specify the from/size parameters, use the Python slicing API:


In [51]:
s = s[10:20]
# {"from": 10, "size": 10}

s = s[:20]
# {"size": 20}

s = s[10:]
# {"from": 10}

s = s[10:20][2:]
# {"from": 12, "size": 8}

In [24]:
# Define the raw query
es = Elasticsearch(hosts='http://localhost:9200')

search_queries = [
    {
        "multi_match": {
            "query": "iphone",
            "fields": [
                "title",
                "description",
                "all_filters"
            ]
        }
    }
]

filter1 = {
    "nested": {
        "path": "filters",
        "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "filters.name": "RAM"
                            }
                        }
                    ],
                    "should": [
                        {
                            "term": {
                                "filters.value": "4GB"
                            }
                        },
                        {
                            "term": {
                                "filters.value": "8GB"
                            }
                        }
                    ],
                    "minimum_should_match": 1
                }
        }
    }
}
filter_options = [
    filter1
]
facets_agg = {
    "facets": {
        "nested": {
            "path": "filters"
        },
        "aggs": {
            "names": {
                "terms": {
                    "field": "filters.name"
                },
                "aggs": {
                    "values": {
                        "terms": {
                            "field": "filters.value"
                        }
                    }
                }
            }
        }
    }
}
body = {
    "size": 10,
    "query": {
        "bool": {
            "must":  search_queries,
            "filter": filter_options
        }
    },
    "aggs": facets_agg
}


# Execute the query using the Elasticsearch client
response = es.search(index="product", body=query)
# pprint(response)
pprint(response['aggregations'])

{'facets': {'doc_count': 494,
            'names': {'buckets': [{'doc_count': 52,
                                   'key': 'Display Size',
                                   'values': {'buckets': [{'doc_count': 9,
                                                           'key': '14 inch'},
                                                          {'doc_count': 7,
                                                           'key': '15.6 inch'},
                                                          {'doc_count': 6,
                                                           'key': '6.7 inch'},
                                                          {'doc_count': 4,
                                                           'key': '6.1 inch'},
                                                          {'doc_count': 3,
                                                           'key': '13.3 inch'},
                                                          {'doc_count': 2,
              