# Elasticsearch DSL

In [35]:
from pprint import pprint

## Creating index and saving documents 

In [8]:
from elasticsearch_dsl import Document, Text, Keyword, Float, Integer, Nested, InnerDoc
from elasticsearch_dsl.connections import connections
# Register the connection that the Document and Search calls in this notebook
# rely on (no explicit client is passed to them).
ES_HOST = 'http://localhost:9200'
connections.create_connection(hosts=ES_HOST)


class Filter(InnerDoc):
    """One name/value attribute of a product, e.g. ``color`` / ``purple``.

    Both parts are keyword fields and are additionally copied into the
    ``all_filters`` field via ``copy_to``.
    """

    name = Keyword(copy_to="all_filters")
    value = Keyword(copy_to="all_filters")


class Product(Document):
    """Mapping for documents stored in the ``product`` index.

    Title, description, category and brand are copied into ``all_filters``
    through ``copy_to``; price and quantity are plain numeric fields.
    """

    # NOTE(review): `all_filters` is only referenced as a copy_to target and
    # is not declared as a field here -- presumably Elasticsearch maps it
    # dynamically; confirm that is the intended mapping.

    # Free-text fields
    title = Text(copy_to="all_filters")
    description = Text(copy_to="all_filters")

    # Numeric field
    price = Float()

    # Keyword fields
    category = Keyword(copy_to="all_filters")
    brand = Keyword(copy_to="all_filters")

    quantity = Integer()

    # List of name/value attributes, each stored as a nested `Filter` document
    filters = Nested(Filter)

    class Index:
        # Index the documents are saved to and its creation settings
        name = "product"
        settings = dict(number_of_shards=1, number_of_replicas=0)


# Create the index in Elasticsearch for the Product document class declared
# above (index name and settings come from its inner `Index` class).
Product.init()

In [9]:
# Build one product document and index it.
# The nested filters are plain name/value pairs.
iphone_12_filters = [
    dict(name='color', value='purple'),
    dict(name='storage', value='64GB'),
    dict(name='network', value='5G'),
]

product = Product(
    title="Apple iPhone 12",
    description="Previous generation Apple smartphone",
    price=799.99,
    category="electronics",
    brand="Apple",
    quantity=80,
    filters=iphone_12_filters,
)

# Kept as the last expression so the cell displays the result of the save
product.save()

# Add more products similarly

'created'

## Search in elasticsearch-dsl

### Searching

In [21]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

# Match query on the title field, restricted to the `product` index.
# Without `index=` the search would run against every index reachable through
# the connection, not just the products created above.
# No `using=` client is passed: the search runs on the connection registered
# earlier with `connections.create_connection(...)`.
s = Search(index="product").query("match", title="iphone")

# Iterating over the Search executes it and yields the hits.
# The loop variable is named `hit` so it does not overwrite the `product`
# document instance created in the save cell.
for hit in s:
    print(hit.title)

iPhone 13
Apple iPhone 12
Apple iPhone 14
Apple iPhone 14 Pro


### `Q` object

In [25]:
from elasticsearch_dsl import Q

# Required clause: the title must match "iphone"
required_clauses = [Q('match', title='iphone')]

# Optional clauses: at least one of them has to match as well
# (enforced by minimum_should_match=1 below)
optional_clauses = [
    Q('match', title='apple'),
    Q('match', title='pro'),
]

q = Q(
    'bool',
    must=required_clauses,
    should=optional_clauses,
    minimum_should_match=1,
)

s = Search(index="product").query(q)
for hit in s:
    print(hit.title)

Apple iPhone 14 Pro
Apple iPhone 12
Apple iPhone 14


### Filter

In [31]:
# Both conditions go into the `filter` list of a bool query
filter_clauses = [
    Q('match', title='apple'),
    Q('match', category='smartphones'),
]

s = Search(index="product").query('bool', filter=filter_clauses)
for hit in s:
    print(hit.title)

Apple iPhone 12
Apple iPhone 14
Apple iPhone 14 Pro


## Aggregations

```sh
GET index/_search
{
  "aggs": {
    "agg_name": {
      "agg_type": {
        "field": "field_name"
      }
    }
  }
}
```

### Metrics aggregations

In [48]:
from elasticsearch_dsl import Search

# Search on the product index carrying a `stats` aggregation over `price`.
# `s.aggs.metric(...)` is called for its effect on `s` (its return value is
# not used), whereas `extra()` hands back a Search that must be assigned.
s = Search(index="product")
s.aggs.metric('stats_price', 'stats', field='price')

# size=0: only the aggregation result is needed, not the matching documents
s = s.extra(size=0)

# Run the request, then show the body that was sent
response = s.execute()
pprint(s.to_dict())

# Pull the aggregation out of the response by the name given above
stats_price = response.aggregations.stats_price

# Print each statistic on its own line
for label, attribute in (
    ("Count:", "count"),
    ("Min:", "min"),
    ("Max:", "max"),
    ("Avg:", "avg"),
    ("Sum:", "sum"),
):
    print(label, getattr(stats_price, attribute))

{'aggs': {'stats_price': {'stats': {'field': 'price'}}}, 'size': 0}
Count: 66
Min: 29.989999771118164
Max: 3899.0
Avg: 774.867570067897
Sum: 51141.2596244812


### Bucket aggregation

In [49]:
from elasticsearch_dsl import Search, A

# Terms aggregation: one bucket per distinct `category` value.
s = Search(index="product")

# The aggregation is declared inline here. The equivalent two-step form is
#   agg = A('terms', field='category', min_doc_count=1)
#   s.aggs.bucket('groups_by_category', agg)
s.aggs.bucket(
    'groups_by_category',
    'terms',
    field='category',
    min_doc_count=1,
)

# NOTE: size is left at its default here (the `s.extra(size=0)` call is
# intentionally not applied), so the request body below has no "size" key.

# Show the request body, then run it
pprint(s.to_dict())
response = s.execute()

# Look the aggregation up by the name it was registered under
groups_by_category = response.aggregations.groups_by_category

# One line per bucket: the category and how many documents fall into it
for bucket in groups_by_category.buckets:
    print(f"Category: {bucket.key}, Document Count: {bucket.doc_count}")

{'aggs': {'groups_by_category': {'terms': {'field': 'category',
                                           'min_doc_count': 1}}}}
Category: laptops, Document Count: 14
Category: clothing, Document Count: 10
Category: televisions, Document Count: 7
Category: fashion, Document Count: 6
Category: smartphones, Document Count: 6
Category: refrigerators, Document Count: 5
Category: shoes, Document Count: 5
Category: home, Document Count: 4
Category: cameras, Document Count: 3
Category: electronics, Document Count: 2


## Pagination


To specify the from/size parameters, use the Python slicing API:


In [51]:
# Slicing a Search returns a new Search with from/size set.
#
# Every example below slices the same unsliced base search. A slice taken from
# an already-sliced Search is applied relative to its existing window (see the
# last example), so reassigning a single variable after each slice would make
# the from/size values in the comments wrong. Starting from a fresh Search also
# keeps the aggregation added in the previous cell out of these requests.
s = Search(index="product")

page_10_to_20 = s[10:20]
# {"from": 10, "size": 10}

first_20 = s[:20]
# {"size": 20}

from_10_onwards = s[10:]
# {"from": 10}

# Chained slices: the second slice is relative to the first one
window_in_page = s[10:20][2:]
# {"from": 12, "size": 8}