# Search APIs

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

In [4]:
# Create an Elasticsearch client for the local node and fetch the cluster
# metadata; the success message is printed only after info() has returned.
es = Elasticsearch("http://localhost:9200")
client_info = es.info()

print("Connected successfully to the ElasticSearch server !")
pprint(client_info.body)

Connected successfully to the ElasticSearch server !
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'XoT9Xm1xR3O6L3zfFdu5nQ',
 'name': 'd8153502b8b9',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2023-02-13T09:35:20.314882762Z',
             'build_flavor': 'default',
             'build_hash': '2d58d0f136141f03239816a4e360a8d17b6d8f29',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.4.2',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.6.2'}}


In [5]:
# search all documents

In [6]:
# Run a match_all query against the "_all" index target.
search_all_in_all_indices = es.search(index="_all", query={"match_all": {}})

In [13]:
# Number of hit entries in the response; use the built-in len() rather than
# calling the __len__ dunder method directly.
len(search_all_in_all_indices.body["hits"]["hits"])

7

In [14]:
# Display the whole "hits" section: total count, max score and the hit list.
search_all_in_all_indices.body["hits"]

{'total': {'value': 7, 'relation': 'eq'},
 'max_score': 1.0,
 'hits': [{'_index': 'bulk_api_index',
   '_id': '4',
   '_score': 1.0,
   '_source': {'field1': 'field 1 value', 'field2': 'field 2 value'}},
  {'_index': 'bulk_api_index',
   '_id': '_ajeNpUBUdIER6xMfbEd',
   '_score': 1.0,
   '_source': {'doc': {'field1': 'value31',
     'field2': 'value32',
     'field3': 3}}},
  {'_index': 'bulk_api_index',
   '_id': '2',
   '_score': 1.0,
   '_source': {'doc': {'field3': 134,
     'field4': 'hey adding an extra field in the doc'}}},
  {'_index': 'update_api_test_index',
   '_id': '-6idNpUBUdIER6xM_7HT',
   '_score': 1.0,
   '_source': {'test_field': 'test content2'}},
  {'_index': 'update_api_test_index',
   '_id': '_KidNpUBUdIER6xM_7Hc',
   '_score': 1.0,
   '_source': {'test_field': 'test content3'}},
  {'_index': 'update_api_test_index',
   '_id': '-qidNpUBUdIER6xM_7Gh',
   '_score': 1.0,
   '_source': {'test_field': 'updated_data',
    'new_field': 'hey this is new_field value 2 aft

In [37]:
# Sample documents used by the search examples: each has a title, a free-text
# body and a creation date given as an ISO "YYYY-MM-DD" string.
docs = [
    dict(
        title="Sample title 1",
        text="this is the first sample document text.",
        created_on="2024-09-22",
    ),
    dict(
        title="Sample title 2",
        text="Here is another sample of a document text.",
        created_on="2024-09-24",
    ),
    dict(
        title="Sample title 3",
        text="the content of the third document goes here",
        created_on="2024-09-24",
    ),
]

In [38]:
# Recreate "search_index" from scratch so this cell can be re-run safely.

# ignore_unavailable=True lets the delete call tolerate a missing index.
es.indices.delete(index="search_index", ignore_unavailable=True)

# Last expression of the cell: its acknowledgement response is displayed.
es.indices.create(index="search_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'search_index'})

In [39]:
# Index every sample document and keep the raw response body of each call.
res_list = []
for doc in docs:
    res_list.append(es.index(index="search_index", document=doc).body)

# Elasticsearch search is near-real-time: freshly indexed documents only become
# searchable after a refresh. Force one here so the search cells that follow
# see all documents even when the notebook is run top to bottom without pauses.
# The trailing semicolon keeps the cell free of output, as before.
es.indices.refresh(index="search_index");

### Leaf clauses

In [46]:
# Term query: exact-value match on the created_on field.
# The query is passed through the `query=` keyword, like the match_all search
# earlier in this notebook, instead of being wrapped in a raw `body=` dict
# (which triggers a DeprecationWarning on some 8.x client releases).
term_res = es.search(
    index="search_index",
    query={
        "term": {
            "created_on": "2024-09-24"
        }
    },
)

In [47]:
# Total hit count reported by the term search response.
n_hits = term_res.body["hits"]["total"]["value"]

In [48]:
# Report the hit count currently stored in n_hits.
print(f"found {n_hits} documents in search_index.")

found 2 documents in search_index.


In [52]:
# Match query: full-text search for the word "document" in the text field.
# The query is passed through the `query=` keyword, like the match_all search
# earlier in this notebook, instead of being wrapped in a raw `body=` dict.
match_res = es.search(
    index="search_index",
    query={
        "match": {
            "text": "document"
        }
    },
)

In [53]:
# Read the total hit count from the match response and report it.
match_count = match_res.body["hits"]["total"]["value"]
print(f"found {match_count} documents in search_index.")

found 3 documents in search_index.


In [58]:
# Range query: documents whose created_on lies between 2024-09-22 and
# 2024-09-24, both bounds inclusive (gte / lte).
# The query is passed through the `query=` keyword, like the match_all search
# earlier in this notebook, instead of being wrapped in a raw `body=` dict.
range_res = es.search(
    index="search_index",
    query={
        "range": {
            "created_on": {
                "gte": "2024-09-22",
                "lte": "2024-09-24"
            }
        }
    },
)

In [59]:
# Read the total hit count from the range response and report it.
n_hits = range_res.body["hits"]["total"]["value"]
print(f"found {n_hits} documents in search_index.")


found 3 documents in search_index.


### Compound clauses
---
1.  `bool`:
-   Combines multiple queries using boolean logic:
    -   `must`, `filter`, `should`, `must_not`
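-   `bool` itself has no field-type requirement; each nested clause follows its own rules (for example, the `match` clause used below is meant for fields mapped to a `text` data type).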


In [73]:
# Bool query: both clauses sit under `must`, so a hit has to satisfy the
# full-text match AND the date range. The upper bound uses `lt`, so documents
# dated 2024-09-24 are excluded.
# The query is passed through the `query=` keyword, like the match_all search
# earlier in this notebook, instead of being wrapped in a raw `body=` dict.
response = es.search(
    index="search_index",
    query={
        "bool": {
            "must": [
                {
                    "match": {
                        "text": "document"
                    }
                },
                {
                    "range": {
                        "created_on": {
                            "gte": "2024-09-22",
                            "lt": "2024-09-24"
                        }
                    }
                }
            ]
        }
    },
)

In [74]:
# Read the total hit count from the bool-query response and report it.
n_hits = response.body["hits"]["total"]["value"]

print(f"found {n_hits} documents in search_index.")


found 1 documents in search_index.


In [81]:
# Print only the document id and the stored source of every returned hit.
for data in response.body["hits"]["hits"]:
    pprint({field: data[field] for field in ("_id", "_source") if field in data})

{'_id': '_qiFN5UBUdIER6xMjrGm',
 '_source': {'created_on': '2024-09-22',
             'text': 'this is the first sample document text.',
             'title': 'Sample title 1'}}


## Aggregations