Connect to elasticsearch

In [7]:
# Connection to Elasticsearch 
from pprint import pprint
from elasticsearch import Elasticsearch
from config.development import Config

es = Elasticsearch(
    Config.ES_HOST, 
    verify_certs=True, 
    basic_auth=(Config.ES_USERNAME, Config.ES_PASSWORD)
)

client_info = es.info()
print("Connected to the elasticsearch ✔")
pprint(client_info.body)

Connected to the elasticsearch ✔
{'cluster_name': 'elasticsearch',
 'cluster_uuid': 'tdYmEtALQyuf5oSJGl74OQ',
 'name': 'david-server',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-06-19T01:37:57.847711500Z',
             'build_flavor': 'default',
             'build_hash': '580aff1a0064ce4c93293aaab6fcc55e22c10d1c',
             'build_snapshot': False,
             'build_type': 'deb',
             'lucene_version': '8.11.3',
             'minimum_index_compatibility_version': '6.0.0-beta1',
             'minimum_wire_compatibility_version': '6.8.0',
             'number': '7.17.29'}}


In [8]:
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

Using a dummy_data.json file to insert multiple document in index. We stored the id of each document in document_id

In [9]:
import json
from tqdm import tqdm

document_ids = []
dummy_data = json.load(open("../data/dummy_data.json"))
for document in tqdm(dummy_data, total=len(dummy_data)):
    response = es.index(index="my_index", body=document)
    document_ids.append(response['_id'])

100%|██████████| 5/5 [00:00<00:00, 31.13it/s]


Count Operation

In [10]:
response = es.count(index="my_index")

count = response["count"]
print(f"The number of count index is {count}")

The number of count index is 0


In [11]:
es.ping()

True

This example how to use the query parameter to match certain criteria data

In [12]:
# How to query data in elasticsearch 
query = {
    "range": {
        "created_on": {
            "gte": "2023-10-02", 
            "lte": "2023-10-03", 
            "format": "yyyy-MM-dd"
        }
    }
}

response = es.count(index="my_index", query=query)
pprint(response.body)
data_count = response["count"]
print(f"Data count after filters range between is {data_count}")

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'count': 0}
Data count after filters range between is 0
