Connect to the Elasticsearch

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch
from config.development import Config

es = Elasticsearch(
    Config.ES_HOST, 
    verify_certs=True, 
    basic_auth=(Config.ES_USERNAME, Config.ES_PASSWORD)
)

client = es.info()
print("Connected to the elasticsearch server ✔")
pprint(client.body)


Connected to the elasticsearch server ✔
{'cluster_name': 'elasticsearch',
 'cluster_uuid': 'tdYmEtALQyuf5oSJGl74OQ',
 'name': 'david-server',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-06-19T01:37:57.847711500Z',
             'build_flavor': 'default',
             'build_hash': '580aff1a0064ce4c93293aaab6fcc55e22c10d1c',
             'build_snapshot': False,
             'build_type': 'deb',
             'lucene_version': '8.11.3',
             'minimum_index_compatibility_version': '6.0.0-beta1',
             'minimum_wire_compatibility_version': '6.8.0',
             'number': '7.17.29'}}


In [2]:
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [3]:
import json 
from tqdm import tqdm 

document_ids = []
dummy_data = json.load(open("../data/dummy_data.json"))

for document in tqdm(dummy_data, total=len(dummy_data)): 
    response = es.index(index="my_index", body=document)
    document_ids.append(response['_id']) 

100%|██████████| 5/5 [00:00<00:00, 28.00it/s]


In [4]:
document_ids

['ze4d8ZkBtfwvgmKSmcCe',
 'zu4d8ZkBtfwvgmKSmcDl',
 'z-4d8ZkBtfwvgmKSmcD9',
 '0O4d8ZkBtfwvgmKSmsAf',
 '0e4d8ZkBtfwvgmKSmsA4']

1. If the document exists in the index

In [5]:
# Prnt again the document data in here 
response = es.get(index="my_index", id=document_ids[0])
pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 0,
 '_source': {'content': 'This is sample content first entry',
             'created_on': '2023-10-01',
             'title': 'Sample title 1'},
 '_type': '_doc',
 '_version': 1,
 'found': True}


1.1. Update an existing fields

In [6]:
# How to perform an update document 
response = es.update(
    index="my_index", 
    id = document_ids[0], 
    script = {
        "source": "ctx._source.content = params.content", 
        "source": "ctx._source.created_on = params.created_on", 
        "source": "ctx._source.title = params.title",
        # I want to updated the multiple context params
        "params": {
            "content": "Updated new content with new context"
        }, 
        "params": {
            "title": "New Updated In here"
        }, 
        "params": {
            "created_on": "2025-10-10"
        }
    }
)

pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 5,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': '_doc',
 '_version': 2,
 'result': 'updated'}


In [7]:
from pprint import pprint
response = es.get(index="my_index", id = document_ids[0])
pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 5,
 '_source': {'content': 'This is sample content first entry',
             'created_on': '2023-10-01',
             'title': None},
 '_type': '_doc',
 '_version': 2,
 'found': True}


1.2 Add a new field in the document

1.2.1 New Field Method

In [8]:
# How to perform add new field in the document 
response = es.update(
    index="my_index", 
    id = document_ids[0], 
    script = {
        "source": "ctx._source.newfield = 'add new field dummy data'", 
    }
)

pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 6,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': '_doc',
 '_version': 3,
 'result': 'updated'}


In [9]:
# Look and response in here 
response = es.get(index="my_index", id=document_ids[0])
pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 6,
 '_source': {'content': 'This is sample content first entry',
             'created_on': '2023-10-01',
             'newfield': 'add new field dummy data',
             'title': None},
 '_type': '_doc',
 '_version': 3,
 'found': True}


We can Add new index also with the doc = {}

In [10]:
response = es.update(
    index="my_index", 
    id = document_ids[0], 
    doc = {
        "Field 1": "new field in here 1", 
        "Field 2": "new field in here 2"
    }
)

pprint(response)

ObjectApiResponse({'_index': 'my_index', '_type': '_doc', '_id': 'ze4d8ZkBtfwvgmKSmcCe', '_version': 4, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1})


In [11]:
# Print the data result in here 
response = es.get(index="my_index", id=document_ids[0])
pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 7,
 '_source': {'Field 1': 'new field in here 1',
             'Field 2': 'new field in here 2',
             'content': 'This is sample content first entry',
             'created_on': '2023-10-01',
             'newfield': 'add new field dummy data',
             'title': None},
 '_type': '_doc',
 '_version': 4,
 'found': True}


How to perform an remove an index file is 

In [12]:
# Perform and remove index I will remove in here 
response = es.update(
    index="my_index", 
    id = document_ids[0], 
    script = {
        "source": "ctx._source.remove('Field 1')", 
        "source": "ctx._source.remove('Field 2')"
    }
)

pprint(response.body)


{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 8,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': '_doc',
 '_version': 5,
 'result': 'updated'}


In [13]:
# Print the response data in here 
response = es.get(index="my_index", id = document_ids[0])
pprint(response.body)

{'_id': 'ze4d8ZkBtfwvgmKSmcCe',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 8,
 '_source': {'Field 1': 'new field in here 1',
             'content': 'This is sample content first entry',
             'created_on': '2023-10-01',
             'newfield': 'add new field dummy data',
             'title': None},
 '_type': '_doc',
 '_version': 5,
 'found': True}


In [14]:
document_ids

['ze4d8ZkBtfwvgmKSmcCe',
 'zu4d8ZkBtfwvgmKSmcDl',
 'z-4d8ZkBtfwvgmKSmcD9',
 '0O4d8ZkBtfwvgmKSmsAf',
 '0e4d8ZkBtfwvgmKSmsA4']

Performs the updated data, if the document is'nt not exist.

In [15]:
from pprint import pprint

response = es.update(
    index = "my_index", 
    id = 1, 
    doc = {
        "body 1": "Get the body 1", 
        "body 2": "Get the body 2"
    },
    doc_as_upsert= True, 
)

pprint(response.body)

{'_id': '1',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 9,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': '_doc',
 '_version': 1,
 'result': 'created'}


In [16]:
# Perfrom and look the response of the data 
from pprint import pprint
data_result = es.get(index="my_index", id=1)
pprint(data_result.body)

# Count of the dat 
data_count = es.count(index="my_index")
result = data_count['count']
print(f"Result of data counting ", result)

{'_id': '1',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 9,
 '_source': {'body 1': 'Get the body 1', 'body 2': 'Get the body 2'},
 '_type': '_doc',
 '_version': 1,
 'found': True}
Result of data counting  0
