**Connect to Elasticsearch Server**

In [1]:
from pprint import pprint 
from elasticsearch import Elasticsearch

# Create the client for the Elasticsearch server listening on localhost:9200.
es = Elasticsearch(hosts="http://localhost:9200")

# info() asks the server for its cluster metadata; the confirmation message
# is only printed once that call has returned.
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'T1HeaWnRTOqX_BBgREVVbA',
 'name': '64c49e436740',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-10-21T10:06:21.288851013Z',
             'build_flavor': 'default',
             'build_hash': '25d88452371273dd27356c98598287b669a03eae',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.1',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.0'}}


**Create Index with Auto Mappings**

In [3]:
# Drop any existing "my_index" first so the cell can be re-run safely;
# ignore_unavailable=True is passed so the delete is attempted even when
# the index may not exist yet.
es.indices.delete(
    index="my_index",
    ignore_unavailable=True,
)

# Recreate the index without passing any explicit mappings.
es.indices.create(
    index="my_index",
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

**Without bulk API**
Let's index the documents sequentially, issuing one API call per document.

In [4]:
from tqdm import tqdm
import json 

# Load the sample documents. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open in the kernel, and the
# explicit encoding avoids depending on the platform's default.
with open("data.json", encoding="utf-8") as data_file:
    documents = json.load(data_file)

# Index one document per request and keep the ids returned by the server so
# the update/delete cells that follow can address individual documents.
documents_ids = []
for document in tqdm(documents, total=len(documents)):
    # `document=` is the supported keyword for the document source;
    # the older `body=` keyword is deprecated in the Python client.
    response = es.index(index="my_index", document=document)
    documents_ids.append(response["_id"])

documents

100%|██████████| 5/5 [00:01<00:00,  4.25it/s]


[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Elasticsearch makes searching and analyzing large amounts of data fast and efficient.',
  'created_on': '2024-10-05'},
 {'title': 'Sample Title 3',
  'text': 'Django Rest Framework simplifies API development with powerful serialization tools.',
  'created_on': '2024-11-14'},
 {'title': 'Sample Title 4',
  'text': 'Python provides a wide range of libraries for data analysis, automation, and backend development.',
  'created_on': '2025-01-10'},
 {'title': 'Sample Title 5',
  'text': 'FastAPI is an excellent choice for building high-performance APIs with async capabilities.',
  'created_on': '2025-02-02'}]

Let's update the first and second documents: the first gets a new title, and the second gets an additional field.

In [5]:
# Overwrite the title of the first indexed document with a scripted update;
# the new value is supplied through the script's `params`.
es.update(
    index="my_index",
    id=documents_ids[0],
    script=dict(
        source="ctx._source.title=params.title",
        params=dict(title="Updated by Shamim!"),
    ),
)

ObjectApiResponse({'_index': 'my_index', '_id': 'C5KuP5oBsuuLZ2nE8zLy', '_version': 2, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 5, '_primary_term': 1})

In [6]:
# Add a new field to the second indexed document with a scripted update.
# The value is passed through `params` (matching the title update on the
# first document) instead of being hard-coded inside the script source, so
# the script text stays constant regardless of the value being written.
es.update(
    index="my_index",
    id=documents_ids[1],
    script={
        "source": "ctx._source.add_new_field = params.value",
        "params": {
            "value": "dummy_data_by_shamim"
        }
    }
)

ObjectApiResponse({'_index': 'my_index', '_id': 'DJKuP5oBsuuLZ2nE9zIl', '_version': 2, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1})

Let's delete the third document.

In [7]:
# Remove the third indexed document, addressed by the id collected while indexing.
es.delete(
    index="my_index",
    id=documents_ids[2],
)

ObjectApiResponse({'_index': 'my_index', '_id': 'DZKuP5oBsuuLZ2nE9zKX', '_version': 2, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1})

**Bulk API**
Previously, we executed each operation one at a time, so every action required its own API call. That approach is slow and inefficient. Now, let's see how to accomplish the same task with a single request using the bulk API.

In [10]:
# Reset "my_index" before the bulk demo: delete whatever the sequential
# section left behind, then create the index again.
es.indices.delete(
    index="my_index",
    ignore_unavailable=True,
)
es.indices.create(
    index="my_index",
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [11]:
# Send every operation in a single bulk request. `operations` is a flat list:
# each action dict is followed by its source/payload dict, except `delete`,
# which takes no payload. The sequence mirrors the sequential section:
# index three documents, retitle the first, add a field to the second,
# and delete the third.
response = es.bulk(
    operations=[
        # action-1: index document "1"
        {
            "index": {
                "_index": "my_index",
                "_id": "1"
            }
        },
        # source 1
        {
            "title": "Sample Title 1",
            "text": "This is the first sample document text.",
            "created_on": "2024-09-22"
        },
        # action-2: index document "2"
        {
            "index": {
                "_index": "my_index",
                "_id": "2"
            }
        },
        # source 2
        {
            "title": "Sample Title 2",
            "text": "Here is another example of a document.",
            "created_on": "2024-09-24"
        },
        # action-3: index document "3"
        {
            "index": {
                "_index": "my_index",
                "_id": "3"
            }
        },
        # source 3
        {
            "title": "Sample Title 3",
            "text": "The content of the third document goes here.",
            "created_on": "2024-09-24"
        },
        # action-4: partial update of document "1" (new title)
        {
            "update": {
                "_index": "my_index",
                "_id": "1"
            }
        },
        # source 4
        {
            "doc": {
                "title": "New title by Shamim"
            }
        },
        # action-5: partial update of document "2" (new field).
        # This targets the second document so the bulk request matches the
        # sequential walkthrough, which added the field to the second document.
        {
            "update": {
                "_index": "my_index",
                "_id": "2"
            }
        },
        # source 5
        {
            "doc": {
                "new_field": "dummy_data_by_shamim"
            }
        },
        # action-6: delete document "3" (no payload follows a delete action)
        {
            "delete": {
                "_index": "my_index",
                "_id": "3",
            }
        },
    ]
)

pprint(response.body)

{'errors': False,
 'items': [{'index': {'_id': '1',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '2',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '3',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
        

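If `errors` is `False`, every action in the bulk request was executed successfully. If it is `True`, at least one action failed, and the corresponding entry in `items` contains the details of the error.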

In [12]:
# Top-level flag of the bulk response, read by subscripting the response object.
response["errors"]

False