Connect to Elasticsearch Server

In [1]:
from pprint import pprint 
from elasticsearch import Elasticsearch 

# Create the client used by every later cell; it points at the node on localhost:9200.
es = Elasticsearch("http://localhost:9200")

# Request the server's info. The success message is printed only after this call has returned.
client_info = es.info()
print("Connected to Elasticsearch successfully!")
# Pretty-print the response body (cluster name, node name, version details).
pprint(client_info.body)

Connected to Elasticsearch successfully!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'T1HeaWnRTOqX_BBgREVVbA',
 'name': '1de29c077d1c',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-10-21T10:06:21.288851013Z',
             'build_flavor': 'default',
             'build_hash': '25d88452371273dd27356c98598287b669a03eae',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.1',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.0'}}


Create index ("my_index")

In [2]:
# Remove any "my_index" left over from an earlier run so this cell can be re-run.
# ignore_unavailable=True is passed so that a missing index is not treated as an error.
es.indices.delete(index="my_index", ignore_unavailable=True)
# Create the index without supplying any mappings; the result of this call is the cell's output.
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

Insert One document

In [3]:
# A minimal sample document: two text fields and an ISO-formatted date string.
document = {
    'title': 'title',
    'text': 'text',
    'created_on': '2024-09-22',
}

# Index the document into "my_index". No `id` is passed, so the `_id` in the
# response is assigned by Elasticsearch. The document source is passed through
# the client's `document=` parameter rather than the legacy `body=` alias.
response = es.index(index="my_index", document=document)
# Leave the response as the last expression so the notebook displays it.
response

ObjectApiResponse({'_index': 'my_index', '_id': '8LG6NpoBpkg4CNbFmMNb', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

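The response object contains the result of the operation. If the document was inserted successfully, then `result` is `created`. Each document gets an `_id`, and `_shards` reports how many shard copies (primary plus replicas) the write was attempted on (`total`) and how many of them succeeded or failed. The document itself is stored whole on one primary shard; it is not split across shards.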

In [5]:
print(response["result"])

created


In [6]:
print(response["_id"])

8LG6NpoBpkg4CNbFmMNb


In [7]:
print(response["_shards"])

{'total': 2, 'successful': 1, 'failed': 0}


In [8]:
print(response["_index"])

my_index


Insert Multiple Documents

In [9]:
import json 

# Load the sample documents from data.json. The context manager closes the
# file handle as soon as parsing is done, and the encoding is pinned to UTF-8
# so the result does not depend on the platform's default encoding.
with open("data.json", encoding="utf-8") as data_file:
    dummy_data = json.load(data_file)
# Display the parsed list as the cell output.
dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Elasticsearch makes searching and analyzing large amounts of data fast and efficient.',
  'created_on': '2024-10-05'},
 {'title': 'Sample Title 3',
  'text': 'Django Rest Framework simplifies API development with powerful serialization tools.',
  'created_on': '2024-11-14'},
 {'title': 'Sample Title 4',
  'text': 'Python provides a wide range of libraries for data analysis, automation, and backend development.',
  'created_on': '2025-01-10'},
 {'title': 'Sample Title 5',
  'text': 'FastAPI is an excellent choice for building high-performance APIs with async capabilities.',
  'created_on': '2025-02-02'}]

In [21]:
def insert_document(document):
    """Index one document into ``my_index`` and return the client's response.

    Args:
        document: The document source to index; the cells in this notebook
            pass plain dicts.

    Returns:
        Whatever ``es.index`` returns. Callers in this notebook read the
        ``_id``, ``result`` and ``_shards`` entries from it.
    """
    # Use the client's `document=` parameter for the source instead of the
    # legacy `body=` alias.
    return es.index(index="my_index", document=document)

def print_info(response):
    """Print a one-line summary of an index response.

    Args:
        response: A mapping that provides ``_id``, ``result`` and
            ``_shards["total"]``, such as the value returned by ``es.index``.

    Returns:
        None. The summary is written to standard output.
    """
    # The keys use single quotes because reusing the enclosing double quotes
    # inside an f-string is a SyntaxError on Python versions before 3.12.
    # NOTE(review): per the Elasticsearch index API, `_shards.total` is the
    # number of shard copies the write targets, not pieces of the document;
    # the wording is kept as-is so the printed text stays unchanged.
    print(f"Document ID: {response['_id']} is {response['result']} and is split into {response['_shards']['total']} shards.")

In [22]:
# Index each sample document in turn and print a one-line summary of its response.
# Note: this rebinds the notebook-level names `document` and `response`.
for document in dummy_data:
    response = insert_document(document)
    print_info(response)

Document ID: 9rHSNpoBpkg4CNbFfMMY is created and is split into 2 shards.
Document ID: 97HSNpoBpkg4CNbFfMNc is created and is split into 2 shards.
Document ID: -LHSNpoBpkg4CNbFfMOy is created and is split into 2 shards.
Document ID: -bHSNpoBpkg4CNbFfcMJ is created and is split into 2 shards.
Document ID: -rHSNpoBpkg4CNbFfcNe is created and is split into 2 shards.


Print Mappings

In [23]:
from pprint import pprint 
# Fetch the current mapping of "my_index" from the cluster.
index_mapping = es.indices.get_mapping(index="my_index")
# The response is keyed by index name; show only the per-field definitions.
pprint(index_mapping["my_index"]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


Manual Mapping

In [24]:
# Explicit field definitions: one date field, plus two full-text fields that
# each carry a `keyword` sub-field with `ignore_above` set to 256.
mappings = {
    "properties": {
        "created_on": {"type": "date"},
        "text": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
        },
        "title": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
        },
    }
}

# Start again from a freshly created index, then attach the mapping to it.
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")
es.indices.put_mapping(index="my_index", body=mappings)

# Read the mapping back from the cluster and show the field definitions.
index_mapping = es.indices.get_mapping(index="my_index")
pprint(index_mapping["my_index"]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


Create Index with Mapping

In [25]:
# Drop the existing index (a missing index is not treated as an error), then
# recreate it with the `mappings` dict supplied directly at creation time.
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index', mappings=mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [26]:
# Read the mapping back from the cluster to confirm what the index was created with.
index_mapping = es.indices.get_mapping(index="my_index")
# Show only the per-field definitions, pretty-printed with a 3-space indent.
pprint(index_mapping["my_index"]["mappings"]["properties"], indent=3)

{  'created_on': {'type': 'date'},
   'text': {  'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
              'type': 'text'},
   'title': {  'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
               'type': 'text'}}
