## CONNECT TO ELASTICSEARCH

In [2]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic", "So0xTl9tCAdXqx4JniGO"),
    ca_certs="http_ca.crt",
)

client_info = es.info()

print("connected to elasticsearch")
pprint(client_info.body)


connected to elasticsearch
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'Ptp3XLACR3-Wmx0PcdxoZA',
 'name': 'c899a61c3425',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-08-24T22:05:04.526302670Z',
             'build_flavor': 'default',
             'build_hash': '0c781091a2f57de895a73a1391ff8426c0153c8d',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.2.2',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.1.3'}}


### Insert One Document

In [3]:
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [4]:
document = {
    "title": 'title',
    "text": 'text',
    "crated_on": '2024-09-22'
}
response = es.index(index="my_index", body=document)
response

ObjectApiResponse({'_index': 'my_index', '_id': 'JZUbTJkBza3mxyTR46Wn', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [5]:
print(response['result'])

created


In [6]:
print(response['_shards'])

{'total': 2, 'successful': 1, 'failed': 0}


In [7]:
print(response['_id'])

JZUbTJkBza3mxyTR46Wn


In [8]:
print((response['_index']))

my_index


#### Insert multiple documents


In [10]:
import json

dummy_data = json.load(open('./data/dummy_data.json'))
dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Here is another example of a document.',
  'created_on': '2024-09-24'},
 {'title': 'Sample Title 3',
  'text': 'The content of the third document goes here.',
  'created_on': '2024-09-24'}]

In [11]:
def insert_doc(document):
    response = es.index(index="my_index", body=document)
    return response

def print_info(response):
    print(f"""Document ID: {response['_id']} is '{response['result']}' and is split into {response['_shards']['total']} shards.""")

for document in dummy_data:
    response = insert_doc(document)
    print_info(response)

Document ID: JpUiTJkBza3mxyTRIKW9 is 'created' and is split into 2 shards.
Document ID: J5UiTJkBza3mxyTRIaWh is 'created' and is split into 2 shards.
Document ID: KJUiTJkBza3mxyTRIaX5 is 'created' and is split into 2 shards.


##### print mapping

In [13]:
from pprint import pprint

index_mapping = es.indices.get_mapping(index="my_index")
pprint(index_mapping['my_index']['mappings']['properties'])

{'crated_on': {'type': 'date'},
 'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


In [None]:
#### manual mapping

In [14]:
es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index")

mapping = {
    "properties": {
        "created_on": {"type": "date"},

        'text': {
            "type": 'text',
            'fields':{
                'keyword':{
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        },

        'title': {
            'type': 'text',
            'fields': {
                'keyword':{
                    'type': 'keyword',
                    'ignore_above': 256
                    }
            }
        }
    }
}

es.indices.put_mapping(index="my_index", body=mapping)

index_mapping = es.indices.get_mapping(index="my_index")
pprint(index_mapping['my_index']['mappings']['properties'])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


In [15]:
mapping = {
    "properties": {
        "created_on": {"type": "date"},

        'text': {
            "type": 'text',
            'fields':{
                'keyword':{
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        },

        'title': {
            'type': 'text',
            'fields': {
                'keyword':{
                    'type': 'keyword',
                    'ignore_above': 256
                    }
            }
        }
    }
}

es.indices.delete(index="my_index", ignore_unavailable=True)
es.indices.create(index="my_index", mappings=mapping)


index_mapping = es.indices.get_mapping(index="my_index")
pprint(index_mapping['my_index']['mappings']['properties'])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}
