In [2]:
from pprint import pprint
from elasticsearch import Elasticsearch
from dotenv import load_dotenv
import os

# Resolve .env.local relative to the working directory instead of a hard-coded
# drive path, so the notebook is not tied to one machine.
# NOTE(review): assumes the kernel is started in the folder that contains
# .env.local — confirm.
env_path = os.path.join(os.getcwd(), '.env.local')

# Load environment variables
load_dotenv(dotenv_path=env_path)

# Print debugging information
print("Current working directory:", os.getcwd())
print("Environment file path:", env_path)

# Get the LOCALHOST variable
LOCALHOST = os.getenv('LOCALHOST')
print("Raw LOCALHOST value:", repr(LOCALHOST))

# Normalise the value: drop surrounding whitespace and either kind of quote.
# A value that is missing, or empty once cleaned, falls back to the default URL.
LOCALHOST = (LOCALHOST or "").strip().strip('"\'')
if not LOCALHOST:
    print("LOCALHOST not found in environment file.")
    # Fallback to default if not found
    LOCALHOST = "http://localhost:9200/"
else:
    print("LOCALHOST found in environment file.")

print("Processed LOCALHOST value:", repr(LOCALHOST))

# Connect to Elasticsearch; es.info() is the first real round-trip, so a
# connection problem is reported here instead of in a later cell.
try:
    es = Elasticsearch([LOCALHOST])
    client_info = es.info()
    print('Connected to Elasticsearch!')
    pprint(client_info.body)
except Exception as e:
    print(f"Connection error: {e}")

Current working directory: e:\Study Space\Python Workspace\ELastic Search
Environment file path: e:\Study Space\Python Workspace\ELastic Search\.env.local
Raw LOCALHOST value: 'http://localhost:9200/'
LOCALHOST found in environment file.
Processed LOCALHOST value: 'http://localhost:9200/'
Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'AKPh90H1StWquQfBPE4Chw',
 'name': 'b66a5ae1a4a1',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


In [3]:
# Recreate the index from scratch so the cell can be re-run;
# ignore_unavailable=True is passed so a missing index should not raise.
# NOTE(review): this resets "base_index", but every later cell reads and writes
# "my_index", which is therefore never cleared between runs — confirm which
# index is intended.
es.indices.delete(index="base_index", ignore_unavailable=True)
es.indices.create(index="base_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'base_index'})

In [4]:
import json
from tqdm import tqdm


# Read the sample blog posts. The context manager closes the file handle as
# soon as parsing is done, and the encoding is pinned so the result does not
# depend on the platform default.
with open("./data/blogs.json", encoding="utf-8") as blogs_file:
    dummy_data = json.load(blogs_file)

# Index every post and keep the id returned for it, so later cells can
# address individual documents.
document_ids = []
for document in tqdm(dummy_data, total=len(dummy_data)):
    response = es.index(index='my_index', body=document)
    document_ids.append(response['_id'])

100%|██████████| 3/3 [00:00<00:00, 34.16it/s]


In [5]:
# Show the ids collected while indexing the documents.
print(document_ids)

['-1Qu25MByXxkAIfFehkf', '_FQu25MByXxkAIfFehlR', '_VQu25MByXxkAIfFehlj']


## **Delete operation**
This is an example of a successful operation. If the `ID` exists in the index, the `delete` operation won't return any errors.

In [6]:
# Delete the document whose id was recorded first, then display the
# acknowledgement returned by Elasticsearch.
first_document_id = document_ids[0]
response = es.delete(index='my_index', id=first_document_id)
pprint(response.body)

{'_id': '-1Qu25MByXxkAIfFehkf',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 17,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'deleted'}


This example shows that the delete operation fails when providing it with an ID that does not exist in the index.

In [7]:
# Try to delete an id that does not exist in the index. The error raised by
# the client is printed so the cell still completes.
missing_id = "id"
try:
    response = es.delete(index='my_index', id=missing_id)
except Exception as error:
    print(error)

NotFoundError(404, "{'_index': 'my_index', '_id': 'id', '_version': 1, 'result': 'not_found', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 18, '_primary_term': 1}")



## **Get operation**

This is an example of a successful operation. If the ID exists in the index, the get operation won't return any errors.


In [8]:
# Fetch the second recorded document by id and pretty-print the full response
# (metadata plus `_source`).
second_document_id = document_ids[1]
response = es.get(index='my_index', id=second_document_id)
pprint(response.body)

{'_id': '_FQu25MByXxkAIfFehlR',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 15,
 '_source': {'author': 'Jane Doe',
             'content': 'Elastic Stack is a collection of products that helps '
                        'you to store, search, analyze, and visualize data.',
             'published_date': '2024-12-17',
             'title': 'Elastic Stack'},
 '_version': 1,
 'found': True}


In [9]:
# Try to fetch an id that does not exist in the index. The error raised by
# the client is printed so the cell still completes.
missing_id = "id"
try:
    response = es.get(index='my_index', id=missing_id)
except Exception as error:
    print(error)

NotFoundError(404, "{'_index': 'my_index', '_id': 'id', 'found': False}")


## **Search operation**

In [17]:
# Fetch all documents (adjust the size for your needs).
# The query goes through the dedicated `query` parameter: combining `body=`
# with a specific parameter such as `size=` is deprecated in the 8.x client
# and emits a DeprecationWarning.
search_result = es.search(index="my_index", query={"match_all": {}}, size=1000)
# Print the id and stored source of every hit
for hit in search_result['hits']['hits']:
    print(f"ID: {hit['_id']}, Source: {hit['_source']}")

ID: 81T11pMByXxkAIfFZhnL, Source: {'title': 'Elastic Stack', 'author': 'Jane Doe', 'content': 'Elastic Stack is a collection of products that helps you to store, search, analyze, and visualize data.', 'published_date': '2024-12-17'}
ID: 9FT11pMByXxkAIfFZhnZ, Source: {'title': 'Kibana', 'author': 'John Doe', 'content': 'Kibana is a data visualization tool that is part of the Elastic Stack.', 'published_date': '2024-12-18'}
ID: 9VT81pMByXxkAIfFRBnG, Source: {'title': 'Elasticsearch Basics', 'author': 'John Doe', 'content': 'Elasticsearch is a search engine based on Lucene.', 'published_date': '2024-12-16'}
ID: 9lT81pMByXxkAIfFRBn4, Source: {'title': 'Elastic Stack', 'author': 'Jane Doe', 'content': 'Elastic Stack is a collection of products that helps you to store, search, analyze, and visualize data.', 'published_date': '2024-12-17'}
ID: 91T81pMByXxkAIfFRRkJ, Source: {'title': 'Kibana', 'author': 'John Doe', 'content': 'Kibana is a data visualization tool that is part of the Elastic Sta

  search_result = es.search(index="my_index", body={"query": {"match_all": {}}}, size=1000)


## **Count operation**

In [11]:
# Ask Elasticsearch for the number of documents in the index; the value is
# read from the "count" key of the response.
result = es.count(index="my_index")
count = result["count"]

print(f"The number of documents in the index is {count}")

The number of documents in the index is 9


## **Exists API**

In [12]:
# Check whether the index itself exists; the answer is exposed on .body.
response = es.indices.exists(index='my_index')
response.body

True

In [13]:
# Check whether a single document exists. document_ids[0] was deleted in an
# earlier cell, which matches the recorded answer of False.
response = es.exists(index='my_index', id=document_ids[0])
response.body

False

## **Update Operation**

### **Update using the script method in update API**

In [14]:
# Change one field with a script. The new title is supplied through `params`
# rather than being written into the script source.
title_script = {
    "source": "ctx._source.title = params.title",
    "params": {"title": "Elastic search Freecodecamp"},
}
update_response = es.update(
    index='my_index',
    id=document_ids[1],
    script=title_script,
)
pprint(update_response.body)

{'_id': '_FQu25MByXxkAIfFehlR',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 19,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


In [15]:
# Read the document back to confirm the title written by the scripted update.
response = es.get(index='my_index', id=document_ids[1])
pprint(response.body)

{'_id': '_FQu25MByXxkAIfFehlR',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 19,
 '_source': {'author': 'Jane Doe',
             'content': 'Elastic Stack is a collection of products that helps '
                        'you to store, search, analyze, and visualize data.',
             'published_date': '2024-12-17',
             'title': 'Elastic search Freecodecamp'},
 '_version': 2,
 'found': True}


### **Update using the doc method in update API**

In [16]:
# Update the third recorded document by passing the new field values
# through `doc` instead of a script.
partial_doc = {
    "title": "Elastic search Freecodecamp 2",
    "author": "Imad from Youtube from 2CodeCampers channel",
}
update_doc_response = es.update(
    index="my_index",
    id=document_ids[2],
    doc=partial_doc,
)
pprint(update_doc_response.body)

{'_id': '_VQu25MByXxkAIfFehlj',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 20,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


### **Update API : Remove a field**

In [26]:
# Collect the id of every hit returned by the earlier search. A comprehension
# is used so that no loop variable named `id` is left in the kernel namespace,
# where it would shadow the built-in id().
updated_document_ids = [hit['_id'] for hit in search_result['hits']['hits']]

In [27]:
# Drop the `published_date` field from one document by running a script.
# NOTE(review): index 3 assumes the earlier search returned at least four
# hits — confirm.
remove_field_script = {"source": "ctx._source.remove('published_date')"}
remove_response = es.update(
    index="my_index",
    id=updated_document_ids[3],
    script=remove_field_script,
)
pprint(remove_response.body)

{'_id': '9lT81pMByXxkAIfFRBn4',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 21,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


## **Upsert operation**: when the document doesn't exist in the index

* We use doc_as_upsert to tell Elasticsearch that if the document does not exist, it should be inserted as a new document.


In [28]:
# Upsert: doc_as_upsert=True asks Elasticsearch to insert `doc` as a new
# document when id "1" is not in the index (the recorded result is 'created').
book_fields = {
    "book_id": 1234,
    "book_name": "A book",
}
response = es.update(index="my_index", id="1", doc=book_fields, doc_as_upsert=True)
pprint(response.body)

{'_id': '1',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 22,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 1,
 'result': 'created'}


In [29]:
# Count again after the upsert; the recorded value went from 9 to 10.
response = es.count(index='my_index')
response['count']

10