In [1]:

%pip install opensearch-py

Collecting opensearch-py
  Downloading opensearch_py-3.1.0-py3-none-any.whl.metadata (7.2 kB)
Collecting Events (from opensearch-py)
  Downloading Events-0.5-py3-none-any.whl.metadata (3.9 kB)
Collecting opensearch-protobufs==0.19.0 (from opensearch-py)
  Downloading opensearch_protobufs-0.19.0-py3-none-any.whl.metadata (678 bytes)
Downloading opensearch_py-3.1.0-py3-none-any.whl (385 kB)
Downloading opensearch_protobufs-0.19.0-py3-none-any.whl (39 kB)
Downloading Events-0.5-py3-none-any.whl (6.8 kB)
Installing collected packages: Events, opensearch-protobufs, opensearch-py
Successfully installed Events-0.5 opensearch-protobufs-0.19.0 opensearch-py-3.1.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import logging
import os

# Log to both a file and the notebook output, so every message is shown
# inline under the cell that produced it and also persisted on disk.
log_dir = "./concepts/logs"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "app.log")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        # Explicit encoding so non-ASCII text in log messages does not depend
        # on the platform's default locale encoding.
        logging.FileHandler(log_file, encoding="utf-8"),
        logging.StreamHandler(),
    ],
    # basicConfig() silently does nothing when the root logger already has
    # handlers (for example when this cell is re-run in the same kernel).
    # force=True removes and closes the existing handlers first, so the
    # configuration above always takes effect and handlers never pile up.
    force=True,
)
logger = logging.getLogger(__name__)

In [4]:
import getpass
import os

from opensearchpy import OpenSearch

# Connection settings are read from the environment so that credentials are
# not stored in the notebook. Host, port and user fall back to the local
# defaults; the password is prompted for interactively when it is not set.
opensearch_host = os.environ.get("OPENSEARCH_HOST", "localhost")
opensearch_port = int(os.environ.get("OPENSEARCH_PORT", "9200"))
opensearch_user = os.environ.get("OPENSEARCH_USER", "admin")
opensearch_password = os.environ.get("OPENSEARCH_PASSWORD") or getpass.getpass(
    f"OpenSearch password for '{opensearch_user}': "
)

client = OpenSearch(
    hosts=[{"host": opensearch_host, "port": opensearch_port}],
    http_auth=(opensearch_user, opensearch_password),
    use_ssl=True,
    # NOTE(review): TLS certificate verification is disabled and its warning
    # silenced. Presumably this targets a local cluster with a self-signed
    # certificate - enable verification for any other deployment.
    verify_certs=False,
    ssl_show_warn=False,
)

# Round-trip to the cluster to confirm the connection works, then report the
# distribution name and version it returns.
info = client.info()
version = info["version"]
logger.info(f"Connected to: {version['distribution']} {version['number']}")

2025-12-28 20:03:13,859 - opensearch - INFO - GET https://localhost:9200/ [status:200 request:0.536s]
2025-12-28 20:03:13,860 - __main__ - INFO - Connected to: opensearch 3.4.0


In [6]:
index_name = "employees"

# Index definition: one primary shard, no replicas, and an explicit type for
# every employee field.
mapping = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            "employee_id": {"type": "keyword"},
            # `name` is analysed as text and also exposed unanalysed through
            # the `name.keyword` sub-field.
            "name": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "email": {"type": "keyword"},
            "department": {"type": "keyword"},
            "salary": {"type": "double"},
            "hire_date": {"type": "date"},
            "is_active": {"type": "boolean"},
            "skills": {"type": "text"},
        }
    },
}

try:
    # Creating an index that already exists is rejected by the server, so
    # check first. This keeps the cell safe to re-run against a cluster that
    # still holds the index from a previous run.
    if client.indices.exists(index=index_name):
        logger.info(f"Index '{index_name}' already exists; skipping creation.")
    else:
        response = client.indices.create(index=index_name, body=mapping)
        logger.info(f"Index '{index_name}' created successfully!")
        logger.info(response)
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    logger.error(f"Error: {e}")

2025-12-28 20:07:21,536 - opensearch - INFO - PUT https://localhost:9200/employees [status:200 request:0.288s]
2025-12-28 20:07:21,537 - __main__ - INFO - Index 'employees' created successfully!
2025-12-28 20:07:21,538 - __main__ - INFO - {'acknowledged': True, 'shards_acknowledged': True, 'index': 'employees'}


In [7]:
# Index a single employee document under an explicitly chosen document ID.
employee_1 = dict(
    employee_id="EMP001",
    name="Alice Johnson",
    email="alice@company.com",
    department="Engineering",
    salary=95000.00,
    hire_date="2022-03-15",
    is_active=True,
    skills=["Python", "Java", "AWS"],
)

# The response reports what happened to the document and echoes its ID.
response = client.index(
    id=1,
    index=index_name,
    body=employee_1,
)
logger.info(f"Document indexed: {response['result']}")
logger.info(f"Document ID: {response['_id']}")

2025-12-28 20:09:03,110 - opensearch - INFO - PUT https://localhost:9200/employees/_doc/1 [status:201 request:0.033s]
2025-12-28 20:09:03,110 - __main__ - INFO - Document indexed: created
2025-12-28 20:09:03,111 - __main__ - INFO - Document ID: 1


In [19]:
# Index a second employee without supplying a document ID.
employee_2 = {
    "employee_id": "EMP002",
    "name": "Bob Smith",
    "email": "bob@company.com",
    "department": "Marketing",
    "salary": 75000.00,
    "hire_date": "2023-01-10",
    "is_active": True,
    "skills": ["SEO", "Content Marketing", "Analytics"]
}

# No 'id' parameter - OpenSearch generates the ID and returns it in the
# response. The target index comes from `index_name`, as in the other cells,
# rather than a repeated string literal.
response = client.index(
    index=index_name,
    body=employee_2
)

logger.info(f"Auto-generated ID: {response['_id']}")

2025-12-28 20:19:36,093 - opensearch - INFO - POST https://localhost:9200/employees/_doc [status:201 request:0.029s]
2025-12-28 20:19:36,094 - __main__ - INFO - Auto-generated ID: ADSdZpsBNh2X3J1HzEkI


In [22]:
# Fetch the document that the previous cell indexed. Its ID is generated by
# OpenSearch and is different on every run, so take it from the previous
# response instead of pasting a value copied from an earlier run (which no
# longer exists after the index is recreated). The get response also carries
# `_id`, so re-running this cell keeps working.
auto_generated_id = response['_id']
response = client.get(index=index_name, id=auto_generated_id)
response

2025-12-28 20:20:05,554 - opensearch - INFO - GET https://localhost:9200/employees/_doc/ADSdZpsBNh2X3J1HzEkI [status:200 request:0.006s]


{'_index': 'employees',
 '_id': 'ADSdZpsBNh2X3J1HzEkI',
 '_version': 1,
 '_seq_no': 1,
 '_primary_term': 1,
 'found': True,
 '_source': {'employee_id': 'EMP002',
  'name': 'Bob Smith',
  'email': 'bob@company.com',
  'department': 'Marketing',
  'salary': 75000.0,
  'hire_date': '2023-01-10',
  'is_active': True,
  'skills': ['SEO', 'Content Marketing', 'Analytics']}}

In [24]:
# Documents become visible to search only after an index refresh. Force one
# so that a search issued right after indexing (e.g. on Run All) sees them.
client.indices.refresh(index=index_name)

# match_all has no meaningful relevance order, so sort on the `employee_id`
# keyword field to make the position of each hit deterministic.
query = {
    "query": {
        "match_all": {}
    },
    "sort": [{"employee_id": "asc"}]
}

response = client.search(index=index_name, body=query)
# Name of the second hit in employee_id order.
response['hits']['hits'][1]['_source']['name']

2025-12-28 20:20:16,414 - opensearch - INFO - POST https://localhost:9200/employees/_search [status:200 request:0.006s]


'Bob Smith'

In [25]:
from opensearchpy import helpers

# Bulk insert. Each action must carry its own `_id`: two actions sharing an
# ID make the later one overwrite the earlier one, while the bulk call still
# counts both as successful. IDs follow the employee number (EMP003 -> 3).
employees = [
    {
        "_index": index_name,
        "_id": 3,
        "_source": {
            "employee_id": "EMP003",
            "name": "Carol White",
            "email": "carol@company.com",
            "department": "Engineering",
            "salary": 105000.00,
            "hire_date": "2021-06-20",
            "is_active": True,
            "skills": ["React", "Node.js", "Docker"]
        }
    },
    {
        "_index": index_name,
        "_id": 4,
        "_source": {
            "employee_id": "EMP004",
            "name": "David Brown",
            "email": "david@company.com",
            "department": "Sales",
            "salary": 85000.00,
            "hire_date": "2022-11-05",
            "is_active": True,
            "skills": ["CRM", "Negotiation", "Presentation"]
        }
    },
    {
        "_index": index_name,
        "_id": 5,
        "_source": {
            "employee_id": "EMP005",
            "name": "Eve Davis",
            "email": "eve@company.com",
            "department": "Engineering",
            "salary": 92000.00,
            "hire_date": "2023-04-12",
            "is_active": False,
            "skills": ["Python", "Machine Learning", "TensorFlow"]
        }
    }
]

# helpers.bulk returns the number of successfully executed actions and a
# list of per-action errors (empty when everything succeeded).
success, failed = helpers.bulk(client, employees)
logger.info(f"Successfully created: {success}")
logger.info(f"Creation failed: {failed}")

2025-12-28 20:23:28,505 - opensearch - INFO - POST https://localhost:9200/_bulk [status:200 request:0.022s]
2025-12-28 20:23:28,506 - __main__ - INFO - Successfully created: 3
2025-12-28 20:23:28,506 - __main__ - INFO - Creation failed: []
