## Create OpenSearch client

In [6]:
import json
import time

import boto3
import botocore.exceptions
from botocore.config import Config

# Shared botocore configuration: the OpenSearch client below is created for eu-west-1.
config = Config(region_name="eu-west-1")

# boto3 client for the Amazon OpenSearch Service API; later cells use it to
# create the domain and to poll its status.
client = boto3.client("opensearch", config=config)

os_domain_name = "os-genai-accelerator"  # opensearch domain name
os_index_name = "admin-ch-pressreleases-en"  # opensearch index


def get_credentials(secret_id: str, region_name: str) -> dict:
    """Fetch a JSON secret from AWS Secrets Manager and return it parsed.

    Args:
        secret_id: Identifier of the secret, passed as ``SecretId``.
        region_name: AWS region used to create the Secrets Manager client.

    Returns:
        The JSON document stored in the secret's ``SecretString``, decoded
        with ``json.loads`` (callers in this notebook index it by key).

    Raises:
        KeyError: If the response carries no ``SecretString`` field.
        json.JSONDecodeError: If ``SecretString`` is not valid JSON.
    """
    # Use a distinct local name so the notebook-level `client` is not shadowed.
    secrets_client = boto3.client("secretsmanager", region_name=region_name)
    response = secrets_client.get_secret_value(SecretId=secret_id)
    return json.loads(response["SecretString"])


# getting OpenSearch credentials
# Fetch the secret once and read both fields from the same response instead of
# calling Secrets Manager separately for the user name and the password.
os_credentials = get_credentials("opensearch_pw", "eu-west-1")
user = os_credentials["user"]
secret = os_credentials["password"]
os_http_auth = (user, secret)

# Amazon OpenSearch Service Domain Operations

This script involves two key functions: `createDomain()` and `waitForDomainProcessing()`.

## createDomain()
This function is used to create an Amazon OpenSearch Service domain.

```python
def createDomain(client, domainName):
    """Creates an Amazon OpenSearch Service domain with the specified options."""
    # code
```

    
## waitForDomainProcessing()
This function is used to check the status of the domain until it's no longer processing.
It checks every 60 seconds to see if the domain is still processing any changes. Once the domain is done processing, it prints a message stating that the changes have finished processing and outputs the domain's description.

```python
def waitForDomainProcessing(client, domainName):
    """Waits for the domain to finish processing changes."""
    # code
```
    
- **client**: The AWS client for performing OpenSearch operations.
- **domainName**: The name of the OpenSearch domain whose status is being checked.

In [7]:
# full documentation: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/opensearch/client/create_domain.html
def createDomain(client, domainName):
    """Creates an Amazon OpenSearch Service domain with the specified options.

    Args:
        client: boto3 OpenSearch Service client; its ``create_domain`` method
            is called.
        domainName: Name of the domain to create. It is also used to build the
            resource ARN in the domain access policy.

    Returns:
        The response returned by ``client.create_domain``.

    Notes:
        The master user name and password are read from the notebook-level
        ``user`` and ``secret`` variables. The account id and region in the
        policy ARN are hard-coded.
    """
    # Build the access policy for the domain actually being created. The ARN
    # must follow `domainName`, not a notebook-level variable, otherwise a
    # different domain name would get a policy for the wrong resource.
    access_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"AWS": "*"},
                "Action": "es:*",
                "Resource": f"arn:aws:es:eu-west-1:843197046435:domain/{domainName}/*",
            }
        ],
    }

    response = client.create_domain(
        DomainName=domainName,
        EngineVersion="OpenSearch_2.7",
        ClusterConfig={
            "InstanceType": "t3.medium.search",
            "InstanceCount": 1,
            "DedicatedMasterEnabled": False,
        },
        EBSOptions={"EBSEnabled": True, "VolumeType": "gp2", "VolumeSize": 100},
        # json.dumps guarantees a well-formed policy document.
        AccessPolicies=json.dumps(access_policy),
        EncryptionAtRestOptions={
            "Enabled": True,
        },
        DomainEndpointOptions={
            "EnforceHTTPS": True,
        },
        NodeToNodeEncryptionOptions={"Enabled": True},
        AdvancedSecurityOptions={
            "Enabled": True,
            "InternalUserDatabaseEnabled": True,
            "MasterUserOptions": {"MasterUserName": user, "MasterUserPassword": secret},
        },
    )
    return response


def waitForDomainProcessing(client, domainName, poll_interval=60):
    """Waits for the domain to finish processing changes.

    Calls ``client.describe_domain`` repeatedly until the response's
    ``DomainStatus.Processing`` flag is falsy, sleeping between calls.

    Args:
        client: boto3 OpenSearch Service client; its ``describe_domain``
            method is called.
        domainName: Name of the domain to poll.
        poll_interval: Seconds to sleep between two status checks.
            Defaults to 60.

    Returns:
        The last ``describe_domain`` response, or ``None`` when the service
        reports ``ResourceNotFoundException`` for the domain.

    Raises:
        botocore.exceptions.ClientError: For any client error other than
            ``ResourceNotFoundException``.
    """
    try:
        response = client.describe_domain(DomainName=domainName)
        # Re-check the domain every `poll_interval` seconds while it is processing.
        while response["DomainStatus"]["Processing"]:
            print("Domain still processing...")
            time.sleep(poll_interval)
            response = client.describe_domain(DomainName=domainName)

        # Once we exit the loop, the domain is available.
        print(
            "Amazon OpenSearch Service has finished processing changes for your domain."
        )
        print("Domain description:")
        print(response)
        return response

    except botocore.exceptions.ClientError as error:
        if error.response["Error"]["Code"] == "ResourceNotFoundException":
            print("Domain not found. Please check the domain name.")
            # Explicit so callers can see they must handle the missing-domain case.
            return None
        # Bare raise keeps the original traceback intact.
        raise

# Amazon OpenSearch Service Domain
Wait for the OpenSearch cluster to be created, then print its **Endpoint**.
Copy the `os_domain_ep` value into the notebook where you work with embeddings.

In [8]:
os_domain = createDomain(client, os_domain_name)
os_domain_status = waitForDomainProcessing(client, os_domain_name)

# generate opensearch domain endpoint
# Prefer the describe_domain response obtained after processing finished: the
# create_domain response is returned before provisioning completes and may not
# carry an "Endpoint" yet. Fall back to the create response only if needed.
# waitForDomainProcessing returns None when the domain is not found, so guard
# against that instead of subscripting None.
os_domain_ep = None
for domain_response in (os_domain_status, os_domain):
    domain_info = (domain_response or {}).get("DomainStatus", {})
    if domain_info.get("Endpoint"):
        os_domain_ep = domain_info["Endpoint"]
        break

if os_domain_ep is None:
    raise RuntimeError(
        f"No endpoint available for OpenSearch domain '{os_domain_name}'."
    )
print("https://" + os_domain_ep)

Amazon OpenSearch Service has finished processing changes for your domain.
Domain description:
{'ResponseMetadata': {'RequestId': 'dc21cb28-c44d-4dec-9458-327f70364093', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'dc21cb28-c44d-4dec-9458-327f70364093', 'content-type': 'application/json', 'content-length': '2465', 'date': 'Tue, 22 Aug 2023 12:00:55 GMT'}, 'RetryAttempts': 0}, 'DomainStatus': {'DomainId': '843197046435/os-genai-accelerator', 'DomainName': 'os-genai-accelerator', 'ARN': 'arn:aws:es:eu-west-1:843197046435:domain/os-genai-accelerator', 'Created': True, 'Deleted': False, 'Endpoint': 'search-os-genai-accelerator-q7pfgnj3cswyy2ts2wrpxbzvgq.eu-west-1.es.amazonaws.com', 'Processing': False, 'UpgradeProcessing': False, 'EngineVersion': 'OpenSearch_2.7', 'ClusterConfig': {'InstanceType': 't3.medium.search', 'InstanceCount': 1, 'DedicatedMasterEnabled': False, 'ZoneAwarenessEnabled': False, 'WarmEnabled': False, 'ColdStorageOptions': {'Enabled': False}, 'MultiAZWith

## Delete the OpenSearch index if it already exists

In [9]:
# deleting the index before inserting data again
# langchain can't do this, so we are using opensearchpy module
from opensearchpy import OpenSearch

# domain name should be without https://
# NOTE: from here on `client` refers to the OpenSearch data-plane client, not
# the boto3 client created earlier in the notebook.
# TLS certificate verification stays enabled: the basic-auth credentials in
# `os_http_auth` are sent on every request, so the server identity must be
# verified.
client = OpenSearch(
    hosts=[{"host": os_domain_ep, "port": 443}],
    http_auth=os_http_auth,
    use_ssl=True,
    verify_certs=True,
)
# Connectivity/authentication check: raises here if the cluster is unreachable
# or the credentials are rejected, before any index is touched.
client.info()

# check if index exists
if client.indices.exists(index=os_index_name):
    print(f"Index '{os_index_name}' exists. Deleting now...")
    client.indices.delete(index=os_index_name)
else:
    print(f"Index '{os_index_name}' does not exist.")

Index 'admin-ch-pressreleases-en' does not exist.
