# 1b - Create OpenSearch indices for document summaries and full document text
Run this notebook if the chatbot demo CloudFormation parameter CreateLambda was set to No.
## IMPORTANT: This notebook duplicates the functionality of the Lambda function chatbot-demo-lambda-opensearch-setup, which is created by the Chatbot Demo CloudFormation stack. There is no need to run this notebook if the Lambda function has already been executed.

#### Install dependencies

In [None]:
!pip install opensearch-py-ml accelerate sentence-transformers deprecated pandas==2.0.3

#### Import dependencies

In [None]:
import boto3
#import re
#import time
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth, helpers
from opensearch_py_ml.ml_models import SentenceTransformerModel
from opensearch_py_ml.ml_commons import MLCommonClient

#### Get a boto3 session and retrieve the region name

In [None]:
# Create a boto3 session and read the region it is configured for.
session = boto3.session.Session()
region_name = session.region_name

print("Region is", region_name)
# Persist region_name with the %store magic so other notebooks can restore it.
%store region_name

#### Set the name of the OpenSearch domain created by the CloudFormation stack.  Should be chatbot-demo.

In [None]:
opensearch_domain_name = "chatbot-demo"

#### Retrieve the endpoint of the OpenSearch domain created by the CloudFormation stack.

In [None]:
opensearch_client = boto3.client("opensearch")
response = opensearch_client.describe_domain(
    DomainName = opensearch_domain_name
)
host = response['DomainStatus']['Endpoints']['vpc']
%store host
print("The endpoint for the OpenSearch domain", opensearch_domain_name, "is", host)

#### Get a client for the OpenSearch endpoint

In [None]:
# Build a SigV4 signer from the current AWS credentials and the region
# resolved earlier in the notebook.
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, region_name)

# Collect the connection options first, then hand them to the client:
# HTTPS on port 443 with certificate verification, signed via `auth`.
connection_options = dict(
    hosts=[{'host': host, 'port': 443}],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
opensearch_client = OpenSearch(**connection_options)

#### Store key parameters for later use

In [None]:
# Names of the two indices and of the ingest pipeline created below.
summary_index_name = "chatbot-summary"
full_text_index_name = "chatbot-full_text"
pipeline_id = "chatbot-nlp-pipeline"
# Persist the names with the %store magic so other notebooks can restore them.
%store summary_index_name
%store full_text_index_name
%store pipeline_id

#### Set the OpenSearch cluster setting plugins.ml_commons.only_run_on_ml_node to false

In [None]:
# Turn off the ML Commons restriction that models may only run on dedicated
# ML nodes. The setting is applied as a transient cluster setting.
# The body is a plain dict that the client serialises to JSON, instead of a
# hand-encoded bytes literal.
ml_cluster_settings = {
    "transient": {
        "plugins.ml_commons.only_run_on_ml_node": False
    }
}
opensearch_client.cluster.put_settings(body=ml_cluster_settings)

#### Read back the OpenSearch cluster settings to confirm

In [None]:
# Fetch the cluster settings in flattened (dotted-key) form and display them
# as the cell output.
current_cluster_settings = opensearch_client.cluster.get_settings(
    flat_settings=True,
)
current_cluster_settings

#### Register the all-distilroberta-v1 model in OpenSearch ML Commons and get model_id
Note: This can take a while.

In [None]:
# Wrap the OpenSearch client in the ML Commons helper client.
ml_client = MLCommonClient(opensearch_client)

# Register the pretrained sentence-transformer, deploy it, and block until the
# deployment has completed; the call returns the id of the registered model.
registration_options = {
    "model_name": "huggingface/sentence-transformers/all-distilroberta-v1",
    "model_version": "1.0.1",
    "model_format": "TORCH_SCRIPT",
    "deploy_model": True,
    "wait_until_deployed": True,
}
model_id = ml_client.register_pretrained_model(**registration_options)
print(model_id)

#### Store the model_id for later use

In [None]:
# Persist model_id with the %store magic so other notebooks can restore it.
%store model_id

#### Read back model info from OpenSearch cluster to confirm
model_state should be 'DEPLOYED'

In [None]:
# Fetch the registered model's metadata from ML Commons and show it.
model_info = ml_client.get_model_info(model_id)

print(model_info)

# The model is expected to report model_state 'DEPLOYED'. Surface any other
# state explicitly so it is not missed in the raw dump above; this is a
# warning only and does not stop the notebook.
model_state = model_info.get("model_state")
if model_state != "DEPLOYED":
    print("WARNING: model_state is", repr(model_state), "but 'DEPLOYED' was expected")

#### Define the OpenSearch neural search ingestion pipeline

In [None]:
# Single processor: run the registered model over each document's "text"
# field and write the resulting vector to "text_embedding".
text_embedding_processor = {
    "text_embedding": {
        "model_id": model_id,
        "field_map": {"text": "text_embedding"},
    }
}

# Pipeline definition wrapping that processor.
pipeline = {
    "description": "Neural search pipeline",
    "processors": [text_embedding_processor],
}

# Create (or overwrite) the ingest pipeline under pipeline_id.
opensearch_client.ingest.put_pipeline(id=pipeline_id, body=pipeline)

#### Read back the ingestion pipeline to confirm

In [None]:
# Fetch the stored pipeline definition and display it as the cell output.
stored_pipeline = opensearch_client.ingest.get_pipeline(
    id=pipeline_id,
)
stored_pipeline

#### Define the knn index

In [None]:
# Index-level settings: enable k-NN and route every indexed document through
# the neural search ingest pipeline by default.
index_settings = {
    "index.knn": True,
    "default_pipeline": pipeline_id,
}

# Vector field filled in by the pipeline's text_embedding processor.
# NOTE(review): the dimension must equal the embedding size of the registered
# model - confirm if the model is ever changed.
embedding_field = {
    "type": "knn_vector",
    "dimension": 768,
    "method": {
        "engine": "faiss",
        "space_type": "l2",
        "name": "hnsw",
        "parameters": {},
    },
}

# Field mappings shared by both indices.
field_mappings = {
    "document": {"type": "text"},
    "section": {"type": "integer"},
    "text_embedding": embedding_field,
    "text": {"type": "text"},
}

# Complete index definition used for both the summary and full-text indices.
knn_index = {
    "settings": index_settings,
    "mappings": {"properties": field_mappings},
}

#### Create the index for document summaries

In [None]:
# Create the summary index from the shared k-NN definition. HTTP 400 responses
# are ignored rather than raised (presumably so that re-running the cell when
# the index already exists does not fail); the response is shown as output.
summary_index_response = opensearch_client.indices.create(
    index=summary_index_name,
    body=knn_index,
    ignore=400,
)
summary_index_response

#### Create the index for the full document text

In [None]:
# Create the full-text index from the shared k-NN definition. HTTP 400
# responses are ignored rather than raised (presumably so that re-running the
# cell when the index already exists does not fail); the response is shown as
# output.
full_text_index_response = opensearch_client.indices.create(
    index=full_text_index_name,
    body=knn_index,
    ignore=400,
)
full_text_index_response

#### Read back the list of OpenSearch indices to confirm

In [None]:
# Fetch every index on the domain, then print one index name per line.
all_indices = opensearch_client.indices.get('*')
for index_name in all_indices:
    print(index_name)

#### Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#### SPDX-License-Identifier: MIT-0