In [None]:
!pip install requests_aws4auth

**Import required libraries**

In [None]:
import requests
import boto3
from requests_aws4auth import AWS4Auth
import time
import os

In [None]:
# Signing parameters for requests sent to the OpenSearch domain.
region = 'us-east-1'
service = 'es'

# get_credentials() returns None when boto3 cannot resolve any credentials;
# stop here with a clear message instead of an AttributeError on the next line.
credentials = boto3.Session().get_credentials()
if credentials is None:
    raise RuntimeError(
        "No AWS credentials found; configure credentials before running this notebook."
    )
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)

# Drop any trailing slash so the f"{domain}/..." URLs built in later cells
# never contain a double slash.
domain = os.environ['opensearch_domain_endpoint'].rstrip('/')

**Required settings for RAG Tool**

In [None]:
# PUT _cluster/settings
# Send the two ML Commons plugin settings below as persistent cluster settings.
url = f"{domain}/_cluster/settings"
ml_commons_settings = {
    "plugins.ml_commons.only_run_on_ml_node": "false",
    "plugins.ml_commons.memory_feature_enabled": "true",
}
payload = {"persistent": ml_commons_settings}
response = requests.put(url, auth=awsauth, json=payload)
# Status code first, then the raw response body, each on its own line.
print(response.status_code, response.text, sep="\n")

200
{"acknowledged":true,"persistent":{"plugins":{"ml_commons":{"memory_feature_enabled":"true","only_run_on_ml_node":"false"}}},"transient":{}}


**Deploy embedding model in OpenSearch**

In [None]:
# POST /_plugins/_ml/models/_register?deploy=true
# Register the embedding model; the response carries the id of the task
# that the next cell polls.
url = f"{domain}/_plugins/_ml/models/_register?deploy=true"
payload = {
    "name": "huggingface/sentence-transformers/all-MiniLM-L12-v2",
    "version": "1.0.1",
    "model_format": "TORCH_SCRIPT"
}
response = requests.post(url, json=payload, auth=awsauth)
print(response.status_code)
print(response.text)
# Fail fast on an HTTP error. Previously a non-200 response left task_id
# undefined (or holding a stale value from an earlier run), so the failure
# only surfaced later in the polling cell.
response.raise_for_status()
task_id = response.json()['task_id']

200
{"task_id":"OvaliZEBTLWKRj7Yyqzf","status":"CREATED"}


**Check the status of deployment**

In [None]:
# GET /_plugins/_ml/tasks/{task_id}
# Poll the registration task until it completes, then keep the model id.
url = f"{domain}/_plugins/_ml/tasks/{task_id}"
max_wait_seconds = 600      # upper bound so the cell cannot spin forever
poll_interval_seconds = 2
deadline = time.monotonic() + max_wait_seconds
while True:
    response = requests.get(url, auth=awsauth)
    print(response.status_code)
    print(response.text)
    # Only trust the body on HTTP 200; .get() tolerates a body without "state".
    state = response.json().get('state') if response.status_code == 200 else None
    if state == 'COMPLETED':
        embedding_model_id = response.json()['model_id']
        break
    # Terminal failure states never become COMPLETED; stop instead of looping.
    if state in ('FAILED', 'CANCELLED', 'COMPLETED_WITH_ERROR'):
        raise RuntimeError(f"Task {task_id} ended in state {state}: {response.text}")
    if time.monotonic() >= deadline:
        raise TimeoutError(f"Task {task_id} did not complete within {max_wait_seconds} seconds")
    time.sleep(poll_interval_seconds)


200
{"task_type":"REGISTER_MODEL","function_name":"TEXT_EMBEDDING","state":"CREATED","worker_node":["xdYV0RL3TzOYEGKpb6EZdg"],"create_time":1724591229550,"last_update_time":1724591229550,"is_async":true}
200
{"task_type":"REGISTER_MODEL","function_name":"TEXT_EMBEDDING","state":"CREATED","worker_node":["xdYV0RL3TzOYEGKpb6EZdg"],"create_time":1724591229550,"last_update_time":1724591229550,"is_async":true}
200
{"task_type":"REGISTER_MODEL","function_name":"TEXT_EMBEDDING","state":"CREATED","worker_node":["xdYV0RL3TzOYEGKpb6EZdg"],"create_time":1724591229550,"last_update_time":1724591233357,"is_async":true}
200
{"task_type":"REGISTER_MODEL","function_name":"TEXT_EMBEDDING","state":"CREATED","worker_node":["xdYV0RL3TzOYEGKpb6EZdg"],"create_time":1724591229550,"last_update_time":1724591233357,"is_async":true}
200
{"task_type":"REGISTER_MODEL","function_name":"TEXT_EMBEDDING","state":"CREATED","worker_node":["xdYV0RL3TzOYEGKpb6EZdg"],"create_time":1724591229550,"last_update_time":17245912333

**Create text embedding pipeline**

In [None]:
# PUT /_ingest/pipeline/test-pipeline-local-model
# Create an ingest pipeline with a single text_embedding processor that uses
# the embedding model and maps the "text" field to the "embedding" field.
ingestion_pipeline = "test-pipeline-local-model"
url = f"{domain}/_ingest/pipeline/{ingestion_pipeline}"
text_embedding_processor = {
    "text_embedding": {
        "model_id": embedding_model_id,
        "field_map": {"text": "embedding"},
    }
}
payload = {
    "description": "text embedding pipeline",
    "processors": [text_embedding_processor],
}
response = requests.put(url, auth=awsauth, json=payload)
print(response.status_code, response.text, sep="\n")

200
{"acknowledged":true}


**Create index**

In [None]:
# PUT my_test_index
# Create the k-NN index; documents written to it go through the ingest
# pipeline named by `ingestion_pipeline` by default.
index = "my_test_index"
url = f"{domain}/{index}"
field_mappings = {
    "text": {"type": "text"},
    "embedding": {"type": "knn_vector", "dimension": 384},
}
index_settings = {
    "knn.space_type": "cosinesimil",
    "default_pipeline": ingestion_pipeline,
    "knn": "true",
}
payload = {
    "mappings": {"properties": field_mappings},
    "settings": {"index": index_settings},
}
response = requests.put(url, auth=awsauth, json=payload)
print(response.status_code, response.text, sep="\n")

200
{"acknowledged":true,"shards_acknowledged":true,"index":"my_test_index"}


In [None]:
# List of documents to be indexed.
# Each entry has an "_id" (used as the document id when indexing) and a
# "text" field (the only field sent in the indexed document body).
documents = [
    {
        "_id": "1",
        "text": "Chart and table of population level and growth rate for the Ogden-Layton metro area from 1950 to 2023. United Nations population projections are also included through the year 2035.\nThe current metro area population of Ogden-Layton in 2023 is 750,000, a 1.63% increase from 2022.\nThe metro area population of Ogden-Layton in 2022 was 738,000, a 1.79% increase from 2021.\nThe metro area population of Ogden-Layton in 2021 was 725,000, a 1.97% increase from 2020.\nThe metro area population of Ogden-Layton in 2020 was 711,000, a 2.16% increase from 2019."
    },
    {
        "_id": "2",
        "text": "Chart and table of population level and growth rate for the New York City metro area from 1950 to 2023. United Nations population projections are also included through the year 2035.\nThe current metro area population of New York City in 2023 is 18,937,000, a 0.37% increase from 2022.\nThe metro area population of New York City in 2022 was 18,867,000, a 0.23% increase from 2021.\nThe metro area population of New York City in 2021 was 18,823,000, a 0.1% increase from 2020.\nThe metro area population of New York City in 2020 was 18,804,000, a 0.01% decline from 2019."
    },
    {
        "_id": "3",
        "text": "Chart and table of population level and growth rate for the Chicago metro area from 1950 to 2023. United Nations population projections are also included through the year 2035.\nThe current metro area population of Chicago in 2023 is 8,937,000, a 0.4% increase from 2022.\nThe metro area population of Chicago in 2022 was 8,901,000, a 0.27% increase from 2021.\nThe metro area population of Chicago in 2021 was 8,877,000, a 0.14% increase from 2020.\nThe metro area population of Chicago in 2020 was 8,865,000, a 0.03% increase from 2019."
    },
    {
        "_id": "4",
        "text": "Chart and table of population level and growth rate for the Miami metro area from 1950 to 2023. United Nations population projections are also included through the year 2035.\nThe current metro area population of Miami in 2023 is 6,265,000, a 0.8% increase from 2022.\nThe metro area population of Miami in 2022 was 6,215,000, a 0.78% increase from 2021.\nThe metro area population of Miami in 2021 was 6,167,000, a 0.74% increase from 2020.\nThe metro area population of Miami in 2020 was 6,122,000, a 0.71% increase from 2019."
    },
    {
        "_id": "5",
        "text": "Chart and table of population level and growth rate for the Austin metro area from 1950 to 2023. United Nations population projections are also included through the year 2035.\nThe current metro area population of Austin in 2023 is 2,228,000, a 2.39% increase from 2022.\nThe metro area population of Austin in 2022 was 2,176,000, a 2.79% increase from 2021.\nThe metro area population of Austin in 2021 was 2,117,000, a 3.12% increase from 2020.\nThe metro area population of Austin in 2020 was 2,053,000, a 3.43% increase from 2019."
    },
    {
        "_id": "6",
        "text": "Chart and table of population level and growth rate for the Seattle metro area from 1950 to 2023. United Nations population projections are also included through the year 2035.\nThe current metro area population of Seattle in 2023 is 3,519,000, a 0.86% increase from 2022.\nThe metro area population of Seattle in 2022 was 3,489,000, a 0.81% increase from 2021.\nThe metro area population of Seattle in 2021 was 3,461,000, a 0.82% increase from 2020.\nThe metro area population of Seattle in 2020 was 3,433,000, a 0.79% increase from 2019."
    }
]

# Function to index a document
def index_document(index, document_id, document):
    """PUT one document to ``{domain}/{index}/_doc/{document_id}``.

    Args:
        index: Name of the target index.
        document_id: Id placed in the request path.
        document: JSON-serializable body sent as the document.

    Returns:
        A ``(status_code, response_text)`` tuple from the HTTP response.
    """
    endpoint = f"{domain}/{index}/_doc/{document_id}"
    result = requests.put(endpoint, auth=awsauth, json=document)
    return result.status_code, result.text

# Index each document individually, sending only its "text" field as the body
for doc in documents:
    doc_id = doc["_id"]
    body = {"text": doc["text"]}
    response = index_document(index, doc_id, body)
    print(f"Indexed document {doc_id}: {response}")

Indexed document 1: (201, '{"_index":"my_test_index","_id":"1","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}')
Indexed document 2: (201, '{"_index":"my_test_index","_id":"2","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}')
Indexed document 3: (201, '{"_index":"my_test_index","_id":"3","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}')
Indexed document 4: (201, '{"_index":"my_test_index","_id":"4","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}')
Indexed document 5: (201, '{"_index":"my_test_index","_id":"5","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":1,"_primary_term":1}')
Indexed document 6: (201, '{"_index":"my_test_index","_id":"6","_version":1,"result":"created","_shards":{"total":2,"suc

**Create an ML connector to Bedrock foundation model**

In [None]:
# POST /_plugins/_ml/connectors/_create
# Create an ML Commons connector whose "predict" action posts to the Bedrock
# runtime invoke URL for anthropic.claude-v2, using the role in os.environ['role_arn'].
url = f'{domain}/_plugins/_ml/connectors/_create'


payload = {
  "name": "BedRock test claude Connector",
  "description": "The connector to BedRock service for claude model",
  "version": 1,
  "protocol": "aws_sigv4",
  "parameters": {
      "region": "us-east-1",
      "service_name": "bedrock",
      "anthropic_version": "bedrock-2023-05-31",
      "endpoint": "bedrock.us-east-1.amazonaws.com",
      "auth": "Sig_V4",
      "content_type": "application/json",
      "max_tokens_to_sample": 8000,
      "temperature": 0.0001,
      "response_filter": "$.completion"
  },
  "credential": {
      "roleArn": os.environ['role_arn']
  },
  "actions": [
    {
      "action_type": "predict",
      "method": "POST",
      "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke",
      "headers": {
        "content-type": "application/json",
        "x-amz-content-sha256": "required"
      },
      # ${parameters.*} placeholders appear in this template as literal text.
      "request_body": "{\"prompt\":\"${parameters.prompt}\", \"max_tokens_to_sample\":${parameters.max_tokens_to_sample}, \"temperature\":${parameters.temperature},  \"anthropic_version\":\"${parameters.anthropic_version}\" }"
    }
  ]
}

headers = {"Content-Type": "application/json"}

response = requests.post(url, auth=awsauth, json=payload, headers=headers)
print(response.status_code)
print(response.text)
# Fail fast on an HTTP error. Previously a non-200 response left connector_id
# undefined (or stale), so the failure only surfaced in the model registration cell.
response.raise_for_status()
connector_id = response.json()['connector_id']

200
{"connector_id":"-H66iZEBLuUqtvazVcpd"}


**Create model group**

In [None]:
# Register a model group
# POST /_plugins/_ml/model_groups/_register
model_group_name = "bedrock_model_group"
url = f"{domain}/_plugins/_ml/model_groups/_register"
payload = {
    "name": model_group_name,
    "description": "This is a public model group"
}
response = requests.post(url, json=payload, auth=awsauth)
print(response.status_code)
print(response.text)
# Fail fast on an HTTP error. Previously a non-200 response left
# generation_model_group_id undefined (or stale) for the next cell.
response.raise_for_status()
generation_model_group_id = response.json()['model_group_id']

200
{"model_group_id":"rzO9iZEBQrK436tdDcnZ","status":"CREATED"}


**Deploy Bedrock model**

In [None]:
# Register and deploy a model
# POST /_plugins/_ml/models/_register?deploy=true
# Registers a remote model that uses the connector and model group created above.
url = f"{domain}/_plugins/_ml/models/_register?deploy=true"
payload = {
    "name": "Bedrock Claude V2 model",
    "function_name": "remote",
    "model_group_id": generation_model_group_id,
    "description": "test model",
    "connector_id": connector_id
}
response = requests.post(url, json=payload, auth=awsauth)
print(response.status_code)
print(response.text)
# Fail fast on an HTTP error. Previously a non-200 response left task_id
# pointing at the earlier embedding-model task, so the next cell would
# silently poll the wrong task.
response.raise_for_status()
task_id = response.json()['task_id']


200
{"task_id":"sDO9iZEBQrK436tdOMkK","status":"CREATED","model_id":"sTO9iZEBQrK436tdP8lc"}


**Check the status of deployment**

In [None]:
# Get task status
# GET /_plugins/_ml/tasks/{task_id}
# Poll the registration task until it completes, then keep the model id.
url = f"{domain}/_plugins/_ml/tasks/{task_id}"
max_wait_seconds = 600      # upper bound so the cell cannot spin forever
poll_interval_seconds = 2
deadline = time.monotonic() + max_wait_seconds
while True:
    response = requests.get(url, auth=awsauth)
    print(response.status_code)
    print(response.text)
    # Only trust the body on HTTP 200; .get() tolerates a body without "state".
    state = response.json().get('state') if response.status_code == 200 else None
    if state == 'COMPLETED':
        generation_model_id = response.json()['model_id']
        break
    # Terminal failure states never become COMPLETED; stop instead of looping.
    if state in ('FAILED', 'CANCELLED', 'COMPLETED_WITH_ERROR'):
        raise RuntimeError(f"Task {task_id} ended in state {state}: {response.text}")
    if time.monotonic() >= deadline:
        raise TimeoutError(f"Task {task_id} did not complete within {max_wait_seconds} seconds")
    time.sleep(poll_interval_seconds)

200
{"model_id":"sTO9iZEBQrK436tdP8lc","task_type":"REGISTER_MODEL","function_name":"REMOTE","state":"COMPLETED","worker_node":["xdYV0RL3TzOYEGKpb6EZdg"],"create_time":1724592764687,"last_update_time":1724592767059,"is_async":false}


**Prediction using deployed model**

In [None]:
# Predict using a model
# POST /_plugins/_ml/models/{generation_model_id}/_predict with a minimal prompt.
url = f"{domain}/_plugins/_ml/models/{generation_model_id}/_predict"
prompt_parameters = {"prompt": "\n\nHuman:hello\n\nAssistant:"}
payload = {"parameters": prompt_parameters}
response = requests.post(url, auth=awsauth, json=payload)
print(response.status_code, response.text, sep="\n")

200
{"inference_results":[{"output":[{"name":"response","dataAsMap":{"response":" Hello! My name is Claude."}}],"status_code":200}]}


**Deploy a RAG agent**

In [None]:
# Register an agent
# POST /_plugins/_ml/agents/_register
# Flow agent with two tools: VectorDBTool, then MLModelTool, whose prompt
# references ${parameters.VectorDBTool.output}.
url = f"{domain}/_plugins/_ml/agents/_register"
payload = {
    "name": "Test_Agent_For_RAG",
    "type": "flow",
    "description": "this is a test agent",
    "tools": [
        {
            "type": "VectorDBTool",
            "parameters": {
                "model_id": embedding_model_id,
                # Must be the index that was created and populated earlier
                # ("my_test_index"); the previous literal "my_test_data" names
                # an index this notebook never creates.
                "index": index,
                "embedding_field": "embedding",
                "source_field": ["text"],
                "input": "${parameters.question}"
            }
        },
        {
            "type": "MLModelTool",
            "description": "A general tool to answer any question",
            "parameters": {
                "model_id": generation_model_id,
                "prompt": "\n\nHuman:You are a professional data analyst. You will always answer a question based on the given context first. If the answer is not directly shown in the context, you will analyze the data and find the answer. If you don't know the answer, just say you don't know. \n\n Context:\n${parameters.VectorDBTool.output}\n\nHuman:${parameters.question}\n\nAssistant:"
            }
        }
    ]
}
response = requests.post(url, json=payload, auth=awsauth)
print(response.status_code)
print(response.text)
# Fail fast on an HTTP error. Previously a non-200 response left agent_id
# undefined (or stale) for the cells that fetch and execute the agent.
response.raise_for_status()
agent_id = response.json()['agent_id']

200
{"agent_id":"szPAiZEBQrK436tdWcmL"}


**Verify the agent registration**

In [None]:
# Get agent status
# GET /_plugins/_ml/agents/{agent_id} and show the stored agent definition.
url = f"{domain}/_plugins/_ml/agents/{agent_id}"
response = requests.get(url, auth=awsauth)
print(response.status_code, response.text, sep="\n")

200
{"name":"Test_Agent_For_RAG","type":"flow","description":"this is a test agent","tools":[{"type":"VectorDBTool","parameters":{"input":"${parameters.question}","source_field":"[\"text\"]","embedding_field":"embedding","index":"my_test_data","model_id":"rTOliZEBQrK436td2cmL"},"include_output_in_agent_response":false},{"type":"MLModelTool","description":"A general tool to answer any question","parameters":{"model_id":"sTO9iZEBQrK436tdP8lc","prompt":"\n\nHuman:You are a professional data analyst. You will always answer a question based on the given context first. If the answer is not directly shown in the context, you will analyze the data and find the answer. If you don't know the answer, just say you don't know. \n\n Context:\n${parameters.VectorDBTool.output}\n\nHuman:${parameters.question}\n\nAssistant:"},"include_output_in_agent_response":false}],"created_time":1724592969793,"last_updated_time":1724592969793,"is_hidden":false}


**Execute RAG agent**

In [None]:
# Execute agent action
# POST /_plugins/_ml/agents/{agent_id}/_execute with the user's question.
url = f"{domain}/_plugins/_ml/agents/{agent_id}/_execute"
question_parameters = {
    "question": "what's the population increase of Seattle from 2021 to 2023"
}
payload = {"parameters": question_parameters}
response = requests.post(url, auth=awsauth, json=payload)
print(response.status_code, response.text, sep="\n")