In [16]:
# Check the execution role
import sagemaker  
sagemaker.get_execution_role()  

'arn:aws:iam::809719347864:role/AmazonBedrockExecutionRoleForKnowledgeBase_SageMaker-ht-custom'

In [29]:
# Working example: generate a description of Amazon's financial results

# Bedrock model identifiers for the Cohere Command R family.
DEFAULT_MODEL= "cohere.command-r-plus-v1:0"
COMMAND_R_PLUS = "cohere.command-r-plus-v1:0"
COMMAND_R = "cohere.command-r-v1:0"
# Model id handed to generate_text by the call further down in this cell.
model_id = DEFAULT_MODEL

# Bedrock runtime client that generate_text uses to invoke the model.
# NOTE(review): `boto3` (and `json`, used inside generate_text) are not imported
# in this cell; it only runs once an import cell elsewhere has been executed.
bedrock_rt = boto3.client(service_name="bedrock-runtime")

# Stream a completion for a prompt and echo the text as it arrives.
# temp is set to 0.3 by default.
def generate_text(prompt, model_id, temp=0.3):
    """Invoke a Bedrock model with response streaming and print the generated text.

    Args:
        prompt: User message sent as the ``message`` field of the request body.
        model_id: Bedrock model identifier passed as ``modelId``.
        temp: Sampling temperature sent as ``temperature`` (default 0.3).

    Returns:
        None. The generated text is written to stdout chunk by chunk.
    """
    body = {
        'message': prompt,
        'temperature': temp,
        'preamble': ""
    }
    # Invoke the Bedrock model
    response = bedrock_rt.invoke_model_with_response_stream(
        modelId=model_id,
        body=json.dumps(body)
    )
    stream = response.get('body')
    if not stream:
        return
    for event in stream:
        chunk = event.get('chunk')
        if not chunk:
            # Nothing to decode for this event. Skipping it avoids reading an
            # unbound (first event) or stale (later event) payload, which would
            # raise or print the previous chunk's text a second time.
            continue
        output = json.loads(chunk.get('bytes').decode())
        if output.get('event_type') == 'text-generation':
            print(output['text'], end='')
                
user_input ="Give financial highlights from Amazon's earnings call"
prompt = user_input
# generate_text writes the streamed completion to stdout itself and has no
# return value, so there is nothing to capture or print afterwards (printing
# the result only appended a stray "None" to the output).
generate_text(prompt, model_id)

Amazon reported its fourth-quarter earnings on Thursday, and the company beat expectations on revenue and profit. Here are some key financial highlights from the earnings report:

- Revenue: Amazon reported quarterly revenue of $157.3 billion, up 9% from the same period last year and beating expectations of $155.3 billion.
- Profit: The company reported a quarterly profit of $3.5 billion, down 10% from the previous year but above expectations of $2.8 billion.
- AWS: Amazon Web Services, the company's cloud computing division, continued to be a strong performer. AWS revenue grew 20% year-over-year to $17.8 billion, and operating income increased 29% to $5.2 billion.
- North America: Revenue from Amazon's North America business grew 13% year-over-year to $105.4 billion, while operating income decreased 1% to $3.3 billion.
- International: Revenue from Amazon's international business grew 3% year-over-year to $44.6 billion, while the division's operating loss widened to $2.3 billion from 

In [48]:
# Utility.py - this is where the notebook starts

import json
import random
import time

import boto3


# Random integer in [200, 900) appended to every resource name defined below.
suffix = random.randrange(200, 900)
boto3_session = boto3.session.Session()
region_name = boto3_session.region_name
iam_client = boto3_session.client('iam')
# Account id and ARN of the calling identity; each line issues its own
# STS get_caller_identity request.
account_number = boto3.client('sts').get_caller_identity().get('Account')
identity = boto3.client('sts').get_caller_identity()['Arn']

# Names for the OpenSearch Serverless encryption, network and data-access
# policies created by create_policies_in_oss.
encryption_policy_name = f"bedrock-sample-rag-sp-{suffix}"
network_policy_name = f"bedrock-sample-rag-np-{suffix}"
access_policy_name = f'bedrock-sample-rag-ap-{suffix}'
# Names for the IAM execution role and the IAM policies attached to it.
bedrock_execution_role_name = f'AmazonBedrockExecutionRoleForKnowledgeBase_{suffix}'
fm_policy_name = f'AmazonBedrockFoundationModelPolicyForKnowledgeBase_{suffix}'
s3_policy_name = f'AmazonBedrockS3PolicyForKnowledgeBase_{suffix}'
sm_policy_name = f'AmazonBedrockSecretPolicyForKnowledgeBase_{suffix}'
oss_policy_name = f'AmazonBedrockOSSPolicyForKnowledgeBase_{suffix}'


def create_bedrock_execution_role(bucket_name):
    """Create the Knowledge Base execution role with model and S3 read access.

    Creates two IAM policies (foundation-model invocation and read access to
    ``bucket_name``), then the role that ``bedrock.amazonaws.com`` may assume,
    and finally attaches both policies to that role.

    Args:
        bucket_name: Name of the S3 bucket the role is allowed to list and read.

    Returns:
        The ``create_role`` response returned by the IAM client.
    """
    policy_version = "2012-10-17"

    # Policy documents -----------------------------------------------------
    model_access_doc = {
        "Version": policy_version,
        "Statement": [{
            "Effect": "Allow",
            "Action": ["bedrock:InvokeModel"],
            "Resource": [
                f"arn:aws:bedrock:{region_name}::foundation-model/amazon.titan-embed-text-v1",
                f"arn:aws:bedrock:{region_name}::foundation-model/amazon.titan-embed-text-v2:0",
            ],
        }],
    }
    bucket_read_doc = {
        "Version": policy_version,
        "Statement": [{
            "Effect": "Allow",
            "Action": ["s3:GetObject", "s3:ListBucket"],
            "Resource": [
                f"arn:aws:s3:::{bucket_name}",
                f"arn:aws:s3:::{bucket_name}/*",
            ],
            # Restrict access to resources owned by the calling account.
            "Condition": {
                "StringEquals": {"aws:ResourceAccount": f"{account_number}"}
            },
        }],
    }
    trust_doc = {
        "Version": policy_version,
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"Service": "bedrock.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }],
    }

    # IAM resources: both policies first, then the role --------------------
    model_policy = iam_client.create_policy(
        PolicyName=fm_policy_name,
        PolicyDocument=json.dumps(model_access_doc),
        Description='Policy for accessing foundation model',
    )
    bucket_policy = iam_client.create_policy(
        PolicyName=s3_policy_name,
        PolicyDocument=json.dumps(bucket_read_doc),
        Description='Policy for reading documents from s3',
    )
    bedrock_kb_execution_role = iam_client.create_role(
        RoleName=bedrock_execution_role_name,
        AssumeRolePolicyDocument=json.dumps(trust_doc),
        Description='Amazon Bedrock Knowledge Base Execution Role for accessing OSS and S3',
        MaxSessionDuration=3600,
    )

    # Attach the foundation-model policy first, then the S3 policy.
    role_name = bedrock_kb_execution_role["Role"]["RoleName"]
    for created_policy in (model_policy, bucket_policy):
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn=created_policy["Policy"]["Arn"],
        )
    return bedrock_kb_execution_role


def create_oss_policy_attach_bedrock_execution_role(collection_id, bedrock_kb_execution_role):
    """Create an IAM policy for one OpenSearch Serverless collection and attach it.

    Args:
        collection_id: Id of the collection the policy grants
            ``aoss:APIAccessAll`` on.
        bedrock_kb_execution_role: ``create_role`` response of the role that
            receives the policy; only ``["Role"]["RoleName"]`` is read.

    Returns:
        None. The ARN of the new policy is printed.
    """
    collection_arn = f"arn:aws:aoss:{region_name}:{account_number}:collection/{collection_id}"
    allow_api_access = {
        "Effect": "Allow",
        "Action": ["aoss:APIAccessAll"],
        "Resource": [collection_arn],
    }
    created = iam_client.create_policy(
        PolicyName=oss_policy_name,
        PolicyDocument=json.dumps({"Version": "2012-10-17", "Statement": [allow_api_access]}),
        Description='Policy for accessing opensearch serverless',
    )
    policy_arn = created["Policy"]["Arn"]
    print("Opensearch serverless arn: ", policy_arn)

    target_role = bedrock_kb_execution_role["Role"]["RoleName"]
    iam_client.attach_role_policy(RoleName=target_role, PolicyArn=policy_arn)
    return None


def create_policies_in_oss(vector_store_name, aoss_client, bedrock_kb_execution_role_arn):
    """Create the encryption, network and data-access policies for a collection.

    Args:
        vector_store_name: Collection name the policies are scoped to.
        aoss_client: Client exposing ``create_security_policy`` and
            ``create_access_policy``.
        bedrock_kb_execution_role_arn: Role ARN listed, together with the
            caller identity, as a principal of the data-access policy.

    Returns:
        Tuple ``(encryption_policy, network_policy, access_policy)`` holding the
        three client responses in creation order.
    """
    collection_resource = 'collection/' + vector_store_name
    index_resource = 'index/' + vector_store_name + '/*'
    collection_rule = {'Resource': [collection_resource], 'ResourceType': 'collection'}

    # Encryption policy: AWS-owned key for the collection.
    encryption_doc = {'Rules': [dict(collection_rule)], 'AWSOwnedKey': True}
    encryption_policy = aoss_client.create_security_policy(
        name=encryption_policy_name,
        policy=json.dumps(encryption_doc),
        type='encryption',
    )

    # Network policy: the collection is reachable from public networks.
    network_doc = [{'Rules': [dict(collection_rule)], 'AllowFromPublic': True}]
    network_policy = aoss_client.create_security_policy(
        name=network_policy_name,
        policy=json.dumps(network_doc),
        type='network',
    )

    # Data-access policy: item and index permissions for both principals.
    collection_permissions = [
        'aoss:CreateCollectionItems',
        'aoss:DeleteCollectionItems',
        'aoss:UpdateCollectionItems',
        'aoss:DescribeCollectionItems',
    ]
    index_permissions = [
        'aoss:CreateIndex',
        'aoss:DeleteIndex',
        'aoss:UpdateIndex',
        'aoss:DescribeIndex',
        'aoss:ReadDocument',
        'aoss:WriteDocument',
    ]
    access_doc = [{
        'Rules': [
            {
                'Resource': [collection_resource],
                'Permission': collection_permissions,
                'ResourceType': 'collection',
            },
            {
                'Resource': [index_resource],
                'Permission': index_permissions,
                'ResourceType': 'index',
            },
        ],
        'Principal': [identity, bedrock_kb_execution_role_arn],
        'Description': 'Easy data policy',
    }]
    access_policy = aoss_client.create_access_policy(
        name=access_policy_name,
        policy=json.dumps(access_doc),
        type='data',
    )
    return encryption_policy, network_policy, access_policy


def delete_iam_role_and_policies():
    """Detach and delete the execution role and its policies, skipping absent ones.

    Not every policy name is created on every path (for example the secrets
    manager policy is only created when secret ARNs are supplied, and the
    OpenSearch policy by a separate helper). A missing policy, attachment or
    role is therefore ignored instead of aborting the cleanup half-way and
    leaving the remaining resources behind.

    Returns:
        0 once every existing resource has been removed.

    Raises:
        Any IAM client error other than ``NoSuchEntityException``.
    """
    not_found = iam_client.exceptions.NoSuchEntityException
    # Same order as the original cleanup: S3, foundation model, OSS, secrets.
    policy_arns = [
        f"arn:aws:iam::{account_number}:policy/{policy_name}"
        for policy_name in (s3_policy_name, fm_policy_name, oss_policy_name, sm_policy_name)
    ]

    # A role can only be deleted after all managed policies are detached.
    for policy_arn in policy_arns:
        try:
            iam_client.detach_role_policy(
                RoleName=bedrock_execution_role_name,
                PolicyArn=policy_arn
            )
        except not_found:
            pass  # never attached (or role already gone)

    try:
        iam_client.delete_role(RoleName=bedrock_execution_role_name)
    except not_found:
        pass  # role already gone

    for policy_arn in policy_arns:
        try:
            iam_client.delete_policy(PolicyArn=policy_arn)
        except not_found:
            pass  # policy was never created
    return 0


def interactive_sleep(seconds: int):
    """Pause for ``seconds`` seconds, redrawing a growing row of dots each second.

    Every tick prints one more dot than the previous one, ending the line with
    a carriage return so the row is overwritten in place, then sleeps 1 second.
    """
    for tick in range(1, seconds + 1):
        print('.' * tick, end='\r')
        time.sleep(1)

def create_bedrock_execution_role_multi_ds(bucket_names = None, secrets_arns = None):
    """Create the execution role for a knowledge base with several data sources.

    The role is created first; afterwards each applicable policy is created
    and immediately attached, in this order: foundation model, S3 (only when
    ``bucket_names`` is non-empty), secrets manager (only when ``secrets_arns``
    is non-empty).

    Args:
        bucket_names: Optional iterable of S3 bucket names to grant read access to.
        secrets_arns: Optional list of secret ARNs to grant get/put access to.

    Returns:
        The ``create_role`` response returned by the IAM client.
    """
    policy_version = "2012-10-17"

    def _create_and_attach(policy_name, document, description):
        # Create one managed policy and attach it to the freshly created role.
        created = iam_client.create_policy(
            PolicyName=policy_name,
            PolicyDocument=json.dumps(document),
            Description=description,
        )
        iam_client.attach_role_policy(
            RoleName=bedrock_kb_execution_role["Role"]["RoleName"],
            PolicyArn=created["Policy"]["Arn"],
        )

    # 0. Create the role that bedrock.amazonaws.com may assume
    trust_doc = {
        "Version": policy_version,
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"Service": "bedrock.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }],
    }
    bedrock_kb_execution_role = iam_client.create_role(
        RoleName=bedrock_execution_role_name,
        AssumeRolePolicyDocument=json.dumps(trust_doc),
        Description='Amazon Bedrock Knowledge Base Execution Role for accessing OSS, secrets manager and S3',
        MaxSessionDuration=3600,
    )

    # 1. Create and attach the foundation model policy
    model_access_doc = {
        "Version": policy_version,
        "Statement": [{
            "Effect": "Allow",
            "Action": ["bedrock:InvokeModel"],
            "Resource": [
                f"arn:aws:bedrock:{region_name}::foundation-model/amazon.titan-embed-text-v1",
                f"arn:aws:bedrock:{region_name}::foundation-model/amazon.titan-embed-text-v2:0",
            ],
        }],
    }
    _create_and_attach(fm_policy_name, model_access_doc, 'Policy for accessing foundation model')

    # 2. Create and attach the S3 policy
    if bucket_names:
        # Each bucket contributes its own ARN followed by its object wildcard.
        bucket_resources = []
        for bucket in bucket_names:
            bucket_resources.extend([f'arn:aws:s3:::{bucket}', f'arn:aws:s3:::{bucket}/*'])
        bucket_read_doc = {
            "Version": policy_version,
            "Statement": [{
                "Effect": "Allow",
                "Action": ["s3:GetObject", "s3:ListBucket"],
                "Resource": bucket_resources,
                "Condition": {
                    "StringEquals": {"aws:ResourceAccount": f"{account_number}"}
                },
            }],
        }
        _create_and_attach(s3_policy_name, bucket_read_doc, 'Policy for reading documents from s3')

    # 3. Create and attach the secrets manager policy
    if secrets_arns:
        secrets_doc = {
            "Version": policy_version,
            "Statement": [{
                "Effect": "Allow",
                "Action": [
                    "secretsmanager:GetSecretValue",
                    "secretsmanager:PutSecretValue",
                ],
                "Resource": secrets_arns,
            }],
        }
        _create_and_attach(sm_policy_name, secrets_doc, 'Policy for accessing secret manager')

    return bedrock_kb_execution_role

In [35]:
%pip install -U opensearch-py==2.3.1
%pip install -U boto3==1.33.2
%pip install -U retrying==1.3.4

Collecting opensearch-py==2.3.1
  Downloading opensearch_py-2.3.1-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting urllib3<2,>=1.21.1 (from opensearch-py==2.3.1)
  Downloading urllib3-1.26.19-py2.py3-none-any.whl.metadata (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Downloading opensearch_py-2.3.1-py2.py3-none-any.whl (327 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.3/327.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hDownloading urllib3-1.26.19-py2.py3-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.9/143.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: urllib3, opensearch-py
  Attempting uninstall: urllib3
    Found existing installation: urllib3 2.2.2
    Uninstalling urllib3-2.2.2:
      Successfully uninstalled urllib3-2.2.2
Successfully installed opensearch-py-2

In [36]:
import warnings
warnings.filterwarnings('ignore')

In [38]:
import json
import os
import boto3
from botocore.exceptions import ClientError
import pprint
# from utility import create_bedrock_execution_role, create_oss_policy_attach_bedrock_execution_role, create_policies_in_oss, interactive_sleep
import random
from retrying import retry

In [39]:
# NOTE(review): `suffix` is drawn again here. The policy and role names built in
# the utility cell were formatted from whatever value `suffix` held when that
# cell ran and are not recomputed, so resources named in later cells can carry
# a different suffix than the IAM role and policies.
suffix = random.randrange(200, 900)

sts_client = boto3.client('sts')
boto3_session = boto3.session.Session()
region_name = boto3_session.region_name
bedrock_agent_client = boto3_session.client('bedrock-agent', region_name=region_name)
# Service name handed to the SigV4 signer when the OpenSearch client is built.
service = 'aoss'
s3_client = boto3.client('s3')
account_id = sts_client.get_caller_identity()["Account"]
# Bucket name combines region and account id.
s3_suffix = f"{region_name}-{account_id}"
bucket_name = f'bedrock-kb-{s3_suffix}' # replace it with your bucket name.
pp = pprint.PrettyPrinter(indent=2)

In [40]:
# Check if bucket exists, and if not create S3 bucket for knowledge base data source
try:
    s3_client.head_bucket(Bucket=bucket_name)
    print(f'Bucket {bucket_name} Exists')
except ClientError as e:
    # Only a "not found" answer means the bucket is missing. Other failures
    # (for example 403 when the bucket exists but is not accessible) are
    # re-raised instead of being mistaken for a missing bucket.
    error_code = e.response.get('Error', {}).get('Code')
    if error_code not in ('404', 'NoSuchBucket', 'NotFound'):
        raise
    print(f'Creating bucket {bucket_name}')
    if region_name == "us-east-1":
        # us-east-1 is the default location and takes no LocationConstraint.
        s3bucket = s3_client.create_bucket(Bucket=bucket_name)
    else:
        s3bucket = s3_client.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': region_name}
        )

Creating bucket bedrock-kb-us-east-1-809719347864


In [41]:
%store bucket_name

Stored 'bucket_name' (str)


In [42]:
import boto3
import time
# Name of the OpenSearch Serverless collection used as the vector store.
vector_store_name = f'bedrock-sample-rag-{suffix}'
# NOTE(review): `index_name` is assigned again, with a different pattern, in the
# cell that builds the vector index; that later value is the one used from there on.
index_name = f"bedrock-sample-rag-index-{suffix}"
aoss_client = boto3_session.client('opensearchserverless')
# Create the IAM execution role (with model and S3 policies) and keep its ARN.
bedrock_kb_execution_role = create_bedrock_execution_role(bucket_name=bucket_name)
bedrock_kb_execution_role_arn = bedrock_kb_execution_role['Role']['Arn']

In [43]:
# Create the encryption, network and data access policies within OSS.
# They are created before the collection that they refer to by name.
encryption_policy, network_policy, access_policy = create_policies_in_oss(vector_store_name=vector_store_name,
                       aoss_client=aoss_client,
                       bedrock_kb_execution_role_arn=bedrock_kb_execution_role_arn)
# Request creation of the vector search collection; the response is returned
# while the collection is still being created (see the wait loop further down).
collection = aoss_client.create_collection(name=vector_store_name,type='VECTORSEARCH')

In [44]:
pp.pprint(collection)

{ 'ResponseMetadata': { 'HTTPHeaders': { 'connection': 'keep-alive',
                                         'content-length': '310',
                                         'content-type': 'application/x-amz-json-1.0',
                                         'date': 'Wed, 28 Aug 2024 07:00:09 '
                                                 'GMT',
                                         'x-amzn-requestid': '397a5772-74cc-4715-93b0-d83811eee396'},
                        'HTTPStatusCode': 200,
                        'RequestId': '397a5772-74cc-4715-93b0-d83811eee396',
                        'RetryAttempts': 0},
  'createCollectionDetail': { 'arn': 'arn:aws:aoss:us-east-1:809719347864:collection/9iupha0l00c6fr1ygh',
                              'createdDate': 1724828409309,
                              'id': '9iupha0l00c6fr1ygh',
                              'kmsKeyArn': 'auto',
                              'lastModifiedDate': 1724828409309,
                              'na

In [45]:
%store encryption_policy network_policy access_policy collection

Stored 'encryption_policy' (dict)
Stored 'network_policy' (dict)
Stored 'access_policy' (dict)
Stored 'collection' (dict)


In [46]:
# Get the OpenSearch serverless collection URL
collection_id = collection['createCollectionDetail']['id']
host = collection_id + '.' + region_name + '.aoss.amazonaws.com'
print(host)

9iupha0l00c6fr1ygh.us-east-1.aoss.amazonaws.com


In [47]:
# Wait for collection creation.
# This can take a couple of minutes to finish.
response = aoss_client.batch_get_collection(names=[vector_store_name])
# Periodically check collection status
while (response['collectionDetails'][0]['status']) == 'CREATING':
    print('Creating collection...')
    interactive_sleep(30)
    response = aoss_client.batch_get_collection(names=[vector_store_name])

# Leaving the loop only means the collection is no longer CREATING; report
# success only when it actually became ACTIVE.
collection_status = response['collectionDetails'][0]['status']
if collection_status != 'ACTIVE':
    raise RuntimeError(
        f'Collection {vector_store_name} ended in status {collection_status}'
    )
print('\nCollection successfully created:')
pp.pprint(response["collectionDetails"])


Collection successfully created:
[ { 'arn': 'arn:aws:aoss:us-east-1:809719347864:collection/9iupha0l00c6fr1ygh',
    'collectionEndpoint': 'https://9iupha0l00c6fr1ygh.us-east-1.aoss.amazonaws.com',
    'createdDate': 1724828409309,
    'dashboardEndpoint': 'https://9iupha0l00c6fr1ygh.us-east-1.aoss.amazonaws.com/_dashboards',
    'id': '9iupha0l00c6fr1ygh',
    'kmsKeyArn': 'auto',
    'lastModifiedDate': 1724828433221,
    'name': 'bedrock-sample-rag-457',
    'standbyReplicas': 'ENABLED',
    'status': 'ACTIVE',
    'type': 'VECTORSEARCH'}]


In [49]:
# Create the OpenSearch Serverless IAM policy and attach it to the Bedrock execution role
try:
    create_oss_policy_attach_bedrock_execution_role(collection_id=collection_id,
                                                    bedrock_kb_execution_role=bedrock_kb_execution_role)
    # It can take up to a minute for data access rules to be enforced
    interactive_sleep(60)
except ClientError as e:
    # Only an already-existing policy is tolerated; any other failure
    # (permissions, throttling, ...) is re-raised rather than being reported
    # as "Policy already exists".
    if e.response.get('Error', {}).get('Code') != 'EntityAlreadyExists':
        raise
    print("Policy already exists")
    pp.pprint(e)

Opensearch serverless arn:  arn:aws:iam::809719347864:policy/AmazonBedrockOSSPolicyForKnowledgeBase_565
............................................................

# Step 2: Create the vector index

In [50]:
# Create the vector index in OpenSearch Serverless, with the knn_vector field index mapping, specifying the dimension size, name and engine.
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth, RequestError
credentials = boto3.Session().get_credentials()
# SigV4 signer for the `service` defined earlier; bound to both `awsauth` and `auth`.
awsauth = auth = AWSV4SignerAuth(credentials, region_name, service)

# Overrides the `index_name` assigned in an earlier cell.
index_name = f"bedrock-sample-index-{suffix}"
# Index settings and field mappings sent when the index is created.
body_json = {
   "settings": {
      "index.knn": "true",
       "number_of_shards": 1,
       "knn.algo_param.ef_search": 512,
       "number_of_replicas": 0,
   },
   "mappings": {
      "properties": {
         "vector": {
            "type": "knn_vector",
            # presumably the output size of the embedding model configured for
            # the knowledge base - TODO confirm
            "dimension": 1536,
             "method": {
                 "name": "hnsw",
                 "engine": "faiss",
                 "space_type": "l2"
             },
         },
         "text": {
            "type": "text"
         },
         "text-metadata": {
            "type": "text"         }
      }
   }
}

# Build the OpenSearch client
oss_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=300
)

In [51]:
# Create the index from body_json and print the service response.
try:
    response = oss_client.indices.create(index=index_name, body=json.dumps(body_json))
    print('\nCreating index:')
    pp.pprint(response)

    # index creation can take up to a minute
    interactive_sleep(60)
except RequestError as e:
    # You can delete the index if it already exists by uncommenting the line below:
    # oss_client.indices.delete(index=index_name)
    print(f'Error while trying to create the index, with error {e.error}\nyou may unmark the delete above to delete, and recreate the index')
    


Creating index:
{ 'acknowledged': True,
  'index': 'bedrock-sample-index-565',
  'shards_acknowledged': True}
............................................................

# Download data to ingest into our knowledge base

In [52]:
# Download and prepare dataset
!mkdir -p ./data

from urllib.request import urlretrieve
# Source documents, paired by position with the local file names below.
urls = [
    'https://s2.q4cdn.com/299287126/files/doc_financials/2023/ar/2022-Shareholder-Letter.pdf',
    'https://s2.q4cdn.com/299287126/files/doc_financials/2022/ar/2021-Shareholder-Letter.pdf',
    'https://s2.q4cdn.com/299287126/files/doc_financials/2021/ar/Amazon-2020-Shareholder-Letter-and-1997-Shareholder-Letter.pdf',
    'https://s2.q4cdn.com/299287126/files/doc_financials/2020/ar/2019-Shareholder-Letter.pdf'
]

filenames = [
    'AMZN-2022-Shareholder-Letter.pdf',
    'AMZN-2021-Shareholder-Letter.pdf',
    'AMZN-2020-Shareholder-Letter.pdf',
    'AMZN-2019-Shareholder-Letter.pdf'
]

data_root = "./data/"

# Download every document into data_root under its matching file name.
for url, filename in zip(urls, filenames):
    file_path = data_root + filename
    urlretrieve(url, file_path)

In [53]:
# Upload data to s3 to the bucket that was configured as a data source to the knowledge base
s3_client = boto3.client("s3")
def uploadDirectory(path,bucket_name):
    """Upload every file found under ``path`` (recursively) to ``bucket_name``.

    Each object key is the bare file name, without any directory part.
    """
    for current_dir, _subdirs, file_names in os.walk(path):
        for file_name in file_names:
            local_path = os.path.join(current_dir, file_name)
            s3_client.upload_file(local_path, bucket_name, file_name)

uploadDirectory(data_root, bucket_name)

# Create a Knowledge Base

In [54]:
# Storage configuration: the collection and index the knowledge base writes to,
# with the index fields used for the vector, the text and the metadata.
opensearchServerlessConfiguration = {
            "collectionArn": collection["createCollectionDetail"]['arn'],
            "vectorIndexName": index_name,
            "fieldMapping": {
                "vectorField": "vector",
                "textField": "text",
                "metadataField": "text-metadata"
            }
        }

# Ingest strategy - How to ingest data from the data source
chunkingStrategyConfiguration = {
    "chunkingStrategy": "FIXED_SIZE",
    "fixedSizeChunkingConfiguration": {
        "maxTokens": 512,
        "overlapPercentage": 20
    }
}

# The data source to ingest documents from, into the OpenSearch serverless knowledge base index
s3Configuration = {
    "bucketArn": f"arn:aws:s3:::{bucket_name}",
    # "inclusionPrefixes":["*.*"] # you can use this if you want to create a KB using data within s3 prefixes.
}

# The embedding model used by Bedrock to embed ingested documents, and realtime prompts
embeddingModelArn = f"arn:aws:bedrock:{region_name}::foundation-model/amazon.titan-embed-text-v1"

# Name, description and execution role used when the knowledge base is created.
name = f"bedrock-sample-knowledge-base-{suffix}"
description = "Amazon shareholder letter knowledge base."
roleArn = bedrock_kb_execution_role_arn

In [55]:
# Create a KnowledgeBase
from retrying import retry

@retry(wait_random_min=1000, wait_random_max=2000,stop_max_attempt_number=7)
def create_knowledge_base_func():
    """Create the vector knowledge base and return its description.

    Reads the notebook-level ``name``, ``description``, ``roleArn``,
    ``embeddingModelArn`` and ``opensearchServerlessConfiguration`` values.
    The call is wrapped by ``retry`` with the arguments given in the decorator.

    Returns:
        The ``knowledgeBase`` entry of the ``create_knowledge_base`` response.
    """
    vector_kb_settings = {
        "type": "VECTOR",
        "vectorKnowledgeBaseConfiguration": {
            "embeddingModelArn": embeddingModelArn
        }
    }
    storage_settings = {
        "type": "OPENSEARCH_SERVERLESS",
        "opensearchServerlessConfiguration": opensearchServerlessConfiguration
    }
    creation_result = bedrock_agent_client.create_knowledge_base(
        name=name,
        description=description,
        roleArn=roleArn,
        knowledgeBaseConfiguration=vector_kb_settings,
        storageConfiguration=storage_settings,
    )
    return creation_result["knowledgeBase"]

In [56]:
try:
    kb = create_knowledge_base_func()
except Exception as err:
    print(f"{err=}, {type(err)=}")
    # Re-raise after reporting: without a knowledge base `kb` stays undefined
    # and every following cell would fail with an unrelated NameError.
    raise

In [57]:
pp.pprint(kb)

{ 'createdAt': datetime.datetime(2024, 8, 28, 7, 12, 36, 45552, tzinfo=tzlocal()),
  'description': 'Amazon shareholder letter knowledge base.',
  'knowledgeBaseArn': 'arn:aws:bedrock:us-east-1:809719347864:knowledge-base/NRUZOOJE1L',
  'knowledgeBaseConfiguration': { 'type': 'VECTOR',
                                  'vectorKnowledgeBaseConfiguration': { 'embeddingModelArn': 'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v1'}},
  'knowledgeBaseId': 'NRUZOOJE1L',
  'name': 'bedrock-sample-knowledge-base-565',
  'roleArn': 'arn:aws:iam::809719347864:role/AmazonBedrockExecutionRoleForKnowledgeBase_259',
  'status': 'CREATING',
  'storageConfiguration': { 'opensearchServerlessConfiguration': { 'collectionArn': 'arn:aws:aoss:us-east-1:809719347864:collection/9iupha0l00c6fr1ygh',
                                                                   'fieldMapping': { 'metadataField': 'text-metadata',
                                                                        

In [58]:
# Get KnowledgeBase 
get_kb_response = bedrock_agent_client.get_knowledge_base(knowledgeBaseId = kb['knowledgeBaseId'])

In [59]:
# Create a DataSource in the KnowledgeBase.
# It reuses the knowledge base's name and description, points at the S3 bucket
# described by s3Configuration and applies the fixed-size chunking strategy.
create_ds_response = bedrock_agent_client.create_data_source(
    name = name,
    description = description,
    knowledgeBaseId = kb['knowledgeBaseId'],
    dataSourceConfiguration = {
        "type": "S3",
        "s3Configuration":s3Configuration
    },
    vectorIngestionConfiguration = {
        "chunkingConfiguration": chunkingStrategyConfiguration
    }
)
# Keep the data source description; its id is needed to start ingestion jobs.
ds = create_ds_response["dataSource"]
pp.pprint(ds)

{ 'createdAt': datetime.datetime(2024, 8, 28, 7, 13, 14, 807078, tzinfo=tzlocal()),
  'dataSourceConfiguration': { 's3Configuration': { 'bucketArn': 'arn:aws:s3:::bedrock-kb-us-east-1-809719347864'},
                               'type': 'S3'},
  'dataSourceId': 'YTMMESX7YU',
  'description': 'Amazon shareholder letter knowledge base.',
  'knowledgeBaseId': 'NRUZOOJE1L',
  'name': 'bedrock-sample-knowledge-base-565',
  'status': 'AVAILABLE',
  'updatedAt': datetime.datetime(2024, 8, 28, 7, 13, 14, 807078, tzinfo=tzlocal()),
  'vectorIngestionConfiguration': { 'chunkingConfiguration': { 'chunkingStrategy': 'FIXED_SIZE',
                                                               'fixedSizeChunkingConfiguration': { 'maxTokens': 512,
                                                                                                   'overlapPercentage': 20}}}}


In [60]:
# Get DataSource 
bedrock_agent_client.get_data_source(knowledgeBaseId = kb['knowledgeBaseId'], dataSourceId = ds["dataSourceId"])

{'ResponseMetadata': {'RequestId': '82472358-1ba6-418b-916b-06219d16b196',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Wed, 28 Aug 2024 07:13:25 GMT',
   'content-type': 'application/json',
   'content-length': '603',
   'connection': 'keep-alive',
   'x-amzn-requestid': '82472358-1ba6-418b-916b-06219d16b196',
   'x-amz-apigw-id': 'dNUjdHrhoAMEPFg=',
   'x-amzn-trace-id': 'Root=1-66cece15-4061a1d64a69a30b6725c70b'},
  'RetryAttempts': 0},
 'dataSource': {'knowledgeBaseId': 'NRUZOOJE1L',
  'dataSourceId': 'YTMMESX7YU',
  'name': 'bedrock-sample-knowledge-base-565',
  'status': 'AVAILABLE',
  'description': 'Amazon shareholder letter knowledge base.',
  'dataSourceConfiguration': {'type': 'S3',
   's3Configuration': {'bucketArn': 'arn:aws:s3:::bedrock-kb-us-east-1-809719347864'}},
  'vectorIngestionConfiguration': {'chunkingConfiguration': {'chunkingStrategy': 'FIXED_SIZE',
    'fixedSizeChunkingConfiguration': {'maxTokens': 512,
     'overlapPercentage': 20}}},
  'createdAt': da

# Start ingestion job

In [61]:
# Start an ingestion job
start_job_response = bedrock_agent_client.start_ingestion_job(knowledgeBaseId = kb['knowledgeBaseId'], dataSourceId = ds["dataSourceId"])

In [62]:
job = start_job_response["ingestionJob"]
pp.pprint(job)

{ 'dataSourceId': 'YTMMESX7YU',
  'ingestionJobId': 'ALHPIDBCAK',
  'knowledgeBaseId': 'NRUZOOJE1L',
  'startedAt': datetime.datetime(2024, 8, 28, 7, 13, 55, 875877, tzinfo=tzlocal()),
  'statistics': { 'numberOfDocumentsDeleted': 0,
                  'numberOfDocumentsFailed': 0,
                  'numberOfDocumentsScanned': 0,
                  'numberOfModifiedDocumentsIndexed': 0,
                  'numberOfNewDocumentsIndexed': 0},
  'status': 'STARTING',
  'updatedAt': datetime.datetime(2024, 8, 28, 7, 13, 55, 875877, tzinfo=tzlocal())}


In [63]:
# Poll the ingestion job until it reaches a terminal state.
# Waiting only for 'COMPLETE' would loop forever when the job fails or is stopped.
while job['status'] not in ('COMPLETE', 'FAILED', 'STOPPED'):
    # Sleep before polling so no time is wasted once the job has finished.
    interactive_sleep(30)
    get_job_response = bedrock_agent_client.get_ingestion_job(
        knowledgeBaseId = kb['knowledgeBaseId'],
        dataSourceId = ds["dataSourceId"],
        ingestionJobId = job["ingestionJobId"]
    )
    job = get_job_response["ingestionJob"]

pp.pprint(job)
if job['status'] != 'COMPLETE':
    raise RuntimeError(f"Ingestion job {job['ingestionJobId']} ended in status {job['status']}")

{ 'dataSourceId': 'YTMMESX7YU',
  'ingestionJobId': 'ALHPIDBCAK',
  'knowledgeBaseId': 'NRUZOOJE1L',
  'startedAt': datetime.datetime(2024, 8, 28, 7, 13, 55, 875877, tzinfo=tzlocal()),
  'statistics': { 'numberOfDocumentsDeleted': 0,
                  'numberOfDocumentsFailed': 0,
                  'numberOfDocumentsScanned': 4,
                  'numberOfModifiedDocumentsIndexed': 0,
                  'numberOfNewDocumentsIndexed': 4},
  'status': 'COMPLETE',
  'updatedAt': datetime.datetime(2024, 8, 28, 7, 14, 13, 297835, tzinfo=tzlocal())}


In [64]:
# Print the knowledge base Id in bedrock, that corresponds to the Opensearch index in the collection we created before, we will use it for the invocation later
kb_id = kb["knowledgeBaseId"]
pp.pprint(kb_id)

'NRUZOOJE1L'


In [65]:
# keep the kb_id for invocation later in the invoke request
%store kb_id

Stored 'kb_id' (str)


# Test the Knowledge Base

In [66]:
# try out KB using RetrieveAndGenerate API
bedrock_agent_runtime_client = boto3.client("bedrock-agent-runtime", region_name=region_name)
# Lets see how different Anthropic Claude 3 models responds to the input text we provide
claude_model_ids = [ ["Claude 3 Sonnet", "anthropic.claude-3-sonnet-20240229-v1:0"], ["Claude 3 Haiku", "anthropic.claude-3-haiku-20240307-v1:0"]]

In [67]:
def ask_bedrock_llm_with_knowledge_base(query: str, model_arn: str, kb_id: str) -> dict:
    """Answer `query` with the RetrieveAndGenerate API against a knowledge base.

    Args:
        query: The question text sent as the request input.
        model_arn: ARN of the foundation model used to generate the answer.
        kb_id: ID of the Bedrock knowledge base to retrieve from.

    Returns:
        The full `retrieve_and_generate` response dict. Callers in this notebook
        read the answer from `response['output']['text']` and the supporting
        passages from `response['citations']`.
    """
    return bedrock_agent_runtime_client.retrieve_and_generate(
        input={
            'text': query
        },
        retrieveAndGenerateConfiguration={
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': {
                'knowledgeBaseId': kb_id,
                'modelArn': model_arn
            }
        },
    )

In [68]:
query = "What is Amazon's doing in the field of generative AI?"

# Unpack each [display name, model ID] pair into dedicated names so the loop does not
# overwrite the notebook-level `model_id` string that other cells rely on.
for model_name, claude_model_id in claude_model_ids:
    model_arn = f'arn:aws:bedrock:{region_name}::foundation-model/{claude_model_id}'
    response = ask_bedrock_llm_with_knowledge_base(query, model_arn, kb_id)
    generated_text = response['output']['text']
    citations = response["citations"]

    # Flatten the text of every retrieved reference across all citations.
    contexts = [
        reference["content"]["text"]
        for citation in citations
        for reference in citation["retrievedReferences"]
    ]

    print(f"---------- Generated using {model_name}:")
    pp.pprint(generated_text)
    print(f'---------- The citations for the response generated by {model_name}:')
    pp.pprint(contexts)
    print()

---------- Generated using Claude 3 Sonnet:
('Amazon has been working on developing its own large language models (LLMs) '
 'for generative AI applications. The company believes generative AI will '
 'transform and improve virtually every customer experience across its '
 'consumer, seller, brand, and creator offerings. Amazon is investing '
 'substantially in LLMs and plans to continue doing so. On the AWS cloud '
 'platform, Amazon is democratizing generative AI technology by offering '
 'machine learning chips like Trainium and Inferentia that provide '
 'cost-effective training and running of LLMs. AWS also enables companies to '
 "choose from various LLMs and build applications with AWS's security, privacy "
 "and other features. One example is AWS's CodeWhisperer, which uses "
 'generative AI to provide real-time code suggestions to improve developer '
 'productivity.')
---------- The citations for the response generated by Claude 3 Sonnet:
[ 'Amazon has been using machine learni

# Buffer!

# Text Embedding using Cohere LLM Model and stored in Amazon OpenSearch serverless

---

## Introduction

Embeddings are integral to various natural language processing applications, with their quality crucial for optimal performance. They are commonly used in knowledge bases to represent textual data as dense vectors enabling efficient similarity search and retrieval. Embeddings play a key role in personalization and recommendation systems by representing user preferences, item characteristics, and historical interactions as vectors, allowing calculation of similarities for personalized recommendations based on user behavior and item embeddings.

In this notebook, we demonstrate how to use the Cohere Embed Multilingual V3 model to create text embeddings and store them in Amazon OpenSearch Serverless with vector engine support. The stored embeddings are then used to retrieve relevant context for the prompt, which helps LLMs (Large Language Models) give more accurate responses.


## Prepare Documents


Before text can be searched based on meaning, and not just on keywords, the documents must be processed and stored in a document store index:

* Load the documents
* Create a numerical vector representation using Amazon Bedrock Cohere model
* Create an index and the corresponding embeddings in the Amazon Open Search Serverless

## Search Text

When the documents index is prepared, you are ready to search text and relevant documents will be fetched based on the query being asked. Following steps will be executed.

* Create an embedding of the input query
* Compare the query embedding with the embeddings in the index
* Fetch the (top N) relevant document chunks
* Add those chunks as part of the context in the prompt
* Send the prompt to the Cohere Command R+ model under Amazon Bedrock
* Get the contextual answer based on the documents retrieved


---



It's recommended to execute the notebook in SageMaker Studio Notebooks `Python 3.0(Data Science)` Kernel with `ml.t3.medium` instance.

In [None]:
%load_ext autoreload
%autoreload 2

## Step 0: Install Dependencies

Before running the rest of this notebook, you'll need to run the cells below to ensure necessary libraries are installed.

In [None]:
!pip install opensearch-py
!pip install requests-aws4auth
!pip install -U boto3
!pip install -U botocore
!pip install -U awscli
!pip install -U datasets
!pip install -U pypdf
!pip install langchain
!pip install langchain-community

Import the Python packages we are going to use.

In [None]:
# External Dependencies:
import warnings

from io import StringIO
import sys
import textwrap
import os
from typing import Optional


import boto3
from botocore.config import Config
import sagemaker
import json
import time
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth, helpers

warnings.filterwarnings('ignore')

In [None]:
# Boto3 clients for the AWS services used in this notebook.

# OpenSearch Serverless client (used for the collection and its policies).
aoss_client = boto3.client(service_name='opensearchserverless')

# Bedrock runtime client, pinned to the us-east-1 endpoint.
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    endpoint_url="https://bedrock-runtime.us-east-1.amazonaws.com",
)

# Region of the default boto3 session; used later to build the collection endpoint.
session = boto3.session.Session()
region_name = session.region_name

In [None]:
# Look up the ARN of the IAM execution role this notebook runs under; it is later
# passed to the data access policy as an authorized principal.
sagemaker_role_arn = sagemaker.get_execution_role()
sagemaker_role_arn

## Step 1: Load Dataset

For this notebook, let's first download some files to build our document store. For this example we will be using public IRS documents from [here](https://www.irs.gov/publications).

In [None]:
from urllib.request import urlretrieve

# IRS publications (PDF) used as the document corpus.
files = [
    "https://www.irs.gov/pub/irs-pdf/p1544.pdf",
    "https://www.irs.gov/pub/irs-pdf/p15.pdf",
    "https://www.irs.gov/pub/irs-pdf/p1212.pdf",
]

# Download each file into ./data, named after the last path segment of its URL.
os.makedirs("data", exist_ok=True)
for url in files:
    file_path = os.path.join("data", url.rsplit("/", 1)[-1])
    urlretrieve(url, file_path)

## Step 2: Data Preparation

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE(review): newer LangChain releases appear to expose this loader from
# `langchain_community.document_loaders` - confirm against the installed version.
from langchain.document_loaders import PyPDFDirectoryLoader


# Loader pointed at the ./data directory that holds the downloaded PDFs.
loader = PyPDFDirectoryLoader("./data/")

documents = loader.load()
# - in our testing Character split works better with this PDF data set
text_splitter = RecursiveCharacterTextSplitter(
    # Chunk size of 2000 with an overlap of 200 between consecutive chunks.
    chunk_size=2000,
    chunk_overlap=200,
)
# `docs` holds the chunks that are embedded and indexed in the following steps.
docs = text_splitter.split_documents(documents)

In [None]:
def avg_doc_length(documents):
    """Return the average `page_content` length of `documents`, rounded down.

    Args:
        documents: Sequence of objects exposing a `page_content` string.

    Returns:
        The integer (floor) average number of characters per document, or 0
        when `documents` is empty.
    """
    if not documents:
        # Avoid ZeroDivisionError when nothing was loaded.
        return 0
    return sum(len(doc.page_content) for doc in documents) // len(documents)
# Average document length before and after splitting.
avg_char_count_pre = avg_doc_length(documents)
avg_char_count_post = avg_doc_length(docs)

print(
    f"Average length among {len(documents)} documents loaded is {avg_char_count_pre} characters."
)
print(
    f"After the split we have {len(docs)} documents more than the original {len(documents)}."
)
print(f"Average length among {len(docs)} documents (after split) is {avg_char_count_post} characters.")

## Step 3: Generate embedding using Cohere Embed Multilingual V3

Cohere Embed Multilingual V3: An embedding model designed to encode text from various languages into dense vector representations, enabling efficient similarity comparisons and semantic search.


In [None]:
def create_vector_embedding_with_bedrock(text, bedrock_client, input_type="search_document"):
    """Embed `text` with the Cohere Embed Multilingual V3 model on Amazon Bedrock.

    Args:
        text: The string to embed.
        bedrock_client: Bedrock runtime client exposing `invoke_model`.
        input_type: Value sent as the request's `input_type`. Defaults to
            "search_document" (the value previously hard-coded here); pass
            "search_query" when embedding a search query.

    Returns:
        dict with the original `text` and its embedding under `vector_field`.

    Raises:
        ValueError: If the model response contains no embeddings.
    """
    request_body = json.dumps({
        "texts": [text],
        "input_type": input_type,
    })

    response = bedrock_client.invoke_model(
        body=request_body,
        modelId="cohere.embed-multilingual-v3",
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(response.get("body").read())
    embeddings = response_body.get("embeddings")
    if not embeddings:
        # Fail with a clear message rather than an opaque TypeError/IndexError below.
        raise ValueError("The embedding model returned no embeddings for the given text.")

    # One text was sent, so the first embedding belongs to it.
    return {"text": text, "vector_field": embeddings[0]}

Let's try embedding the first document and check the result.

In [None]:
# Embed the first chunk as a smoke test; the result is a dict holding the source
# text and its embedding under 'vector_field'.
bedrock_embeddings = create_vector_embedding_with_bedrock(docs[0].page_content, bedrock_client)

# Prints the source text together with the full embedding vector.
print(bedrock_embeddings)

## Step 4: Storing embedding in Amazon OpenSearch serverless

Following a similar pattern, embeddings can be generated for the entire corpus and stored in a vector store.

First of all we have to create a vector store. In this notebook we will use Amazon OpenSearch serverless.

Amazon OpenSearch Serverless is a serverless option in Amazon OpenSearch Service. As a developer, you can use OpenSearch Serverless to run petabyte-scale workloads without configuring, managing, and scaling OpenSearch clusters. You get the same interactive millisecond response times as OpenSearch Service with the simplicity of a serverless environment. Pay only for what you use by automatically scaling resources to provide the right amount of capacity for your application—without impacting data ingestion.

### Step 4.1: Create a vector store with Opensearch Serverless using Cohere Multilingual Embed V3

Before creating the new vector search collection and index, we must first create three associated OpenSearch policies: encryption security policy, network security policy, and data access policy.

In [None]:
import random
# Random number in [200, 900) appended to the policy and collection names
# (presumably to avoid name clashes between runs - TODO confirm).
suffix = random.randrange(200, 900)

# ARN of the caller identity running this notebook; it is listed as a principal
# in the data access policy created below.
identity = boto3.client('sts').get_caller_identity()['Arn']

def create_policies_in_oss(vector_store_name, aoss_client, role_arn):
    """Create the encryption, network, and data access policies for a collection.

    Policy names are derived from the notebook-level `suffix`. The data access
    policy grants its permissions to the notebook-level `identity` and to
    `role_arn`.

    Args:
        vector_store_name: Name of the OpenSearch Serverless collection the
            policies apply to.
        aoss_client: Boto3 `opensearchserverless` client.
        role_arn: Additional principal ARN authorized by the data access policy.

    Returns:
        Tuple `(encryption_policy, network_policy, access_policy)` of API
        responses. When a policy with the same name already exists, the
        existing policy is fetched and returned instead, so the tuple is always
        fully populated.

    Raises:
        Any error other than a name conflict propagates to the caller.
        Previously such errors were printed and then surfaced as an unrelated
        UnboundLocalError at the return statement.
    """
    encryption_policy_name = f"cohere-sample-sp-{suffix}"
    network_policy_name = f"cohere-sample-np-{suffix}"
    access_policy_name = f'cohere-sample-ap-{suffix}'

    collection_resource = 'collection/' + vector_store_name
    index_resource = 'index/' + vector_store_name + '/*'

    def _create_or_reuse(create_fn, get_fn, name, policy_type, document):
        # Create the policy, or fetch the existing one when the name is already taken.
        try:
            return create_fn(name=name, policy=json.dumps(document), type=policy_type)
        except aoss_client.exceptions.ConflictException as ex:
            print(ex)
            return get_fn(name=name, type=policy_type)

    encryption_policy = _create_or_reuse(
        aoss_client.create_security_policy,
        aoss_client.get_security_policy,
        encryption_policy_name,
        'encryption',
        {
            'Rules': [{'Resource': [collection_resource],
                       'ResourceType': 'collection'}],
            'AWSOwnedKey': True
        },
    )

    network_policy = _create_or_reuse(
        aoss_client.create_security_policy,
        aoss_client.get_security_policy,
        network_policy_name,
        'network',
        [
            {'Rules': [{'Resource': [collection_resource],
                        'ResourceType': 'collection'}],
             'AllowFromPublic': True}
        ],
    )

    access_policy = _create_or_reuse(
        aoss_client.create_access_policy,
        aoss_client.get_access_policy,
        access_policy_name,
        'data',
        [
            {
                'Rules': [
                    {
                        'Resource': [collection_resource],
                        'Permission': [
                            'aoss:CreateCollectionItems',
                            'aoss:DeleteCollectionItems',
                            'aoss:UpdateCollectionItems',
                            'aoss:DescribeCollectionItems'],
                        'ResourceType': 'collection'
                    },
                    {
                        'Resource': [index_resource],
                        'Permission': [
                            'aoss:CreateIndex',
                            'aoss:DeleteIndex',
                            'aoss:UpdateIndex',
                            'aoss:DescribeIndex',
                            'aoss:ReadDocument',
                            'aoss:WriteDocument'],
                        'ResourceType': 'index'
                    }],
                'Principal': [identity, role_arn],
                'Description': 'Easy data policy'}
        ],
    )

    return encryption_policy, network_policy, access_policy

### Step 4.2: Create a new collection of type VECTORSEARCH


In [None]:
# Name the collection using the same random suffix as its policies.
vector_store_name = f'cohere-embedding-collection-{suffix}'

# Create the three policies for that collection name.
encryption_policy, network_policy, access_policy = create_policies_in_oss(
    vector_store_name=vector_store_name,
    aoss_client=aoss_client,
    role_arn=sagemaker_role_arn,
)

In [None]:
collection = aoss_client.create_collection(name=vector_store_name, type='VECTORSEARCH')

# Collection creation is asynchronous: wait until the collection is ACTIVE before
# its endpoint is used by the following cells (give up after 15 minutes).
collection_wait_deadline = time.monotonic() + 15 * 60
while True:
    collection_details = aoss_client.batch_get_collection(
        names=[vector_store_name]
    )['collectionDetails']
    collection_status = collection_details[0]['status'] if collection_details else 'CREATING'
    if collection_status == 'ACTIVE':
        break
    if collection_status == 'FAILED':
        raise RuntimeError(f"Creation of collection {vector_store_name} failed")
    if time.monotonic() > collection_wait_deadline:
        raise TimeoutError(f"Collection {vector_store_name} is still {collection_status}")
    time.sleep(10)

### Step 4.3: Setting up the Amazon OpenSearch Serverless index using KNN settings

k-NN for Amazon OpenSearch Service lets you search for points in a vector space and find the "nearest neighbors" for those points by Euclidean distance or cosine similarity. Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection.

Once the OpenSearch collection is created, create an index to store the embeddings. The index settings must be configured beforehand to enable the KNN functionality using the following configuration:

In [None]:
# Build the collection endpoint host: <collection id>.<region>.aoss.amazonaws.com
collection_id = collection['createCollectionDetail']['id']
host = '.'.join([collection_id, region_name, 'aoss.amazonaws.com'])
print(host)

In [None]:
# Request signer for OpenSearch Serverless (service name 'aoss').
service = 'aoss'
credentials = boto3.Session().get_credentials()
awsauth = AWSV4SignerAuth(credentials, region_name, service)

index_name = "cohere-embedding-index"

# k-NN enabled index: a 1024-dimension vector field plus the source text.
index_body = {
    "settings": {
        "index": {"knn": "true"},
    },
    "mappings": {
        "properties": {
            "vector_field": {"type": "knn_vector", "dimension": 1024},
            "text": {"type": "keyword"},
        },
    },
}


In [None]:
# Build the OpenSearch client
oss_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=300
)
# # It can take up to a minute for data access rules to be enforced
time.sleep(60)

Next, create the vector index. The response printed by the call confirms its creation.

In [None]:
# Creating an index that already exists raises an exception; that case is fine,
# so the error is reported and the notebook carries on.
try:
    response = oss_client.indices.create(index_name, body=index_body)
except Exception as e:
    print(f"error in creating index={index_name}, exception={e}")
else:
    print(f"response received for the create index -> {response}")

Now we are ready to ingest our documents into the vector store.

In [None]:
# Uncomment to delete the index (for example, to re-create it from scratch)
# oss_client.indices.delete(index=index_name)

### Step 4.4: Ingest the embeddings

Next you need to loop through your dataset and ingest items data into the cluster. A more robust and scalable solution for the embedding ingestion can be found in [Ingesting enriched data into Amazon ES](https://aws.amazon.com/blogs/industries/novartis-ag-uses-amazon-elasticsearch-k-nearest-neighbor-knn-and-amazon-sagemaker-to-power-search-and-recommendation/). 

In [None]:
# Embed each chunk with Bedrock and index it (vector plus source text), one document at a time.
for idx, doc in enumerate(docs):
    embedding = create_vector_embedding_with_bedrock(doc.page_content, bedrock_client)
    document = {
        'vector_field': embedding['vector_field'],
        'text': embedding['text'],
    }
    response = oss_client.index(index=index_name, body=document)


## Step 5: Perform Search based on Text Input

Let’s take a look at the results of a simple query. In the example below, we receive a text input from the user and send it to the search engine to get the relevant results.

In [None]:
# Question asked against the indexed documents.
query_prompt = "Is it possible that I get sentenced to jail due to failure in filings?"
# query embedding
# NOTE(review): the helper is called with its default settings, i.e. the query is
# embedded with input_type "search_document"; Cohere's guidance appears to be
# "search_query" for query text - confirm and consider switching.
query_emb = create_vector_embedding_with_bedrock(query_prompt, bedrock_client)['vector_field']

In [None]:
# k-NN search request: the 5 nearest neighbours of the query embedding in `vector_field`.
body = {
    "size": 5,
    "query": {
        "knn": {
            "vector_field": {"vector": query_emb, "k": 5},
        },
    },
}

In [None]:
# perform search based on query input
res = oss_client.search(index=index_name, body=body)
results = ""
for hit in res["hits"]["hits"]:
    id_ = hit["_id"]
    text = hit["_source"]["text"]
    results += text



## Step 6: Generative Question Answering

For Generative Question Answering we will use the RAG (Retrieval Augmented Generation) approach, which retrieves the information most relevant to the user’s request from the enterprise knowledge base, bundles it as context along with the user’s request into a prompt, and then sends it to the LLM to get a GenAI response.

Define utility function for conversation with Bedrock converse API

Model used - Cohere Command R+: A powerful Large Language Model (LLM) capable of understanding and generating text in multiple languages.

In [None]:
def generate_conversation(
    bedrock_client,
    model_id,
    system_prompt,
    prompt,
    chat_history=None,
    temperature=0.3,
    max_tokens=400,
    top_p=0.95
):
    """
    Sends a user message, together with the prior conversation, to a model.

    Args:
        bedrock_client: The Boto3 Bedrock runtime client.
        model_id (str): The model ID to use.
        system_prompt (str): The system prompt for the model to use.
        prompt (str): The message/question to send to the model.
        chat_history (list, optional): Earlier user/assistant messages. When a
            list is passed it is updated in place with the new user message and
            the model's reply. Defaults to a fresh, empty history.
        temperature (float): Sampling temperature passed in the inference config.
        max_tokens (int): Maximum number of tokens to generate.
        top_p (float): Nucleus sampling value passed in the inference config.

    Returns:
        response (str): The text generated output from the model.
        chat_history (list): The full conversation between user and assistant,
            including this turn.

    """
    # A mutable default would be shared between calls and leak history across
    # unrelated conversations, so create a new list per call instead.
    if chat_history is None:
        chat_history = []

    system_prompts = [
        {
            "text": system_prompt
        }
    ]

    chat_history.append(
        {
            "role": "user",
            "content": [{"text": prompt}]
        }
    )

    # Base inference parameters.
    inference_config = {
        "temperature": temperature,
        "maxTokens": max_tokens,
        "topP": top_p,
    }

    # Additional inference parameters to use.
    additional_model_fields = {}

    # Send the whole conversation (not just the newest message) so the model
    # actually sees the chat history.
    response = bedrock_client.converse(
        modelId=model_id,
        messages=chat_history,
        system=system_prompts,
        inferenceConfig=inference_config,
        additionalModelRequestFields=additional_model_fields
    )

    assistant_message = response["output"]["message"]
    chat_history.append(assistant_message)

    return assistant_message["content"][0]["text"], chat_history

Define the system prompt and guardrails

In [None]:
# System prompt: restricts the model to the text between the <documents> tags and
# requires JSON output with an 'answer' key (plus a 'thinking' key for the reasoning).
system_prompt = """
## Instructions
You are an AI assistant. Your knowledge is based solely on the information provided between the <documents> and </documents> tags.

Before answering any questions, first check if the user has provided information between the <documents> and </documents> tags. If no information is provided, respond with the following JSON:

{
    "answer": "I do not have enough information to answer that question."
}

If documents are provided, your task is to answer questions accurately and concisely, using only the details from the given documents. Do not use your own knowledge or any external sources to answer the questions, even if you know the answer.

If a question cannot be fully answered using the provided documents, respond with the following JSON:

{
    "answer": "I do not have enough information to answer that question."
}

All responses must be in valid JSON format, with the 'answer' key containing the actual response text.

To provide transparency, include your reasoning process with the 'thinking' key as the following format:

{
    "answer": "Your response here",
    "thinking": "Your reasoning process here"
}

Be concise and objective in your responses, without any personal opinions or subjective statements.
"""
# User prompt template: `{documents}` receives the retrieved context and
# `{question}` the user's question (both filled in with str.format).
prompt_template = "## Documents<documents>\n{documents}\n</documents>\n\n ## Questions Question: {question}\nThink step-by-step."

In [None]:
# Bedrock model ID of the Cohere Command R+ model used to generate the answer.
model_id = "cohere.command-r-plus-v1:0"

In [None]:
# Start a new conversation and fill the prompt template with the retrieved
# context and the user's question.
chat_history = []
prompt = prompt_template.format(question=query_prompt, documents=results)

# Ask the model and keep the updated conversation history.
response, chat_history = generate_conversation(
    bedrock_client=bedrock_client,
    model_id=model_id,
    system_prompt=system_prompt,
    prompt=prompt,
    chat_history=chat_history,
)
print(response)

## Step 7: Clean up

When you finish this exercise, remove your resources with the following steps:

Delete vector index.
Delete data, network, and encryption access policies.
Delete collection.
Delete SageMaker Studio user profile and domain.
Optionally, empty and delete the S3 bucket, or keep whatever you want.  

In [None]:
# Remove the vector index first.
oss_client.indices.delete(index=index_name)

# Then remove the data access policy, followed by the network and encryption
# security policies.
aoss_client.delete_access_policy(name=access_policy['accessPolicyDetail']['name'], type="data")
for policy_type, policy in (("network", network_policy), ("encryption", encryption_policy)):
    aoss_client.delete_security_policy(
        type=policy_type,
        name=policy['securityPolicyDetail']['name'],
    )

# Finally delete the collection itself.
collection_id = collection['createCollectionDetail']['id']
aoss_client.delete_collection(id=collection_id)