# Import

In [1]:
import boto3
import sys
import json
import pprint
from botocore.client import Config
from botocore.exceptions import NoCredentialsError, PartialCredentialsError, ClientError
import os
import random
from retrying import retry
import time
from utility import *

# Record the SDK version so saved outputs can be tied to a specific boto3 release.
print(f'Running boto3 version: {boto3.__version__}')

Running boto3 version: 1.35.34


# Testing connection

In [2]:
# Sanity-check AWS access before using Bedrock. The helper is not defined in this
# notebook — presumably it comes from the wildcard `utility` import (confirm).
# NOTE(review): the saved output lists account bucket names; consider clearing it before sharing.
test_aws_connection()

Connected successfully! Buckets:
  dev-managinglife-aws-bedrock
  dev-managinglife-db-bkp
  logs-bucket-managinglife
  managemypain-archives
  managinglife-business-data
  managinglife-business-data-humana
  managinglife-config-s3
  managinglife-documents
  managinglife-drupal-db-prod
  managinglife-es-bkp
  managinglife-guardduty-s3
  managinglife-images
  managinglife-solr-bkp
  managinglife-userdata-ca
  managinglife-userdata-ca-dev
  managinglife-userdata-ca-preprod
  managinglifebucket1
  s3-logs-bucket-managinglife
  shared-managinglife-developer-s3
  terraform-managinglife


# Converse API

In [3]:
# Region shared by the Bedrock runtime calls in this section.
region = 'ca-central-1'

# Runtime client used to send prompts to the models listed below.
bedrock = boto3.client(service_name='bedrock-runtime', region_name=region)

# Model IDs queried by the next cell; add more entries to compare answers.
MODEL_IDS = ["anthropic.claude-3-sonnet-20240229-v1:0"]

In [4]:
prompt = "What is the capital of Italy?"
print(f'Prompt: {prompt}\n')

# Send the same prompt to every configured model and print each reply.
for model_under_test in MODEL_IDS:
    response = invoke_bedrock_model(bedrock, model_under_test, prompt)
    print(f'Model: {model_under_test}\n{response}')

Prompt: What is the capital of Italy?

Model: anthropic.claude-3-sonnet-20240229-v1:0
The capital of Italy is Rome.
--- Latency: 311ms - Input tokens:14 - Output tokens:10 ---



# Multiple lines Converse API

In [5]:
# Client for the multi-message examples below. Reuses the `region` chosen in the
# Converse API section instead of repeating the region literal.
bedrock_client = boto3.client('bedrock-runtime', region_name=region)

In [6]:
# Multi-turn conversation: a user question, an earlier assistant answer, then a
# follow-up question from the user.
# The assistant text is assembled with implicit string concatenation: a backslash
# continuation *inside* a string literal would embed the next source line's
# indentation (a run of spaces) in the text sent to the model.
messages = [
    {"role": 'user', "content": [{'type': 'text', 'text': "What is quantum mechanics? "}]},
    {"role": 'assistant', "content": [{'type': 'text', 'text': (
        "It is a branch of physics that "
        "describes how matter and energy interact with discrete energy values "
    )}]},
    {"role": 'user', "content": [{'type': 'text', 'text': "Can you explain a bit more about discrete energies?"}]},
]

# Bare last expression so the notebook displays the returned response.
generate_message(
    bedrock_client,
    model_id='anthropic.claude-3-sonnet-20240229-v1:0',
    messages=messages,
    max_tokens=512,
    temp=0.5,
    top_p=0.9,
)

{'id': 'msg_bdrk_01QGJCGVZzrkMsK41fBJiQBG',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'text',
   'text': 'Sure, the concept of discrete or quantized energies is a key principle of quantum mechanics. It states that the energy of particles or systems can only take on certain specific values, rather than varying continuously.\n\nSome key points about discrete energies:\n\n- Particles like electrons can only exist in specific, discrete energy levels around the nucleus rather than any arbitrary energy level.\n\n- When an electron transitions between allowed energy levels, it absorbs or emits a quantum of energy with very specific values, rather than any arbitrary amount.\n\n- The allowed energy levels depend on the specific system, like an atom or molecule. Each has a unique set of permissible discrete energy values.\n\n- The quantization of energy is one of the reasons why quantum mechanics departs from classical physics, which vi

## Messages with System (Personas)

In [7]:
# Single-turn request; the system prompt sets the persona used for the reply.
system = "Respond in a way a caveman would understand"
messages = [
    {"role": 'user', "content": [{'type': 'text', 'text': "What is quantum mechanics?"}]},
]

# Bare last expression so the notebook displays the returned response.
generate_message(
    bedrock_client,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    messages=messages,
    max_tokens=512,
    temp=0.5,
    top_p=0.9,
    system=system,
)

{'id': 'msg_bdrk_01HB1JgQEJDQCdetqpfCi8Sy',
 'type': 'message',
 'role': 'assistant',
 'model': 'claude-3-sonnet-20240229',
 'content': [{'type': 'text',
   'text': 'Here\'s how I would explain quantum mechanics in caveman terms:\n\nQuantum make tiny things act weird. Rock seem solid, but tiny bits inside rock, they move and bounce all over place like crazy! Sometimes tiny bits act like particle, little ball. Sometimes act like wave, like ripples on water. Crazy, right?\n\nEven crazier, tiny bits exist in many places at once until caveman look at them. When caveman look, tiny bit "decide" where it is. Spooky action at distance too - two tiny bits far apart, do same dance like connected by invisible force. \n\nQuantum make no sense to caveman brain used to regular big things. But quantum real, and let smart cavemen make fire-starters, heat rocks, all kinds of new cave magic. Quantum mechanics hard to grunt, but very powerful knowledge.'}],
 'stop_reason': 'end_turn',
 'stop_sequence': N

# Knowledge base

In [8]:

pp = pprint.PrettyPrinter(indent=2)
session = boto3.session.Session()
# Fall back to ca-central-1 when the session has no default region configured.
region = session.region_name or 'ca-central-1'
# 120 s connect/read timeouts; max_attempts=0 turns off botocore's automatic retries.
bedrock_config = Config(connect_timeout=120, read_timeout=120, retries={'max_attempts': 0})

# Use the resolved `region` (not a second hard-coded literal) so this client
# always targets the same region that is passed to retrieveAndGenerate.
bedrock_agent_client = boto3.client(
    "bedrock-agent-runtime",
    region_name=region,
    config=bedrock_config,
)
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

In [10]:
# Ask for a summary of a single document stored in S3 and print the generated text.
query = "Summarize the document"
document_s3_uri = "s3://dev-managinglife-aws-bedrock/1. User Guide/Common Questions.md"

response = retrieveAndGenerate(query, document_s3_uri, region)
generated_text = response['output']['text']
pp.pprint(generated_text)

NameError: name 'document_s3_uri' is not defined

In [None]:
# Gather the text of every retrieved reference across all citations in the response.
citations = response["citations"]
contexts = [
    reference["content"]["text"]
    for citation in citations
    for reference in citation["retrievedReferences"]
]

pp.pprint(contexts)

# Knowledge base template
This notebook provides sample code for building an empty OpenSearch Serverless (OSS) index, creating a knowledge base for Amazon Bedrock, and ingesting documents into the index from various data sources (S3, Confluence, SharePoint, Salesforce, and Web).

The notebook sets up a data pipeline that ingests documents (typically stored in multiple data sources) into a knowledge base, i.e. a vector database such as Amazon OpenSearch Service Serverless (AOSS), so that they are available for lookup when a question is received. The steps are:

- Create an empty OpenSearch serverless index.
- Create knowledge base
- Create data source(s) within knowledge base
- For each data source, start ingestion jobs using the KB APIs, which read data from the data source, chunk it, convert the chunks into embeddings using the Amazon Titan Embeddings model, and then store these embeddings in AOSS. All of this happens without having to build, deploy, and manage the data pipeline yourself.

In [None]:
# Session and region: fall back to ca-central-1 when no default region is configured.
boto3_session = boto3.session.Session()
region_name = boto3_session.region_name or 'ca-central-1'

# Random numeric suffix; later cells append it to resource names.
suffix = random.randrange(200, 900)

# Service clients used by the rest of the template.
sts_client = boto3.client('sts')
s3_client = boto3.client('s3')
bedrock_agent_client = boto3.client('bedrock-agent', region_name=region_name)
bedrock_agent_runtime_client = boto3.client('bedrock-agent-runtime', region_name=region_name)

service = 'aoss'
# Account ID of the calling identity, combined with the region into a suffix.
account_id = sts_client.get_caller_identity()["Account"]
s3_suffix = f"{region_name}-{account_id}"

In [None]:
pp = pprint.PrettyPrinter(indent=2)

bucket_name = 'dev-managinglife-aws-bedrock'

# Data sources for the knowledge base. Only the S3 bucket is active; the
# commented entries are templates for the other connector types (Confluence,
# SharePoint, Salesforce, Web crawler). Uncomment and fill in as needed.
# Secrets Manager ARNs have the form
#   arn:aws:secretsmanager:<region>:<account-id>:secret:<name>
data_sources = [
    {"type": "S3", "bucket_name": bucket_name},

    # {"type": "CONFLUENCE", "hostUrl": "https://example.atlassian.net", "authType": "BASIC",
    #  "credentialsSecretArn": f"arn:aws:secretsmanager:{region_name}:{account_id}:secret:<<your_secret_name>>"},

    # {"type": "SHAREPOINT", "tenantId": "888d0b57-69f1-4fb8-957f-e1f0bedf64de", "domain": "yourdomain",
    #  "authType": "OAUTH2_CLIENT_CREDENTIALS",
    #  "credentialsSecretArn": f"arn:aws:secretsmanager:{region_name}:{account_id}:secret:<<your_secret_name>>",
    #  "siteUrls": ["https://yourdomain.sharepoint.com/sites/mysite"]
    # },

    # {"type": "SALESFORCE", "hostUrl": "https://company.salesforce.com/", "authType": "OAUTH2_CLIENT_CREDENTIALS",
    #  "credentialsSecretArn": f"arn:aws:secretsmanager:{region_name}:{account_id}:secret:<<your_secret_name>>"
    # },

    # Use raw strings for the regex filters so the backslashes are not treated as escapes.
    # {"type": "WEB", "seedUrls": [{"url": "https://www.examplesite.com"}],
    #  "inclusionFilters": [r"https://www\.examplesite\.com/.*\.html"],
    #  "exclusionFilters": [r"https://www\.examplesite\.com/contact-us\.html"]
    # }
]

In [None]:
# For each S3 data source, verify that the bucket exists and is reachable.
# Nothing is created here: head_bucket raises ClientError when the bucket is
# missing or access is denied, and that error is reported and re-raised so the
# notebook stops before building resources on top of a bad bucket.
for ds in [d for d in data_sources if d['type']== 'S3']:
    bucket_name = ds['bucket_name']
    try:
        s3_client.head_bucket(Bucket=bucket_name)
    except ClientError as err:
        # The error code says why the check failed (e.g. '404' or '403').
        error_code = err.response.get('Error', {}).get('Code', 'Unknown')
        print(f'Bucket {bucket_name} is not accessible (error code {error_code})')
        raise
    print(f'Bucket {bucket_name} Exists')
   

## Create a vector store - OpenSearch Serverless index

In [None]:
# Resource names carry the random suffix generated during setup.
vector_store_name = f'bedrock-sample-rag-{suffix}'
index_name = f"bedrock-sample-rag-index-{suffix}"
# Create the client in `region_name` (not a hard-coded literal): the collection
# host name and the Bedrock agent clients are built from `region_name`, so the
# collection must live in that same region.
aoss_client = boto3_session.client('opensearchserverless', region_name=region_name)

# Personal ARN
# NOTE(review): account-specific execution role; replace with your own role ARN when reusing this notebook.
bedrock_kb_execution_role_arn = 'arn:aws:iam::442186832995:role/service-role/AmazonBedrockExecutionRoleForKnowledgeBase_nmhn9'

In [None]:
# Set up the security, network and data access policies within OSS, then
# create the vector-search collection that will back the knowledge base.
encryption_policy, network_policy, access_policy = create_policies_in_oss(
    vector_store_name=vector_store_name,
    aoss_client=aoss_client,
    bedrock_kb_execution_role_arn=bedrock_kb_execution_role_arn,
)
collection = aoss_client.create_collection(name=vector_store_name, type='VECTORSEARCH')

In [None]:
# Show the raw create_collection response; the next cell reads the collection id from it.
pp.pprint(collection)

In [None]:
# Build the OpenSearch Serverless collection host name from the collection id and region.
collection_id = collection['createCollectionDetail']['id']
host = f'{collection_id}.{region_name}.aoss.amazonaws.com'
print(host)