In [None]:
%pip install -r requirements.txt -U

In [None]:
!curl https://d2eo22ngex1n9g.cloudfront.net/Documentation/SDK/bedrock-python-sdk.zip --output bedrock-python-sdk.zip
!unzip bedrock-python-sdk.zip -d bedrock-python-sdk

In [None]:
%pip install bedrock-python-sdk/botocore-1.31.21-py3-none-any.whl

In [None]:
%pip install bedrock-python-sdk/boto3-1.28.21-py3-none-any.whl

In [None]:
%pip install bedrock-python-sdk/awscli-1.29.21-py3-none-any.whl

In [None]:
%pip install requests_aws4auth opensearch-py

In [24]:
import os
os.environ['BWB_REGION_NAME']='us-west-2'
os.environ['BWB_PROFILE_NAME']='default'
os.environ['BWB_ENDPOINT_URL']='https://bedrock.us-west-2.amazonaws.com'

In [2]:
print(os.environ.get('BWB_ENDPOINT_URL'))

https://bedrock.us-west-2.amazonaws.com


In [25]:
import boto3
import json
bedrock = boto3.client(
    service_name='bedrock',
    region_name=os.environ.get('BWB_REGION_NAME'),
    endpoint_url=os.environ.get('BWB_ENDPOINT_URL')
)
bedrock.list_foundation_models()

{'ResponseMetadata': {'RequestId': '05f5b39b-4219-4997-b43b-9005371bcb4f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sat, 26 Aug 2023 12:55:48 GMT',
   'content-type': 'application/json',
   'content-length': '1166',
   'connection': 'keep-alive',
   'x-amzn-requestid': '05f5b39b-4219-4997-b43b-9005371bcb4f'},
  'RetryAttempts': 0},
 'modelSummaries': [{'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/amazon.titan-tg1-large',
   'modelId': 'amazon.titan-tg1-large'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/amazon.titan-e1t-medium',
   'modelId': 'amazon.titan-e1t-medium'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/stability.stable-diffusion-xl',
   'modelId': 'stability.stable-diffusion-xl'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/ai21.j2-grande-instruct',
   'modelId': 'ai21.j2-grande-instruct'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/ai21.j2-jumbo-instruct',
   'modelId': 'ai21.j2-jumbo-i

In [52]:
## Quick Test to ensure bedrock invoke api call is working
bedrock_model_id = "amazon.titan-tg1-large" #set the model to Titan

prompt = "What is the largest city in New Hampshire?" #the prompt to send to the model

body = json.dumps({"inputText": prompt, "textGenerationConfig": {"maxTokenCount": 512, "stopSequences": [], "temperature": 0, "topP": 0.9 } } ) #build the request payload

response = bedrock.invoke_model(body=body, modelId=bedrock_model_id, accept='application/json', contentType='application/json') #send the payload to Bedrock

response_body = json.loads(response.get('body').read()) # read the response

response_text = response_body.get('results')[0].get('outputText') #extract the text from the JSON response

print(response_text)


The largest city in New Hampshire is Manchester.


## Opensearch Related Setup

In [27]:
import torch
print(torch.__version__)

2.0.1


In [None]:
!pip install opensearch-py-ml
!pip install accelerate
!pip install deprecated

In [28]:
import boto3

cfn = boto3.client('cloudformation')

def get_cfn_outputs(stackname):
    outputs = {}
    for output in cfn.describe_stacks(StackName=stackname)['Stacks'][0]['Outputs']:
        outputs[output['OutputKey']] = output['OutputValue']
    return outputs

## Setup variables to use for the rest of the demo
cloudformation_stack_name = "semantic-search"

outputs = get_cfn_outputs(cloudformation_stack_name)

bucket = outputs['s3BucketTraining']
aos_host = outputs['OpenSearchDomainEndpoint']

outputs

{'OpenSourceDomainArn': 'arn:aws:es:us-west-2:391834123224:domain/opensearchservi-qvcuu1lushdf',
 'OpenSearchDomainEndpoint': 'search-opensearchservi-qvcuu1lushdf-jb3abyebhhd46nuscyz6acpw64.us-west-2.es.amazonaws.com',
 'S3BucketSecureURL': 'https://semantic-search-s3buckethosting-1kagnhrse3kpz.s3.amazonaws.com',
 'SageMakerNotebookURL': 'https://console.aws.amazon.com/sagemaker/home?region=us-east-1#/notebook-instances/openNotebook/NotebookInstance-VSLeA5HkwiJK?view=classic',
 's3BucketTraining': 'semantic-search-s3buckettraining-1t19qj06o6pyi',
 'Region': 'us-west-2',
 'OpenSearchDomainName': 'opensearchservi-qvcuu1lushdf',
 's3BucketHostingBucketName': 'semantic-search-s3buckethosting-1kagnhrse3kpz'}

In [53]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
import boto3

region = 'us-west-2' 

#credentials = boto3.Session().get_credentials()
#auth = AWSV4SignerAuth(credentials, region)
auth = ("master","Semantic123!")
#index_name = 'nlp_pqa'

aos_client = OpenSearch(
    hosts = [{'host': aos_host, 'port': 443}],
    http_auth = auth,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection
)

### Configure OpenSearch domain to enable run Machine Learning code in data node

In [46]:
s = b'{"transient":{"plugins.ml_commons.only_run_on_ml_node": false}}'
aos_client.cluster.put_settings(body=s)

{'acknowledged': True,
 'persistent': {},
 'transient': {'plugins': {'ml_commons': {'only_run_on_ml_node': 'false'}}}}

In [47]:
aos_client.cluster.get_settings(flat_settings=True)

{'persistent': {'aes.jetty.admission_control.global_cpu_usage.window_duration': '300s',
  'cluster.routing.allocation.awareness.force.zone.values': 'xx-xxxxx-xx',
  'cluster.routing.allocation.disk.watermark.flood_stage': '0.9758094787597656gb',
  'cluster.routing.allocation.disk.watermark.high': '1.9516189575195313gb',
  'cluster.routing.allocation.disk.watermark.low': '2.9274284362792966gb',
  'cluster.routing.allocation.load_awareness.provisioned_capacity': '1',
  'cluster.routing.allocation.load_awareness.skew_factor': '50.0',
  'cluster_manager.throttling.thresholds.auto-create.value': '200',
  'cluster_manager.throttling.thresholds.cluster-reroute-api.value': '50',
  'cluster_manager.throttling.thresholds.cluster-update-settings.value': '50',
  'cluster_manager.throttling.thresholds.create-component-template.value': '50',
  'cluster_manager.throttling.thresholds.create-data-stream.value': '50',
  'cluster_manager.throttling.thresholds.create-index-template-v2.value': '50',
  'clu

### Downloadning some docs in pdf for chunking and then ingestion

In [None]:
!rm -fr data
!mkdir data

from urllib.request import urlretrieve
files = [
    'https://docs.aws.amazon.com/apigateway/latest/developerguide/apigateway-dg.pdf',
    'https://docs.aws.amazon.com/codewhisperer/latest/userguide/user-guide.pdf',
    'https://docs.aws.amazon.com/dlami/latest/devguide/dlami-dg.pdf'
]
for url in files:
    file_path = './data/' + url.split('/')[-1]
    urlretrieve(url, file_path)

### Setup langchain for loading the pdf, and chunking

In [None]:
 # We will be using the Titan Embeddings Model to generate our Embeddings.
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock

# - create the Anthropic Model
#claude_llm = Bedrock(model_id="anthropic.claude-v1", client=boto3_bedrock, model_kwargs={'max_tokens_to_sample':200})
#titan_llm = Bedrock(model_id= "amazon.titan-tg1-large", client=boto3_bedrock)
bedrock_embeddings = BedrockEmbeddings(client=bedrock)

In [139]:
import numpy as np
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader

loader = PyPDFDirectoryLoader("./data/")

documents = loader.load()
# - in our testing Character split works better with this PDF data set
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 1500,
    chunk_overlap  = 100,
)
docs = text_splitter.split_documents(documents)

In [140]:
avg_doc_length = lambda documents: sum([len(doc.page_content) for doc in documents])//len(documents)
avg_char_count_pre = avg_doc_length(documents)
avg_char_count_post = avg_doc_length(docs)
print(f'Average length among {len(documents)} documents loaded is {avg_char_count_pre} characters.')
print(f'After the split we have {len(docs)} documents more than the original {len(documents)}.')
print(f'Average length among {len(docs)} documents (after split) is {avg_char_count_post} characters.')

Average length among 1134 documents loaded is 2060 characters.
After the split we have 2207 documents more than the original 1134.
Average length among 2207 documents (after split) is 1086 characters.


### Register pre-trained model to OpenSearch domain

In [None]:
%pip install -U sentence-transformers

In [5]:
from opensearch_py_ml.ml_models import SentenceTransformerModel
from opensearch_py_ml.ml_commons import MLCommonClient

ml_client = MLCommonClient(aos_client)
model_id = ml_client.register_pretrained_model(model_name = "huggingface/sentence-transformers/all-MiniLM-L12-v2", model_version = "1.0.1", model_format = "TORCH_SCRIPT", deploy_model=False, wait_until_deployed=False)
print(model_id)

Model was registered successfully. Model Id:  qk3fLooBGSulaPZDmnJX
qk3fLooBGSulaPZDmnJX
qk3fLooBGSulaPZDmnJX


### Load the model for inference.

In [36]:
load_model_output = ml_client.deploy_model(model_id) #qk3fLooBGSulaPZDmnJX

print(load_model_output)

Model deployed successfully
{'model_id': 'qk3fLooBGSulaPZDmnJX', 'task_type': 'DEPLOY_MODEL', 'function_name': 'TEXT_EMBEDDING', 'state': 'COMPLETED', 'worker_node': ['LB9NbUrUR5KqokfPl1O_mQ'], 'create_time': 1693054694494, 'last_update_time': 1693054694519, 'is_async': True}


### Get the model detailed information.

In [37]:
model_info = ml_client.get_model_info(model_id)

print(model_info)

{'name': 'huggingface/sentence-transformers/all-MiniLM-L12-v2', 'algorithm': 'TEXT_EMBEDDING', 'model_version': '1.0.1', 'model_format': 'TORCH_SCRIPT', 'model_state': 'DEPLOYED', 'model_content_size_in_bytes': 134568911, 'model_content_hash_value': 'f8012a4e6b5da1f556221a12160d080157039f077ab85a5f6b467a47247aad49', 'model_config': {'model_type': 'bert', 'embedding_dimension': 384, 'framework_type': 'SENTENCE_TRANSFORMERS', 'all_config': '{"_name_or_path":"microsoft/MiniLM-L12-H384-uncased","attention_probs_dropout_prob":0.1,"gradient_checkpointing":false,"hidden_act":"gelu","hidden_dropout_prob":0.1,"hidden_size":384,"initializer_range":0.02,"intermediate_size":1536,"layer_norm_eps":1e-12,"max_position_embeddings":512,"model_type":"bert","num_attention_heads":12,"num_hidden_layers":12,"pad_token_id":0,"position_embedding_type":"absolute","transformers_version":"4.8.2","type_vocab_size":2,"use_cache":true,"vocab_size":30522}'}, 'created_time': 1693003520599, 'last_updated_time': 169305

### Create pipeline to convert text into vector with BERT model
We will use the just uploaded model to convert `` field into vector(embedding) and stored into `
_vector` field.

In [135]:
pipeline={
  "description": "An example neural search pipeline",
  "processors" : [
    {
      "text_embedding": {
        "model_id": model_id,
        "field_map": {
           "pdf_text": "pdf_text_vector"
        }
      }
    }
  ]
}
pipeline_id = 'nlp_pipeline'
aos_client.ingest.put_pipeline(id=pipeline_id,body=pipeline)

{'acknowledged': True}

### 12. Create a index in Amazon Opensearch Service 
Whereas we previously created an index with 2 fields, this time we'll define the index with 3 fields: the first field ' question_vector' holds the vector representation of the question, the second is the "question" for raw sentence and the third field is "answer" for the raw answer data.

To create the index, we first define the index in JSON, then use the aos_client connection we initiated ealier to create the index in OpenSearch.

In [136]:
awsdoc_index = {
    "settings": {
        "index.knn": True,
        "index.knn.space_type": "cosinesimil",
        "default_pipeline": pipeline_id,
        "analysis": {
          "analyzer": {
            "default": {
              "type": "standard",
              "stopwords": "_english_"
            }
          }
        }
    },
    "mappings": {
        "properties": {
            "pdf_text_vector": {
                "type": "knn_vector",
                "dimension": 384,
                "method": {
                    "name": "hnsw",
                    "space_type": "l2",
                    "engine": "faiss"
                },
                "store": True
            },
            "pdf_text": {
                "type": "text",
                "store": True
            },
            "document_title": {
                "type": "text",
                "store": True
            },
            "page_number": {
                "type": "text",
                "store": True
            }
        }
    }
}

In [137]:
aos_client.indices.delete(index="nlp_awsdocs")

aos_client.indices.create(index="nlp_awsdocs",body=awsdoc_index,ignore=400)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'nlp_awsdocs'}

In [138]:
# Let's verify created index
aos_client.indices.get(index="nlp_awsdocs")

{'nlp_awsdocs': {'aliases': {},
  'mappings': {'properties': {'document_title': {'type': 'text',
     'store': True},
    'page_number': {'type': 'text', 'store': True},
    'pdf_text': {'type': 'text', 'store': True},
    'pdf_text_vector': {'type': 'knn_vector',
     'store': True,
     'dimension': 384,
     'method': {'engine': 'faiss',
      'space_type': 'l2',
      'name': 'hnsw',
      'parameters': {}}}}},
  'settings': {'index': {'number_of_shards': '5',
    'provided_name': 'nlp_awsdocs',
    'knn.space_type': 'cosinesimil',
    'default_pipeline': 'nlp_pipeline',
    'knn': 'true',
    'creation_date': '1693161785104',
    'analysis': {'analyzer': {'default': {'type': 'standard',
       'stopwords': '_english_'}}},
    'number_of_replicas': '1',
    'uuid': '4X-HnkhCTK6vhMmkzzkUXg',
    'version': {'created': '136287827'}}}}}

In [143]:
# aos_client.index(index='nlp_pqa',body={"question": content,"answer":answer})

for document in docs: 
    aos_client.index(index='nlp_awsdocs',
                     body={
                         "pdf_text" : document.page_content,
                         "document_title" : document.metadata['source'].split('/')[1],
                         "page_number" : document.metadata['page']
                     }
                    )

In [144]:
# To validate the load, we'll query the number of documents number in the index. We should have 2319 hits in the index.
res = aos_client.search(index="nlp_awsdocs", body={"query": {"match_all": {}}})
print("Records found: %d." % res['hits']['total']['value'])

Records found: 2810.


In [156]:
import pandas as pd

#query_text='What images get new SageMaker/AWS features?'
#query_text='Why to Choose the Base DLAMI?'
query_text='How do you install Code whisperer on vs code?'

query={
  "_source": {
        "exclude": [ "pdf_text_vector" ]
    },
  "size": 30,
  "query": {
    "neural": {
      "pdf_text_vector": {
        #"query_text": "what are the features of API gateway?",
        "query_text":query_text,
        "model_id": model_id,
        "k": 30
      }
    }
  }
}

res = aos_client.search(index="nlp_awsdocs", 
                       body=query,
                       stored_fields=["pdf_text","page_number"])
print("Got %d Hits:" % res['hits']['total']['value'])
query_result=[]
for hit in res['hits']['hits']:
    row=[hit['_id'],hit['_score'],hit['_source']['pdf_text'],hit['_source']['page_number']]
    query_result.append(row)

query_result_df = pd.DataFrame(data=query_result,columns=["_id","_score","pdf_text","page_number"])
display(query_result_df)

Got 411 Hits:


Unnamed: 0,_id,_score,pdf_text,page_number
0,AE1UOIoBGSulaPZDVICJ,0.581722,packages.microsoft.com/yumrepos/vscode\nenable...,110
1,W01TOIoBGSulaPZDp3-I,0.547181,CodeWhisperer User Guide\nInstalling or updati...,6
2,yU1SOIoBGSulaPZDB33F,0.547181,CodeWhisperer User Guide\nInstalling or updati...,6
3,4k1SOIoBGSulaPZDB33F,0.542482,CodeWhisperer User Guide\nWith VS Code and Jet...,20
4,dE1TOIoBGSulaPZDwn_z,0.542482,CodeWhisperer User Guide\nWith VS Code and Jet...,20
5,5E1SOIoBGSulaPZDB33F,0.528323,CodeWhisperer User Guide\nCodeWhisperer Profes...,21
6,dk1TOIoBGSulaPZDxX81,0.528323,CodeWhisperer User Guide\nCodeWhisperer Profes...,21
7,Ak1SOIoBGSulaPZDB37F,0.514997,CodeWhisperer User Guide\nPausing suggestions1...,43
8,lE1TOIoBGSulaPZD5H81,0.514997,CodeWhisperer User Guide\nPausing suggestions1...,43
9,_01SOIoBGSulaPZDB33F,0.507726,CodeWhisperer User Guide\nIDE supportCodeWhisp...,41


In [157]:
query={
  "_source": {
        "exclude": [ "pdf_text_vector" ]
    },
  "size": 30,
  "query": {
    "neural": {
      "pdf_text_vector": {
        #"query_text": "what are the features of API gateway?",
        #"query_text": "Who uses API gateway?",
        #"query_text":  'What are the cloudwatch metrics for monitoring websocket APIs?',
        "query_text": query_text,
        "model_id": model_id,
        "k": 30
      }
    }
  }
}

relevant_documents = aos_client.search(
    body = query,
    index = 'nlp_awsdocs'
)

In [158]:
# from utils import print_ww

relevant_documents
print(len(relevant_documents["hits"]["hits"]))
print("--------------------")
context = " "
for i, rel_doc in enumerate(relevant_documents["hits"]["hits"]):
    # print_ww(f'## Document {i+1}: {relevant_documents["hits"]["hits"][i]["_source"]["pdf_text"]}.......')
    # print('---')
    context += relevant_documents["hits"]["hits"][i]["_source"]["pdf_text"]

30
--------------------


### Claude v2 LLM Response

In [130]:
#query = 'What are the features of Api Gateway?'
#query = 'Who uses Api Gateway?'
#query = 'What are the cloudwatch metrics for monitoring websocket APIs?'

In [131]:
from langchain.llms.bedrock import Bedrock

claude_llm = Bedrock(model_id="anthropic.claude-v2", client=bedrock, model_kwargs={'max_tokens_to_sample':400})
titan_llm = Bedrock(model_id= "amazon.titan-tg1-large", client=bedrock)

In [132]:
parameters = {
    "maxTokenCount":512,
    "stopSequences":[],
    "temperature":0.1,
    "topP":0.9
    }

In [159]:
prompt_data_claude = f"""Human: Answer the question based only on the information provided. If the answer is not in the context, say "I don't know, answer not found in the documents. Provide quote from the document.
<context>
{context}
</context>
<question>
{query_text}
</question>
Assistant:"""

In [163]:
 output_text_claude = claude_llm(prompt_data_claude)

print ("########## Ouput from Claude Model #################\n")
print(output_text_claude)

########## Ouput from Claude Model #################

 Based on the provided context, here are the steps to install CodeWhisperer on Visual Studio Code:

1. Open your local instance of VS Code (with the AWS Toolkit for Visual Studio already installed).

2. In the Toolkit for Visual Studio pane, under Developer tools, under CodeWhisperer, select Start. A dropdown menu will appear at the top of VS Code.

3. From the dropdown menu, select your preferred authentication method (e.g. Connect using IAM Identity Center). 

4. Follow the prompts to complete the authentication process.

5. Once authenticated, CodeWhisperer will be activated in VS Code.

So the key steps are to have the AWS Toolkit installed, select CodeWhisperer Start from the toolkit pane, and complete the authentication process. The context mentions opening a local instance of VS Code, so CodeWhisperer needs to be set up on each developer's local IDE.


### Titan LLM Response

In [161]:
 prompt_data_titan = f"""Answer the below question based on the context provided. If the answer is not in the context, say "I don't know, answer not found in the documents".
{context}
{query}
"""

In [162]:
output_text_titan = titan_llm(prompt_data_titan)
print ("########## Ouput from Titan Model ################\n")
print(output_text_titan)

########## Ouput from Titan Model ################

3. In the AWS Toolkit: Select Settings tab.
4. Under CodeWhisperer, select the box next to Include Suggestions with Code References.
5. Choose Close.
Turning code references oﬀ and on
With the reference log, you can view references to code recommendations. You can also update and edit 
code recommendations suggested by CodeWhisperer.
This section explains how to use the code reference options.
AWS Toolkit for Visual Studio Code
When you use CodeWhisperer with VS Code, code references are on by default.
To turn them oﬀ
