In [1]:
# Two short sample passages (one sports, one finance) used throughout the notebook.
sports_text, finance_text = (
    'The Giants and Dodgers play today, March 17, at 5pm.',
    'The price of WDY stock was $272.45 on March 17.',
)

In [2]:
import boto3
# Client for the Bedrock runtime service in us-east-1; every model call below goes through it.
bedrock_runtime = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-east-1',
)

In [3]:
import json
# Request payload: a JSON object carrying the text under the 'inputText' key.
json_request = dict(inputText=sports_text)
body = json.dumps(json_request)
# Send the sports sentence to the Titan text-embedding model.
response = bedrock_runtime.invoke_model(
    modelId='amazon.titan-embed-text-v1',
    body=body,
)

In [4]:
# Parse the JSON document held in the response's 'body' stream.
response_body = json.load(response.get('body'))

In [5]:
def embed_text(text):
    """Return the embedding for ``text`` from the Titan embedding model.

    Serialises ``text`` as ``{'inputText': text}``, sends it to the
    ``amazon.titan-embed-text-v1`` model through the notebook-level
    ``bedrock_runtime`` client, and returns the value stored under the
    ``'embedding'`` key of the JSON response body.
    """
    payload = json.dumps({'inputText': text})
    result = bedrock_runtime.invoke_model(
        modelId='amazon.titan-embed-text-v1',
        body=payload,
    )
    raw_body = result.get('body').read()
    parsed = json.loads(raw_body)
    return parsed['embedding']

In [6]:
import pandas as pd
# One row per sample passage: a label in 'name' and the passage itself in 'text'.
data = dict(
    name=['sports_text', 'finance_text'],
    text=[sports_text, finance_text],
)
df = pd.DataFrame(data)

In [7]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
df

Unnamed: 0,name,text
0,sports_text,"The Giants and Dodgers play today, March 17, a..."
1,finance_text,The price of WDY stock was $272.45 on March 17.


In [8]:
# Call embed_text once per value of the 'text' column (each call invokes the
# Bedrock model) and store the results in a new 'embedding' column on df.
df['embedding'] = df['text'].apply(embed_text)
# Bare expression so the notebook renders the updated frame.
df

Unnamed: 0,name,text,embedding
0,sports_text,"The Giants and Dodgers play today, March 17, a...","[0.8515625, 0.421875, -0.48828125, 0.010253906..."
1,finance_text,The price of WDY stock was $272.45 on March 17.,"[0.19433594, -0.13378906, -0.37890625, 0.59765..."


In [9]:
import numpy as np
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Parameters
    ----------
    v1, v2 : array-like
        Numeric sequences of equal length (lists, tuples, or NumPy arrays).

    Returns
    -------
    float
        ``dot(v1, v2) / (norm(v1) * norm(v2))``. When either vector has zero
        magnitude the similarity is undefined; ``0.0`` is returned instead of
        dividing by zero and yielding ``nan``.

    Raises
    ------
    ValueError
        Propagated from ``np.dot`` when the two vectors differ in length.
    """
    # Convert each input once and reuse the arrays for both the dot product
    # and the norms.
    vector1 = np.asarray(v1)
    vector2 = np.asarray(v2)

    denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if denominator == 0:
        # At least one zero-length vector: avoid 0/0 and the RuntimeWarning.
        return 0.0

    return np.dot(vector1, vector2) / denominator

In [10]:
# Embedding stored at index label 0 (the sports passage row).
vec1 = df['embedding'].loc[0]

In [11]:
# Embedding stored at index label 1 (the finance passage row).
vec2 = df['embedding'].loc[1]

In [12]:
# Similarity between the embeddings taken from rows 0 and 1 (sports vs. finance passage).
cosine_similarity(vec1, vec2)

0.0250861808710861

In [13]:
# Question to answer from the stored passages.
query = 'What was the price of WDY stock on March 17?'

In [14]:
# Embed the question with the same model used for the stored passages.
prompt_embedding = embed_text(text=query)

In [15]:
# Score every stored embedding against the question embedding.
df['prompt_similarity'] = [
    cosine_similarity(row_vector, prompt_embedding)
    for row_vector in df['embedding']
]

In [16]:
# Bare expression: render the frame, which now also carries 'prompt_similarity'.
df

Unnamed: 0,name,text,embedding,prompt_similarity
0,sports_text,"The Giants and Dodgers play today, March 17, a...","[0.8515625, 0.421875, -0.48828125, 0.010253906...",0.126192
1,finance_text,The price of WDY stock was $272.45 on March 17.,"[0.19433594, -0.13378906, -0.37890625, 0.59765...",0.792362


In [17]:
# Text of the row whose 'prompt_similarity' is highest.
most_similar_text = df.nlargest(n=1, columns='prompt_similarity')['text'].iloc[0]

In [18]:
full_prompt = f"Answer this question based on the context provided. Here is the question: \n{query} \n\nHere is the context: \n{most_similar_text} Provid a simple answer to the question."

In [19]:
print(full_prompt)

Answer this question based on the context provided. Here is the question: 
What was the price of WDY stock on March 17? 

Here is the context: 
The price of WDY stock was $272.45 on March 17. Provid a simple answer to the question.


In [20]:
# JSON request for the text-generation call: the prompt plus sampling settings.
body = json.dumps(
    dict(
        prompt=full_prompt,
        max_gen_len=1024,
        temperature=0.9,
        top_p=0.9,
    )
)

In [24]:
# Send the assembled prompt to the Llama 2 70B chat model.
response = bedrock_runtime.invoke_model(
    modelId='meta.llama2-70b-chat-v1',
    body=body,
)

In [25]:
# Parse the JSON document held in the generation response's 'body' stream.
response_body = json.load(response.get('body'))

In [26]:
# Show the value under the 'generation' key of the parsed response.
response_body['generation']

'\n\nAnswer: \n$272.45'