In [1]:
import boto3
import json
import numpy as np
from typing import List, Union
import csv

In [2]:
# Load [paragraph, embedding] pairs from the CSV. Each data row carries the
# paragraph text in column 0 and the embedding, serialized as a
# comma-separated list of floats, in column 1.
embeddings = []
# newline='' lets the csv module manage line endings itself, so quoted
# paragraphs that contain embedded newlines are parsed correctly.
with open('embeddings.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    # The first row is always treated as a header and discarded; the default
    # argument keeps an empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        # Skip rows that lack either the paragraph or the embedding column.
        if len(row) < 2:
            continue
        paragraph, embedding_str = row[0], row[1]
        embedding = [float(x) for x in embedding_str.split(',')]
        embeddings.append([paragraph, embedding])


In [3]:
def cosine_similarity(v1: List[float],
                      v2: List[float]) -> float:
    """
    Calculate the cosine similarity between two vectors.
    Args:
        v1: First vector.
        v2: Second vector.
    Returns:
        Cosine similarity score as a built-in float, clamped to [-1.0, 1.0].
    Raises:
        ValueError: If the input vectors have different shapes, or if either
            vector has zero magnitude (the similarity is undefined then).
    """
    v1_array = np.asarray(v1, dtype=float)
    v2_array = np.asarray(v2, dtype=float)
    if v1_array.shape != v2_array.shape:
        raise ValueError("Input vectors must have the same length.")

    magnitude1 = np.linalg.norm(v1_array)
    magnitude2 = np.linalg.norm(v2_array)

    # A single magnitude check covers every zero-vector case (one or both
    # inputs) and guards the division below.
    if magnitude1 == 0 or magnitude2 == 0:
        raise ValueError("Input vectors must not be zero vectors.")

    score = np.dot(v1_array, v2_array) / (magnitude1 * magnitude2)

    # Floating-point rounding can push the ratio slightly outside [-1, 1];
    # clamp it, then convert so callers always get a plain Python float
    # rather than a NumPy scalar.
    return float(max(min(score, 1.0), -1.0))


In [5]:
# Bedrock runtime client used for every model invocation below; the region is
# fixed to us-east-1.
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-east-1',
)

In [16]:
# Embed the question with the Titan text-embedding model so it can be compared
# against the stored paragraph embeddings.
question = "What happened in October at Amazon?"
input_body = {"inputText": question}
response = bedrock.invoke_model(
    body=json.dumps(input_body),
    modelId='amazon.titan-embed-text-v2:0',
)
# The response body is a stream: read it once, then decode the JSON payload.
raw_payload = response['body'].read()
response_body = json.loads(raw_payload)
query_embedding = response_body['embedding']

In [18]:
# Score every stored paragraph against the query embedding, keeping
# (similarity, paragraph) pairs so they can be ranked afterwards.
similarities = [
    (cosine_similarity(query_embedding, embedding), paragraph)
    for paragraph, embedding in embeddings
]

In [19]:
# Rank the scored paragraphs from most to least similar (on a copy, so
# `similarities` keeps its original order) and show the best five.
top_similarities = list(similarities)
top_similarities.sort(key=lambda scored: scored[0], reverse=True)
print("Top similar paragraphs:", top_similarities[:5])


Top similar paragraphs: [(np.float64(0.329991836801164), "amazon Dear Shareholders: Last year at this time, I shared my enthusiasm and optimism for Amazon's future. Today, I have even more. The reasons are many, but start with the progress we've made in our financial results and customer"), (np.float64(0.3123615761117468), "traffic, sales, and service levels: Amazon.com's employee base grew from 158 to 614, and we significantly strengthened our management team. Distribution center capacity grew from 50,000 to 285,000 square feet, including a 70% expansion of our Seattle facilities and the launch of our second distribution center in Delaware in November."), (np.float64(0.29875156017241244), 'Amazon grew SO quickly the first few years. This coupling was further highlighted by a heavyweight mechanism we used to operate called "NPI." Any new initiative requiring work from multiple internal teams had to be reviewed by this NPI cabal where each team would communicate how many people-weeks th

In [20]:
# Put only the paragraph text into the context. Interpolating the
# (score, paragraph) tuples directly would embed their Python repr in the
# prompt: similarity scores, list/tuple punctuation and escaped quotes.
context_text = "\n\n".join(paragraph for _, paragraph in top_similarities[:5])

prompt_text = f"""
<context>
{context_text}
</context>

<instruction>
Based on the context above, answer the question asked. Only use the information in the context. If the information is not there, answer with "I don't know".
</instruction>

<text>
{question}
</text>
"""

In [21]:
# Build the request for the Claude 3 Sonnet model on Bedrock: one user turn
# carrying the assembled prompt, with temperature 0 and a very small top_p.
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
user_message = {
    "role": "user",
    "content": [{"type": "text", "text": prompt_text}],
}
request_body = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1024,
    "temperature": 0,
    "top_p": 0.002,
    "messages": [user_message],
}

In [22]:
# Send the request to the model and print the text of the first content block
# in its reply.
response = bedrock.invoke_model(
    body=json.dumps(request_body),
    modelId=model_id,
)

# The response body is a stream: read it once, then decode the JSON payload.
raw_answer = response['body'].read()
response_body = json.loads(raw_answer)
first_block = response_body['content'][0]
generated_text = first_block['text']
print(generated_text)

I don't know what specifically happened in October at Amazon based on the given context. The context mentions that Amazon's second distribution center was launched in Delaware in November, but does not provide any details about events in October.
