### Import libraries

In [23]:
import boto3
import json
import os
import pandas as pd
import numpy as np

### Create Bedrock runtime

In [2]:
bedrock_runtime = boto3.client(region_name = 'us-east-1', service_name='bedrock-runtime')

In [3]:
def embed_text(text):
    '''
    INPUT: str
    OUTPUT: embedding, either Python list or a Numpy array
    '''
    json_request = {'inputText': text}
    body = json.dumps(json_request)
    response = bedrock_runtime.invoke_model(body=body, modelId = 'amazon.titan-embed-text-v1')
    return json.loads(response.get('body').read())['embedding']

In [18]:
PATH = "bedrock-aws/00-Sports-Articles"

data = []

for filename in os.listdir(PATH):
    if filename.endswith('txt'):
        file_path = os.path.join(PATH, filename)

        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        data.append([file_path, content])

In [20]:
df = pd.DataFrame(data, columns=['filename', 'text'])
df

Unnamed: 0,filename,text
0,bedrock-aws/00-Sports-Articles/BasketballGame.txt,Nail-Biting Overtime Battle Sees Lakers Triump...
1,bedrock-aws/00-Sports-Articles/BaseballGame.txt,Extra-Inning Thriller: Yankees Outlast Red Sox...
2,bedrock-aws/00-Sports-Articles/FootballGame.txt,Epic Clash between 49ers and Buccaneers Ends i...


### Create embeddings

In [22]:
df['embedding'] = df['text'].apply(embed_text)
df

Unnamed: 0,filename,text,embedding
0,bedrock-aws/00-Sports-Articles/BasketballGame.txt,Nail-Biting Overtime Battle Sees Lakers Triump...,"[-0.063942306, 0.14368992, -0.018919019, -0.17..."
1,bedrock-aws/00-Sports-Articles/BaseballGame.txt,Extra-Inning Thriller: Yankees Outlast Red Sox...,"[0.2353478, -0.14758441, -0.22002964, -0.30111..."
2,bedrock-aws/00-Sports-Articles/FootballGame.txt,Epic Clash between 49ers and Buccaneers Ends i...,"[-0.48678175, -0.11641871, -0.38582614, 0.0014..."


### Create cosine similarity

In [24]:
def cosine_similarity(v1, v2):
    vector1 = np.array(v1)
    vector2 = np.array(v2)
    
    dot_product = np.dot(np.array(v1), np.array(v2))
    magnitude_v1 = np.linalg.norm(v1)
    magnitude_v2 = np.linalg.norm(v2)

    return dot_product / (magnitude_v1 * magnitude_v2)

### Create embedding for prompt

In [25]:
def most_similar_text(prompt):
    prompt_embedding = embed_text(prompt)
    df['prompt_similarity'] = df['embedding'].apply(lambda vector: cosine_similarity(vector, prompt_embedding))
    return df.nlargest(1, 'prompt_similarity').iloc[0]['text']

In [27]:
print(most_similar_text("What was the score of the 49ers football game?"))

Epic Clash between 49ers and Buccaneers Ends in Thrilling Showdown

Date: October 15, 2023

In a highly anticipated match-up on the gridiron, the San Francisco 49ers squared off against the Tampa Bay Buccaneers on October 15, 2023, in what turned out to be a thrilling display of football prowess. With fans eagerly watching, both teams brought their A-game, resulting in an electrifying contest that will be remembered for years to come.

The final scoreline of the game read 27-24 in favor of the 49ers, but the journey to that outcome was nothing short of extraordinary.

First Quarter Fireworks:

The first quarter set the tone for the entire game as the Buccaneers, led by their star quarterback, Tom Brady, executed a perfectly choreographed drive ending in a touchdown pass to wide receiver Mike Evans. However, the 49ers' defense, known for its tenacity, responded with a crucial interception.

Back-and-Forth Battle:

As the game unfolded, both teams traded blows with pinpoint passes and st

### Create RAG prompt

In [28]:
def llm_with_rag(prompt):
    rag_text = most_similar_text(prompt)
    full_prompt = f"{rag_text}\n\nANSWER THE FOLLOWING QUESTION:\n{prompt}"
    body = json.dumps({'inputText': full_prompt})

    response = bedrock_runtime.invoke_model(body=body, modelId='amazon.titan-text-express-v1')
    response_body = json.loads(response.get('body').read())
    return response_body['results'][0]['outputText']

In [29]:
llm_with_rag("What was the score of the 49ers game?")

'\nThe final score of the 49ers game was 27-24.'