# Workshop: AWS Bedrock

In [2]:
!pip install boto3 --upgrade

[0m

In [3]:
!pip install Pillow --upgrade
!pip install numpy --upgrade
!pip install pandas --upgrade
!pip install scikit-learn --upgrade
#!pip install psycopg2-binary --upgrade
!pip install requests --upgrade
!pip install requests_aws4auth --upgrade
!pip install pinecone-client --upgrade
!pip install threadpoolctl==3.1.0
!pip install numexpr==2.8.4
!pip install bottleneck==1.3.6

[0m

In [4]:
# create an embedding using Titan from Amazon
import json
import boto3

# Runtime client used to invoke Bedrock models. No region or credentials are
# passed here, so boto3 falls back to its default configuration.
# Later cells reuse this `bedrock` name.
bedrock = boto3.client(
    service_name='bedrock-runtime'
)

modelId = 'amazon.titan-embed-text-v1'
accept = 'application/json'
contentType = 'application/json'
prompt = """
hello
"""
# Named `request_payload` rather than `input` so the builtin input() is not
# shadowed for the rest of the notebook session.
request_payload = {
    'inputText': prompt
}
body = json.dumps(request_payload)
response = bedrock.invoke_model(
    body=body, modelId=modelId, accept=accept, contentType=contentType)
# The response body is a file-like object: read it once, then decode the JSON.
response_body = json.loads(response.get('body').read())
embedding = response_body['embedding']
print(embedding)

[0.44140625, -0.037353516, -0.29882812, 0.359375, 0.55859375, 0.06591797, 0.34765625, -0.0006828308, 0.061279297, -0.025512695, 0.453125, 0.7109375, -0.021728516, -0.08691406, 0.34570312, -0.32226562, -0.21777344, -0.40234375, -0.8828125, 0.26171875, -0.48632812, 0.3515625, -0.037353516, 0.19726562, -0.7578125, -0.6796875, 0.68359375, -0.625, 0.20898438, -0.7890625, -0.18554688, 0.76171875, 0.46484375, -0.7109375, -0.15429688, -0.16113281, 0.40820312, -0.15820312, 0.5859375, 0.12792969, -0.25976562, -0.45703125, 1.046875, -0.21191406, 0.515625, -0.026000977, -0.55078125, 0.23046875, 0.84375, 0.484375, 0.36132812, 0.41015625, -0.5234375, -0.026977539, -0.053466797, 0.66015625, 0.22558594, -0.091796875, -0.0087890625, -0.53515625, 0.028198242, -0.4609375, -0.31054688, 0.65625, 0.609375, -0.3125, -0.72265625, 0.42382812, -0.47460938, -0.057128906, 0.4453125, -0.46289062, 0.265625, 0.49023438, 0.25390625, 0.14941406, 0.64453125, 0.14648438, 0.34765625, 0.38867188, 0.040771484, 0.19335938, 

In [5]:
# Number of components in the embedding vector produced by the previous cell.
size = len(embedding)
print(size)

1536


In [6]:
# euclidean distance
import math
def get_embedding(bedrock, text):
    """Return the embedding of ``text`` from the Titan text-embedding model.

    Args:
        bedrock: Client object exposing ``invoke_model`` (here a boto3
            'bedrock-runtime' client).
        text: The text to embed; sent as the request's 'inputText' field.

    Returns:
        The value stored under 'embedding' in the JSON response body.
    """
    # The request body is a JSON document carrying only the input text
    payload = json.dumps({'inputText': text})

    raw = bedrock.invoke_model(
        body=payload,
        modelId='amazon.titan-embed-text-v1',
        accept='application/json',
        contentType='application/json',
    )

    # The 'body' entry is file-like: read it fully, then decode the JSON
    return json.loads(raw.get('body').read())['embedding']

def calculate_distance(v1, v2):
    """Return the Euclidean distance between vectors ``v1`` and ``v2``.

    A smaller value means the two vectors are closer together.
    """
    return math.dist(v1, v2)
# main function
def main():
    """Embed two greetings with Bedrock and print their Euclidean distance."""
    # Runtime client shared by both embedding requests
    bedrock = boto3.client(service_name='bedrock-runtime')

    first, second = 'hello', 'good day'

    # Embed the phrases in order, then measure how far apart the vectors are
    vectors = [get_embedding(bedrock, phrase) for phrase in (first, second)]
    distance = calculate_distance(*vectors)

    print(f"Euclidean distance between '{first}' and '{second}': {distance}")


# Run the main function
if __name__ == "__main__":
    main()

Euclidean distance between 'hello' and 'good day': 23.33325186409251


In [7]:
!pip install scikit-learn
!pip install matplotlib

[0m

In [8]:
# Euclidean distances from 'hello' to a handful of other phrases
hello = get_embedding(bedrock, 'hello')
texts = [
    'hi', 'good day', 'greetings', 'how are you', 'what is your name',
    "let's go shopping", 'what is general relativity',
    'she sells sea shells on the sea shore',
]
# One embedding per phrase, in the same order as `texts`
embeddings = [get_embedding(bedrock, phrase) for phrase in texts]
# Distance of every phrase embedding from the reference embedding of 'hello'
distances = [calculate_distance(hello, vector) for vector in embeddings]

# Report each phrase next to its distance
for text, distance in zip(texts, distances):
    print(f"Distance from 'hello' to '{text}': {distance}")
    


Distance from 'hello' to 'hi': 9.094554322587365
Distance from 'hello' to 'good day': 23.33325186409251
Distance from 'hello' to 'greetings': 15.699801139590388
Distance from 'hello' to 'how are you': 20.948954281263347
Distance from 'hello' to 'what is your name': 24.261418207910705
Distance from 'hello' to 'let's go shopping': 25.30846242351385
Distance from 'hello' to 'what is general relativity': 29.69741947688189
Distance from 'hello' to 'she sells sea shells on the sea shore': 26.700609050941793


In [9]:
# dot similarity 

from numpy import dot
from numpy.linalg import norm

def get_embedding(bedrock_client, text):
    """Return the embedding of ``text`` from the Titan text-embedding model.

    NOTE(review): a function with this name is already defined earlier in the
    notebook; once this cell runs, this definition is the one in effect.

    Args:
        bedrock_client: Client object exposing ``invoke_model`` (here a boto3
            'bedrock-runtime' client).
        text: The text to embed; sent as the request's 'inputText' field.

    Returns:
        The value stored under 'embedding' in the JSON response body.
    """
    # Everything invoke_model needs, gathered in one place
    request = {
        'body': json.dumps({'inputText': text}),
        'modelId': 'amazon.titan-embed-text-v1',
        'accept': 'application/json',
        'contentType': 'application/json',
    }
    response = bedrock_client.invoke_model(**request)

    # The 'body' entry is file-like: read it fully, then decode the JSON
    payload = json.loads(response.get('body').read())
    return payload['embedding']

def calculate_dot_product_similarity(vector1, vector2):
    """Return the dot product of ``vector1`` and ``vector2`` (via numpy.dot)."""
    return dot(vector1, vector2)

# Main function
def main():
    """Print dot-product similarities between 'hello' and several phrases.

    Creates its own Bedrock runtime client and uses it for every embedding
    request. Any exception raised while embedding or comparing is caught and
    reported with a printed message instead of propagating.
    """
    # Initialize the Bedrock client
    bedrock_client = boto3.client(service_name='bedrock-runtime')

    # Define the texts for which embeddings are to be calculated
    text1 = 'hello'
    text2 = 'good day'

    try:
        # Get embeddings for the texts
        embedding1 = get_embedding(bedrock_client, text1)
        embedding2 = get_embedding(bedrock_client, text2)

        # Calculate the dot product similarity between the two embeddings
        similarity = calculate_dot_product_similarity(embedding1, embedding2)

        # Print the calculated similarity
        print(f"Dot product similarity between '{text1}' and '{text2}': {similarity}")

        # Use the client created above, so this function no longer depends on
        # a `bedrock` variable left over from an earlier cell.
        hello = get_embedding(bedrock_client, 'hello')
        texts = [
            'hi',
            'good day',
            'greetings',
            'how are you',
            'what is your name',
            "let's go shopping",
            'what is general relativity',
            'she sells sea shells on the sea shore'
        ]
        # Get embeddings for the texts
        embeddings = [get_embedding(bedrock_client, text) for text in texts]
        # Dot product of each phrase embedding with the reference 'hello'
        similarities = [calculate_dot_product_similarity(hello, embedding) for embedding in embeddings]

        # Print the similarities
        for text, score in zip(texts, similarities):
            print(f"Dot product similarity betweew 'hello' and '{text}': {score}")

    except Exception as e:
        # Report the failure instead of raising, so the cell finishes cleanly
        print(f"An error occurred: {e}")

# Run the main function
if __name__ == "__main__":
    main()


Dot product similarity between 'hello' and 'good day': 245.3465466016232
Dot product similarity betweew 'hello' and 'hi': 523.4078294500584
Dot product similarity betweew 'hello' and 'good day': 245.3465466016232
Dot product similarity betweew 'hello' and 'greetings': 328.5758089531838
Dot product similarity betweew 'hello' and 'how are you': 218.5153600999188
Dot product similarity betweew 'hello' and 'what is your name': 173.35412997474873
Dot product similarity betweew 'hello' and 'let's go shopping': 127.174288252552
Dot product similarity betweew 'hello' and 'what is general relativity': 69.75002899099037
Dot product similarity betweew 'hello' and 'she sells sea shells on the sea shore': 94.28087665222931


In [10]:
# cosine similarity
def calculate_cousin_similarity(v1, v2):
    """Return the cosine similarity of ``v1`` and ``v2``.

    Computed as the dot product divided by the product of the two norms.
    The name is kept as spelled because callers in this notebook use it.
    """
    numerator = dot(v1, v2)
    denominator = norm(v1) * norm(v2)
    return numerator / denominator

# Main function
def main():
    """Print cosine similarities between 'hello' and several phrases.

    Creates its own Bedrock runtime client and uses it for every embedding
    request. Any exception raised while embedding or comparing is caught and
    reported with a printed message instead of propagating.
    """
    # Initialize the Bedrock client
    bedrock_client = boto3.client(service_name='bedrock-runtime')

    # Define the texts for which embeddings are to be calculated
    text1 = 'hello'
    text2 = 'good day'

    try:
        # Get embeddings for the texts
        embedding1 = get_embedding(bedrock_client, text1)
        embedding2 = get_embedding(bedrock_client, text2)

        # Calculate the cosine similarity between the two embeddings
        similarity = calculate_cousin_similarity(embedding1, embedding2)

        # The value printed here is a cosine similarity, so label it as one
        print(f"Cosine similarity between '{text1}' and '{text2}': {similarity}")

        # Use the client created above, so this function no longer depends on
        # a `bedrock` variable left over from an earlier cell.
        hello = get_embedding(bedrock_client, 'hello')
        texts = [
            'hi',
            'good day',
            'greetings',
            'how are you',
            'what is your name',
            "let's go shopping",
            'what is general relativity',
            'she sells sea shells on the sea shore'
        ]
        # Get embeddings for the texts
        embeddings = [get_embedding(bedrock_client, text) for text in texts]
        # Cosine similarity of each phrase embedding with the reference 'hello'
        similarities = [calculate_cousin_similarity(hello, embedding) for embedding in embeddings]

        # Print the similarities
        for text, score in zip(texts, similarities):
            print(f"Cosine similarity betweew 'hello' and '{text}': {score}")

    except Exception as e:
        # Report the failure instead of raising, so the cell finishes cleanly
        print(f"An error occurred: {e}")

# Run the main function
if __name__ == "__main__":
    main()

Dot product similarity between 'hello' and 'good day': 0.47547669109839213
Cosine similarity betweew 'hello' and 'hi': 0.9268443924716357
Cosine similarity betweew 'hello' and 'good day': 0.47547669109839213
Cosine similarity betweew 'hello' and 'greetings': 0.7481008999511818
Cosine similarity betweew 'hello' and 'how are you': 0.5187598232110757
Cosine similarity betweew 'hello' and 'what is your name': 0.3777644972419649
Cosine similarity betweew 'hello' and 'let's go shopping': 0.29329218796125756
Cosine similarity betweew 'hello' and 'what is general relativity': 0.13715628607522645
Cosine similarity betweew 'hello' and 'she sells sea shells on the sea shore': 0.21532926299725408


In [11]:
#search text by similarity


def calculate_distance(v1, v2):
    """Return the Euclidean distance between vectors ``v1`` and ``v2``.

    NOTE(review): this repeats the calculate_distance defined earlier in the
    notebook; both delegate to math.dist.
    """
    return math.dist(v1, v2)

# Function to search the dataset for the closest text to the query vector
def search(dataset, query_vector):
    """Return the text of the dataset item closest to ``query_vector``.

    Args:
        dataset: Sequence of dicts, each holding an 'embedding' vector and a
            'text' value. It is only read, never modified.
        query_vector: Vector compared against every item's embedding with
            calculate_distance.

    Returns:
        The 'text' of the item with the smallest distance. On a tie, the item
        that appears first in ``dataset`` wins.

    Raises:
        IndexError: If ``dataset`` is empty.
    """
    if not dataset:
        raise IndexError("search() requires a non-empty dataset")

    # min() scans once and leaves the caller's data untouched: no 'distance'
    # key is written into the items and the list is not re-ordered.
    closest = min(
        dataset,
        key=lambda item: calculate_distance(item['embedding'], query_vector),
    )
    return closest['text']

# Main function
def main():
    """Find which of five physics passages is closest to 'Isaac Newton'.

    Embeds every passage and the query with Bedrock, then prints the passage
    that search() selects. Only the query embedding and the search run inside
    the try block; the passages are embedded before it.
    """
    client = boto3.client(service_name='bedrock-runtime')

    # Passages that make up the searchable corpus
    passages = [
        """
        The theory of general relativity says that the observed gravitational effect between masses results from their warping of spacetime.
        """,
        """
        Quantum mechanics allows the calculation of properties and behaviour of physical systems. It is typically applied to microscopic systems: molecules, atoms and sub-atomic particles.
        """,
        """
        Wavelet theory is essentially the continuous-time theory that corresponds to dyadic subband transforms — i.e., those where the L (LL) subband is recursively split over and over.
        """,
        """
        Every particle attracts every other particle in the universe with a force that is proportional to the product of their masses and inversely proportional to the square of the distance between their centers.
        """,
        """
        The electromagnetic spectrum is the range of frequencies (the spectrum) of electromagnetic radiation and their respective wavelengths and photon energies.
        """
    ]

    # Pair each passage with its embedding before any query is made
    corpus = []
    for passage in passages:
        corpus.append({'text': passage, 'embedding': get_embedding(client, passage)})

    question = 'Isaac Newton'

    try:
        # Embed the query and look up the nearest passage
        best_match = search(corpus, get_embedding(client, question))
        print(f"Closest text to '{question}': {best_match}")
    except Exception as err:
        # Report the failure instead of raising, so the cell finishes cleanly
        print(f"An error occurred: {err}")


# Run the main function
if __name__ == "__main__":
    main()


Closest text to 'Isaac Newton': 
        Every particle attracts every other particle in the universe with a force that is proportional to the product of their masses and inversely proportional to the square of the distance between their centers.
        


In [13]:
# sentiment analysis 

# Function to classify the query vector into the closest class
def classify(classes, query_vector):
    """Return the name of the class whose embedding is closest to ``query_vector``.

    Args:
        classes: Sequence of dicts, each holding an 'embedding' vector and a
            'name' value. It is only read, never modified.
        query_vector: Vector compared against every class embedding with
            calculate_distance.

    Returns:
        The 'name' of the class with the smallest distance. On a tie, the
        class that appears first in ``classes`` wins.

    Raises:
        IndexError: If ``classes`` is empty.
    """
    if not classes:
        raise IndexError("classify() requires at least one class")

    # min() scans once and leaves the caller's data untouched: no 'distance'
    # key is written into the items and the list is not re-ordered.
    closest = min(
        classes,
        key=lambda item: calculate_distance(item['embedding'], query_vector),
    )
    return closest['name']

# Main function
def main():
    """Classify a customer message as 'positive' or 'negative' by embedding distance.

    Each class is represented by the embedding of its description; the message
    gets the name of the class that classify() selects. Only the query
    embedding and the classification run inside the try block.
    """
    client = boto3.client(service_name='bedrock-runtime')

    # Candidate labels, each described in plain language
    labels = [
        {'name': 'positive', 'description': 'customer demonstrated positive sentiment in the response.'},
        {'name': 'negative', 'description': 'customer demonstrated negative sentiment in the response.'},
    ]

    # Attach the embedding of its description to every label
    for label in labels:
        label['embedding'] = get_embedding(client, label['description'])

    message = 'Steve helped me solve the problem in just a few minutes. Thank you for the great work!'
    # Alternative message to try:
    # 'It took too long to get a response from your support engineer!'

    try:
        # Embed the message and pick the nearest label
        verdict = classify(labels, get_embedding(client, message))
        print(f"Classification result for '{message}': {verdict}")
    except Exception as err:
        # Report the failure instead of raising, so the cell finishes cleanly
        print(f"An error occurred: {err}")


# Run the main function
if __name__ == "__main__":
    main()


Classification result for 'Steve helped me solve the problem in just a few minutes. Thank you for the great work!': positive
