In [1]:
import boto3
import json
import numpy as np
from typing import List, Union

In [2]:
def cosine_similarity(v1: Union[List[float], np.ndarray],
                      v2: Union[List[float], np.ndarray]) -> float:
    """Calculate the cosine similarity between two vectors.

    Args:
        v1: First vector, given as a list of numbers or a 1-D NumPy array.
        v2: Second vector; must have the same length as ``v1``.

    Returns:
        The cosine of the angle between ``v1`` and ``v2`` as a built-in
        ``float``, clamped to the closed interval [-1.0, 1.0].

    Raises:
        ValueError: If either input is not one-dimensional, if the two
            vectors differ in length, or if either vector has zero
            magnitude (the similarity is undefined in that case).
    """
    # Force float64 up front: with integer inputs np.dot would accumulate in a
    # fixed-width integer dtype and silently wrap around on large values.
    v1_array = np.array(v1, dtype=np.float64)
    v2_array = np.array(v2, dtype=np.float64)

    # Only true vectors are meaningful here; scalars and matrices would make
    # np.dot compute something other than an inner product.
    if v1_array.ndim != 1 or v2_array.ndim != 1:
        raise ValueError("Vectors must be one-dimensional")
    if v1_array.shape != v2_array.shape:
        raise ValueError("Vectors must be of the same length")

    magnitude1 = np.linalg.norm(v1_array)
    magnitude2 = np.linalg.norm(v2_array)
    # A zero-length or all-zero vector has no direction, so refuse to divide.
    if magnitude1 == 0 or magnitude2 == 0:
        raise ValueError("One or both vectors are zero vectors")

    similarity = np.dot(v1_array, v2_array) / (magnitude1 * magnitude2)
    # Rounding error can push the ratio marginally outside [-1, 1]; clamp it
    # and hand back a plain Python float regardless of which bound applied.
    return float(np.clip(similarity, -1.0, 1.0))

In [3]:
# Bedrock runtime client (us-east-1) used by the embedding cells below to invoke models.
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-east-1',
)

In [4]:
# Request payload for the embedding model: the text to embed goes under "inputText".
input_body = {
    "inputText": """the chat is red"""
}

# Invoke the model, stating explicitly that the request and response bodies are JSON.
response = bedrock.invoke_model(
    modelId="amazon.titan-embed-text-v2:0",
    body=json.dumps(input_body),
    contentType="application/json",
    accept="application/json",
)
# Read the response body and parse it as JSON.
response_body = json.loads(response['body'].read())
# Index the key instead of using .get(): a response without "embedding" should
# fail here with a KeyError rather than surface later as None in cosine_similarity.
embedding_1 = response_body['embedding']

In [9]:
# Request payload for the second text to embed; same shape as the first request.
input_body = {
    "inputText": """cat is haha"""
}

# Invoke the model, stating explicitly that the request and response bodies are JSON.
response = bedrock.invoke_model(
    modelId="amazon.titan-embed-text-v2:0",
    body=json.dumps(input_body),
    contentType="application/json",
    accept="application/json",
)
# Read the response body and parse it as JSON.
response_body = json.loads(response['body'].read())
# Index the key instead of using .get(): a response without "embedding" should
# fail here with a KeyError rather than surface later as None in cosine_similarity.
embedding_2 = response_body['embedding']

In [10]:
# Compare the two embeddings; values nearer 1.0 mean the vectors point in more similar directions.
cosine = cosine_similarity(v1=embedding_1, v2=embedding_2)
print(f"Cosine Similarity: {cosine}")

Cosine Similarity: 0.26153519939965786
