In [3]:
import json
import boto3

# Client for the Bedrock runtime API; no region or credentials are passed
# explicitly here.
bedrock = boto3.client(
    service_name='bedrock-runtime'
)

modelId = 'amazon.titan-embed-text-v1'
accept = 'application/json'
contentType = 'application/json'
prompt = """
hello
"""
# Named request_payload rather than `input` so the builtin input() is not
# shadowed for the rest of the kernel session.
request_payload = {
    'inputText': prompt
}
body = json.dumps(request_payload)
response = bedrock.invoke_model(
    body=body, modelId=modelId, accept=accept, contentType=contentType)
# The response body is a stream: read it fully, then parse it as JSON.
response_body = json.loads(response.get('body').read())
embedding = response_body['embedding']
# The foundation model decides the number of elements (dimensions) in the
# vector, not the input text. With the amazon.titan-embed-text-v1 model, the
# size of the output vector is 1536.
size = len(embedding)
print(size)


1536


In [4]:
import json
import boto3
import math

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Return the embedding vector that a Bedrock model produces for `text`.

    Args:
        bedrock: Client object exposing `invoke_model` (the code below passes
            a boto3 'bedrock-runtime' client).
        text: Text to embed; sent as the `inputText` field of the request.
        model_id: Identifier of the embedding model to invoke. Defaults to
            'amazon.titan-embed-text-v1'.

    Returns:
        The value of the `embedding` field in the JSON response body.

    Raises:
        KeyError: If the parsed response body has no `embedding` field.
    """
    # Not named `input`, so the builtin of that name stays usable here.
    request_payload = {'inputText': text}
    response = bedrock.invoke_model(
        body=json.dumps(request_payload),
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # The response body is a stream: read it fully, then parse it as JSON.
    response_body = json.loads(response.get('body').read())
    return response_body['embedding']

def calculate_distance(v1, v2):
    """Return the Euclidean distance between the points `v1` and `v2`.

    Delegates directly to `math.dist`.
    """
    return math.dist(v1, v2)

# Embed two phrases and print the distance between their vectors.
bedrock = boto3.client('bedrock-runtime')
text1, text2 = 'hello', 'good day'
v1 = get_embedding(bedrock, text1)
v2 = get_embedding(bedrock, text2)
distance = calculate_distance(v1, v2)
print(distance)


23.33325186409251


In [5]:
# Print the distance between the embedding of 'hello' and the embedding of
# each phrase below, one value per line in list order.
bedrock = boto3.client('bedrock-runtime')
hello = get_embedding(bedrock, 'hello')
texts = [
    'hi', 'good day', 'greetings', 'how are you', 'what is your name',
    "let's go shopping", 'what is general relativity',
    'she sells sea shells on the sea shore',
]
for text in texts:
    embedding = get_embedding(bedrock, text)
    distance = calculate_distance(hello, embedding)
    print(distance)


9.094554322587365
23.33325186409251
15.699801139590388
20.948954281263347
24.261418207910705
25.30846242351385
29.69741947688189
26.700609050941793


In [6]:
import json
import boto3
from numpy import dot
from numpy.linalg import norm

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Return the embedding vector that a Bedrock model produces for `text`.

    Args:
        bedrock: Client object exposing `invoke_model` (the code below passes
            a boto3 'bedrock-runtime' client).
        text: Text to embed; sent as the `inputText` field of the request.
        model_id: Identifier of the embedding model to invoke. Defaults to
            'amazon.titan-embed-text-v1'.

    Returns:
        The value of the `embedding` field in the JSON response body.

    Raises:
        KeyError: If the parsed response body has no `embedding` field.
    """
    # Not named `input`, so the builtin of that name stays usable here.
    request_payload = {'inputText': text}
    response = bedrock.invoke_model(
        body=json.dumps(request_payload),
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # The response body is a stream: read it fully, then parse it as JSON.
    response_body = json.loads(response.get('body').read())
    return response_body['embedding']

def calculate_dot_product_similarity(v1, v2):
    """Return the dot product of `v1` and `v2`, computed with `numpy.dot`."""
    return dot(v1, v2)

# Embed two phrases and print the dot product of their vectors.
bedrock = boto3.client('bedrock-runtime')
text1, text2 = 'hello', 'good day'
v1 = get_embedding(bedrock, text1)
v2 = get_embedding(bedrock, text2)
similarity = calculate_dot_product_similarity(v1, v2)
print(similarity)


245.3465466016232


In [7]:
# Print the dot product between the embedding of 'hello' and the embedding
# of each phrase below, one value per line in list order.
bedrock = boto3.client('bedrock-runtime')
hello = get_embedding(bedrock, 'hello')
texts = [
    'hi', 'good day', 'greetings', 'how are you', 'what is your name',
    "let's go shopping", 'what is general relativity',
    'she sells sea shells on the sea shore',
]
for text in texts:
    embedding = get_embedding(bedrock, text)
    similarity = calculate_dot_product_similarity(hello, embedding)
    print(similarity)


523.4078294500584
245.3465466016232
328.5758089531838
218.5153600999188
173.35412997474873
127.174288252552
69.75002899099037
94.28087665222931


In [8]:
import json
import boto3
from numpy import dot
from numpy.linalg import norm

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Return the embedding vector that a Bedrock model produces for `text`.

    Args:
        bedrock: Client object exposing `invoke_model` (the code below passes
            a boto3 'bedrock-runtime' client).
        text: Text to embed; sent as the `inputText` field of the request.
        model_id: Identifier of the embedding model to invoke. Defaults to
            'amazon.titan-embed-text-v1'.

    Returns:
        The value of the `embedding` field in the JSON response body.

    Raises:
        KeyError: If the parsed response body has no `embedding` field.
    """
    # Not named `input`, so the builtin of that name stays usable here.
    request_payload = {'inputText': text}
    response = bedrock.invoke_model(
        body=json.dumps(request_payload),
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # The response body is a stream: read it fully, then parse it as JSON.
    response_body = json.loads(response.get('body').read())
    return response_body['embedding']

def calculate_cousin_similarity(v1, v2):
    """Return the cosine similarity of `v1` and `v2`.

    Computed as dot(v1, v2) / (norm(v1) * norm(v2)). The function name
    misspells "cosine"; it is kept so existing callers continue to work, and
    `calculate_cosine_similarity` is provided as a correctly spelled alias.

    Args:
        v1: First vector (any sequence accepted by numpy's `dot` and `norm`).
        v2: Second vector, of the same length as `v1`.

    Returns:
        The cosine of the angle between the two vectors, or 0.0 when either
        vector has zero norm (the ratio is undefined there; 0.0 is returned
        by convention instead of dividing by zero and yielding nan).
    """
    norm_product = norm(v1) * norm(v2)
    # Guard the division: a zero-length vector has no direction.
    if norm_product == 0:
        return 0.0
    return dot(v1, v2) / norm_product


# Correctly spelled name for the same function.
calculate_cosine_similarity = calculate_cousin_similarity

# Embed two phrases and print the cosine similarity of their vectors.
bedrock = boto3.client('bedrock-runtime')
text1, text2 = 'hello', 'good day'
v1 = get_embedding(bedrock, text1)
v2 = get_embedding(bedrock, text2)
similarity = calculate_cousin_similarity(v1, v2)
print(similarity)


0.47547669109839213


In [9]:
# Print the cosine similarity between the embedding of 'hello' and the
# embedding of each phrase below, one value per line in list order.
bedrock = boto3.client('bedrock-runtime')
hello = get_embedding(bedrock, 'hello')
texts = [
    'hi', 'good day', 'greetings', 'how are you', 'what is your name',
    "let's go shopping", 'what is general relativity',
    'she sells sea shells on the sea shore',
]
for text in texts:
    embedding = get_embedding(bedrock, text)
    similarity = calculate_cousin_similarity(hello, embedding)
    print(similarity)


0.9268443924716357
0.47547669109839213
0.7481008999511818
0.5187598232110757
0.3777644972419649
0.29329218796125756
0.13715628607522645
0.21532926299725408
