In [3]:
import json
import boto3

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Request an embedding for ``text`` through a Bedrock runtime client.

    Args:
        bedrock: Client object exposing ``invoke_model`` (this notebook
            creates it with ``boto3.client(service_name='bedrock-runtime')``).
        text: String sent to the model as the ``inputText`` field.
        model_id: Identifier of the model to invoke. Defaults to the Titan
            text embedding model this notebook was written for.
            NOTE(review): any other model must accept an ``inputText``
            request and answer with an ``embedding`` field -- confirm
            before switching.

    Returns:
        The value of the ``embedding`` field of the decoded JSON response
        (presumably a list of floats -- confirm against the model docs).

    Raises:
        KeyError: If the decoded response has no ``embedding`` field.
    """
    # Named ``payload`` rather than ``input`` so the built-in is not shadowed.
    payload = {'inputText': text}
    response = bedrock.invoke_model(
        body=json.dumps(payload),
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # The response body is a stream-like object: read it fully, then decode.
    response_body = json.loads(response.get('body').read())
    return response_body['embedding']

# main function
bedrock = boto3.client(
    service_name='bedrock-runtime'
)
# some random data: every (person, action, place) combination becomes one sentence
people = ['Albert Einstein', 'Isaac Newton', 'Stephen Hawking',
          'Galileo Galilei', 'Niels Bohr', 'Werner Heisenberg',
          'Marie Curie', 'Ernest Rutherford', 'Michael Faraday', 'Richard Feynman']
actions = ['plays basketball', 'teaches physics', 'sells sea shells',
           'collects tax', 'drives buses', 'researches into gravity',
           'manages a shop', 'supervises graduate students',
           'works as a support engineer', 'runs a bank']
places = ['London', 'Sydney', 'Los Angeles', 'San Francisco', 'Beijing',
          'Cape Town', 'Paris', 'Cairo', 'New Delhi', 'Seoul']
# create a data file: one JSON object per line with the keys id, text and embedding
count = 1  # 1-based record id, incremented after each line is written
with open('dataset.json', 'w') as outfile:
    for name in people:
        for action in actions:
            for place in places:
                text = f'{name} {action} in {place}.'
                # One model invocation per sentence.
                embedding = get_embedding(bedrock, text)
                # The counter is used directly instead of binding a top-level
                # variable called ``id``, which would replace the built-in
                # id() for every later cell run in the same kernel.
                item = {'id': count, 'text': text, 'embedding': embedding}
                outfile.write(json.dumps(item) + '\n')
                count += 1
# Report success only once the with-block has closed (and flushed) the file.
print('Dataset created.')


Dataset created.


In [4]:
import json
import math
import time
from datetime import datetime

import boto3

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Request an embedding for ``text`` through a Bedrock runtime client.

    Args:
        bedrock: Client object exposing ``invoke_model`` (this notebook
            creates it with ``boto3.client(service_name='bedrock-runtime')``).
        text: String sent to the model as the ``inputText`` field.
        model_id: Identifier of the model to invoke. Defaults to the Titan
            text embedding model this notebook was written for.
            NOTE(review): any other model must accept an ``inputText``
            request and answer with an ``embedding`` field -- confirm
            before switching.

    Returns:
        The value of the ``embedding`` field of the decoded JSON response
        (presumably a list of floats -- confirm against the model docs).

    Raises:
        KeyError: If the decoded response has no ``embedding`` field.
    """
    # Named ``payload`` rather than ``input`` so the built-in is not shadowed.
    payload = {'inputText': text}
    response = bedrock.invoke_model(
        body=json.dumps(payload),
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # The response body is a stream-like object: read it fully, then decode.
    response_body = json.loads(response.get('body').read())
    return response_body['embedding']

def load_dataset(filename):
    """Load a file containing one JSON document per line.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped instead of being handed to the JSON parser.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one decoded object per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename) as file:
        return [json.loads(line) for line in file if line.strip()]

def calculate_distance(v1, v2):
    """Return the Euclidean distance between points ``v1`` and ``v2``.

    Thin wrapper around ``math.dist``; both arguments are passed through
    unchanged.
    """
    return math.dist(v1, v2)
    
def search(dataset, embedding):
    """Return the text of the dataset item nearest to ``embedding``.

    Every item gets a ``'distance'`` key holding the result of
    ``calculate_distance(item['embedding'], embedding)``, and ``dataset`` is
    then sorted in place by that key, so the caller's list is modified.
    The time spent in each of the two phases is printed in milliseconds.

    Args:
        dataset: List of dicts, each with ``'embedding'`` and ``'text'`` keys.
        embedding: Query vector compared against every item's embedding.

    Returns:
        The ``'text'`` value of the item with the smallest distance.

    Raises:
        IndexError: If ``dataset`` is empty.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; wall-clock datetime.now() can jump when the system clock is
    # adjusted and would then report wrong (even negative) durations.
    started = time.perf_counter()
    for item in dataset:
        item['distance'] = calculate_distance(item['embedding'], embedding)
    scored = time.perf_counter()
    dataset.sort(key=lambda x: x['distance'])
    ranked = time.perf_counter()

    ms1 = 1000 * (scored - started)
    ms2 = 1000 * (ranked - scored)
    print(str(ms1) + 'ms in calculating distances')
    print(str(ms2) + 'ms in sorting distances')
    return dataset[0]['text']

# main function
# Runtime client that get_embedding uses to invoke the model.
bedrock = boto3.client(service_name='bedrock-runtime')

# Load the stored records, embed the query sentence, and print the text of
# the record that search() ranks first (smallest distance).
dataset = load_dataset('dataset.json')
query = 'Lady Gaga purchased a necklace in Singapore.'
embedding = get_embedding(bedrock, query)
result = search(dataset, embedding)
print(result)


30.346999999999998ms in calculating distances
0.33799999999999997ms in sorting distances
Marie Curie sells sea shells in Los Angeles.
