In [1]:
from decouple import config
from openai import OpenAI

In [2]:
# Credentials and endpoints are looked up by name through decouple's config(),
# so no secret value is written into the notebook itself.
OPENAI_API_KEY = config("OPENAI_API_KEY")
UPSTASH_VECTOR_REST_URL = config("UPSTASH_VECTOR_REST_URL")
UPSTASH_VECTOR_REST_TOKEN = config("UPSTASH_VECTOR_REST_TOKEN")

In [3]:
# Module-level OpenAI client; get_embedding() below reads this global.
client = OpenAI(api_key=OPENAI_API_KEY)

In [4]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` from the OpenAI embeddings endpoint.

    Newlines in ``text`` are replaced with spaces before the request is sent.

    Args:
        text: The string to embed. Must not be the empty string.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list.

    Raises:
        ValueError: If ``text`` is ``""``. The embeddings endpoint rejects
            empty input, so fail locally instead of spending a request.
    """
    if text == "":
        raise ValueError("text must be a non-empty string")
    cleaned = text.replace("\n", " ")
    # The endpoint takes a list of inputs; we send one and read back item 0.
    response = client.embeddings.create(input=[cleaned], model=model)
    return response.data[0].embedding

In [5]:
# Sample sentences; each one is embedded individually in the next cell.
documents = [
    "The cat jumped over the dog",
    "The cow jumped over the moon",
    "The turkey ran in circles",
]

In [6]:
# One get_embedding() call per document, results kept in document order.
embeddings = list(map(get_embedding, documents))

In [7]:
# Map each embedding to its position in `embeddings` (0, 1, 2, ...).
# Built in one expression so no loop variables are left behind in the
# notebook's global namespace.
dataset = dict(enumerate(embeddings))

In [13]:
from upstash_vector import Index, Vector

# Handle to the Upstash Vector index, built from the URL/token read earlier.
index = Index(
    url=UPSTASH_VECTOR_REST_URL,
    token=UPSTASH_VECTOR_REST_TOKEN,
)

In [16]:
# Wrap every (id, embedding) pair from `dataset` in a Vector, echoing each id.
vectors = []
for my_id, embedding in dataset.items():
    print(my_id)
    vectors.append(Vector(id=my_id, vector=embedding))

0
1
2


In [17]:
# Display the Vector objects built above; the repr prints every embedding
# value, so the output is long.
vectors

[Vector(id=0, vector=[-0.0018775435164570808, -0.04121466726064682, -0.009573342278599739, -0.01355969812721014, 0.002230533864349127, 0.026169974356889725, -0.012099049054086208, 0.0033199351746588945, 0.005547426175326109, -0.01973094418644905, 0.011484358459711075, 0.017880788072943687, -0.036857061088085175, 0.024405023083090782, -0.021861057728528976, 0.03140396997332573, -0.029529469087719917, -0.03464174270629883, -0.08067655563354492, 0.007881422527134418, -0.024745840579271317, 0.031257905066013336, 0.00640251487493515, -0.030454548075795174, -0.005614372435957193, 0.011606079526245594, 0.028872177004814148, 0.013523181900382042, 0.014789078384637833, 0.002477018628269434, 0.004859703592956066, -0.026802923530340195, -0.0014674965059384704, -0.050927989184856415, -0.024003343656659126, 0.00998719222843647, -0.026121286675333977, -0.023431256413459778, -0.013133675791323185, 0.0010848367819562554, -0.030186761170625687, -0.024684980511665344, 0.015215101651847363, -0.0221044998

In [18]:
# Send all prepared vectors to the index in a single upsert call.
index.upsert(vectors=vectors)

'Success'

In [30]:
# Embed one more sentence and store it under id 3.
new_document = "The moose sat by the dog"
dataset[3] = get_embedding(new_document)

In [32]:
# Upsert only the embedding stored under id 3.
new_vector = Vector(id=3, vector=dataset[3])
index.upsert(vectors=[new_vector])

'Success'

In [35]:
# Text to search with; it is embedded through the same get_embedding() helper
# that produced the stored vectors.
query_str = "The moose sat by the dog"
query_embedding = get_embedding(query_str)

In [36]:
# Query the index with the embedded query text, requesting top_k=3 results
# with their vectors and metadata included.
results = index.query(
    vector=query_embedding, top_k=3, include_vectors=True, include_metadata=True
)

# Print each result's id followed by its score multiplied by 100.
for result in results:
    scaled_score = result.score * 100
    print(result.id, scaled_score)

3 100.0
2 86.591685
0 72.285026
