# Vector database

## Global setup

In [1]:
# Global constants
# Ollama endpoint that get_embedding posts to
OLLAMA_EMBEDDING_URL = "http://localhost:11434/api/embed"
# Embedding model name passed with every embedding request in this notebook
EMBEDDING_MODEL = "nomic-embed-text"

In [2]:
# Global imports
import json
import requests

In [3]:
# Global utility functions
def get_embedding(session: requests.Session, model_name: str, text_input):
    """Request embeddings for ``text_input`` from the Ollama embedding endpoint.

    Args:
        session: Session used to send the POST request.
        model_name: Value sent as the ``"model"`` field of the request.
        text_input: Value sent as the ``"input"`` field; the cells in this
            notebook pass a list of strings.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = session.post(
        OLLAMA_EMBEDDING_URL,
        # ``json=`` serialises the payload and sets the JSON Content-Type header
        json={
            "model": model_name,
            "input": text_input,
        },
    )
    # Fail here instead of letting callers hit a KeyError on "embeddings" later
    response.raise_for_status()
    return response.json()

In [4]:
# One HTTP session, passed to every get_embedding call in the cells below
session = requests.Session()

## Try embeddings

In [5]:
# Embed a single sample sentence; the input is a one-element list
response = get_embedding(session, EMBEDDING_MODEL, ["Hello world!"])
# The "embeddings" entry of the parsed JSON holds one vector per input (displayed below)
response["embeddings"]

[[0.0053903093,
  -0.0026179042,
  -0.17719288,
  -0.011143332,
  -0.013133138,
  0.0702707,
  -0.009469159,
  -0.026641931,
  -0.009747269,
  -0.058955394,
  0.013530472,
  0.055808485,
  0.022312537,
  0.06133417,
  0.024879402,
  -0.04767762,
  0.012098431,
  -0.052931823,
  -0.043903783,
  0.020416837,
  -0.02836235,
  -0.081815675,
  0.0026377046,
  0.032095633,
  0.10631707,
  0.010502971,
  -0.050546005,
  0.061973784,
  0.0050363615,
  0.0041382113,
  -0.01360648,
  -0.0063092876,
  -0.0014158383,
  0.029270498,
  0.060902916,
  -0.00017745754,
  0.029295564,
  0.01138837,
  0.021856742,
  -0.012949824,
  0.0058418657,
  -0.012409977,
  0.013261852,
  -0.000922938,
  0.07715618,
  -0.016976612,
  -0.0023586017,
  -0.0167604,
  0.07356576,
  -0.037564773,
  -0.041481137,
  0.0018473217,
  0.000385279,
  0.05638736,
  0.060303554,
  0.01692413,
  0.045276172,
  -0.049780067,
  0.016918506,
  0.031175999,
  0.042276923,
  0.05287051,
  0.030832794,
  0.053609595,
  -0.005021142,
 

In [6]:
# Length of the first embedding vector, i.e. the dimensionality the model produced
len(response["embeddings"][0])

768

## Load vector database

In [7]:
from qdrant_client import QdrantClient

# Create the vector database client, pointing at a Qdrant server on localhost port 6333
client = QdrantClient("http://localhost:6333")

Read every file in the `contents` directory and compute its embedding, so that both the text and its vector can be stored in the vector database.

In [28]:
from pathlib import Path

data = []

path = Path("./contents")
# iterdir() yields entries in arbitrary order; sort so that each file keeps the
# same position in `data` on every run (the position is later used as the point id).
for file in sorted(path.iterdir()):
    if file.is_file():
        # The documents contain non-ASCII characters, so decode explicitly instead
        # of relying on the platform default (assumes the files are UTF-8 encoded).
        data.append(file.read_text(encoding="utf-8"))

In [29]:
# Display the loaded file contents (one string per file)
data

['Menu of the restaurant\n\n1. Patatas Bravas \nDescription: Crispy fried potatoes topped with a spicy tomato sauce and a creamy garlic aioli. \nIngredients: Potatoes, tomato sauce, garlic, olive oil, mayonnaise, paprika.\n\n2. Paella de Mariscos \nDescription: A traditional Spanish rice dish filled with a variety of fresh seafood. \nIngredients: Rice, shrimp, mussels, clams, squid, saffron, garlic, olive oil, bell peppers, tomatoes.\n\n3. Gazpacho Andaluz \nDescription: A refreshing cold tomato soup perfect for a hot day. \nIngredients: Tomatoes, cucumbers, bell peppers, onions, garlic, olive oil, vinegar, salt.\n\n4. Churros con Chocolate \nDescription: Fried dough pastries dusted with sugar and served with a rich chocolate dipping sauce. \nIngredients: Flour, water, sugar, cinnamon, dark chocolate, milk.\n\n5. Tortilla Española \nDescription: A classic Spanish omelette made with potatoes and onions. \nIngredients: Eggs, potatoes, onions, olive oil, salt.',
 'About me\n\nI am Jamón, 

In [30]:
# Embed all loaded documents in a single request (the whole list is sent as "input")
data_embeddings = get_embedding(session, EMBEDDING_MODEL, data)
data_embeddings["embeddings"]

[[-0.01966265,
  0.09049757,
  -0.19104382,
  0.027474608,
  0.020606115,
  -0.02185858,
  -0.011541874,
  -0.024420936,
  -0.009147898,
  -0.01961362,
  0.0072602765,
  0.02089015,
  0.0612601,
  0.048945557,
  -0.009947001,
  -0.0679581,
  0.006497176,
  -0.016915346,
  -0.00077573647,
  0.100224,
  0.045754943,
  0.0015781198,
  -0.04697007,
  -0.036645606,
  0.00033288254,
  0.07069832,
  -0.055915304,
  0.1189572,
  -0.020754334,
  -0.057842575,
  -0.042057812,
  -0.055680502,
  0.0041514663,
  0.039379794,
  0.032272734,
  -0.061857734,
  0.061021797,
  0.055727106,
  -0.013566586,
  -0.014171825,
  0.027281918,
  0.061673835,
  0.021932252,
  0.012101878,
  0.00906209,
  -0.013307813,
  0.028033543,
  0.026093878,
  -0.039128404,
  -0.027201744,
  0.022599049,
  -0.013791466,
  -0.007266992,
  -0.05690129,
  -0.05008825,
  -0.020544674,
  0.016507093,
  0.03548959,
  0.07628417,
  0.017085457,
  0.07667007,
  0.017442958,
  -0.012877312,
  0.07823843,
  0.017231584,
  -0.0272051

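Create the collection that will store the embeddings. The vector size must match the embedding dimension produced by the model (768, as measured above).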

In [27]:
from qdrant_client.models import Distance, VectorParams

# Creating a collection that already exists raises an error, so guard the call
# to keep this cell re-runnable against a Qdrant server that persists its data.
if not client.collection_exists("restaurant"):
    client.create_collection(
        collection_name="restaurant",
        # 768 is the embedding length reported by len(response["embeddings"][0]) above
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

# Displays True once the collection is available, whether or not it was just created
client.collection_exists("restaurant")

True

Store each embedding together with a payload — here, the original text the embedding was computed from — so that a search returns the text itself and not just a vector.

In [31]:
from qdrant_client.models import PointStruct

# One point per document: the id is the document's position in `data`, the vector
# is its embedding, and the payload carries the original text.
points = [
    PointStruct(id=position, vector=vector, payload={"text": document})
    for position, (document, vector) in enumerate(
        zip(data, data_embeddings["embeddings"])
    )
]

client.upsert(collection_name="restaurant", points=points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

Query the vector database for the most similar result.

In [36]:
# Embed the question with the same model used for the stored documents
query_embedding = get_embedding(session, EMBEDDING_MODEL, ["What's on the menu?"])
# Shows the full parsed response, not only the "embeddings" entry
query_embedding

{'model': 'nomic-embed-text',
 'embeddings': [[-0.04225292,
   0.063808724,
   -0.17290576,
   0.007884145,
   0.0091765,
   0.025426593,
   -0.021896845,
   -0.0058796047,
   -0.02368188,
   0.00026340655,
   0.0005915036,
   0.055631895,
   0.029263457,
   0.08919446,
   -0.0036082126,
   -0.067601815,
   -0.06236002,
   -0.069059916,
   0.03560842,
   0.025567284,
   0.059261613,
   -0.015979333,
   -0.045546975,
   -0.0074924277,
   0.07848801,
   0.044206273,
   -0.01754358,
   0.08525302,
   -0.017183024,
   0.007911777,
   -0.04832706,
   -0.01394169,
   0.013260918,
   0.006448792,
   0.007459775,
   -0.061152123,
   0.038530055,
   0.0451854,
   -0.0019173301,
   0.01663687,
   -0.019092921,
   0.033022635,
   0.0033406818,
   -0.06191608,
   0.034133367,
   0.0215087,
   0.0035700444,
   0.028607992,
   -0.04668644,
   -0.009163638,
   0.0070640887,
   -0.016789803,
   0.0036130338,
   -0.026781412,
   0.027596684,
   0.023346297,
   0.070163935,
   0.036597688,
   0.04160151

In [37]:
# The request held a single question, so its vector is the first (only) embedding
query_vector = query_embedding["embeddings"][0]

# Ask the "restaurant" collection for the single closest stored point
results = client.search(collection_name="restaurant", query_vector=query_vector, limit=1)
results

[ScoredPoint(id=0, version=0, score=0.63567466, payload={'text': 'Menu of the restaurant\n\n1. Patatas Bravas \nDescription: Crispy fried potatoes topped with a spicy tomato sauce and a creamy garlic aioli. \nIngredients: Potatoes, tomato sauce, garlic, olive oil, mayonnaise, paprika.\n\n2. Paella de Mariscos \nDescription: A traditional Spanish rice dish filled with a variety of fresh seafood. \nIngredients: Rice, shrimp, mussels, clams, squid, saffron, garlic, olive oil, bell peppers, tomatoes.\n\n3. Gazpacho Andaluz \nDescription: A refreshing cold tomato soup perfect for a hot day. \nIngredients: Tomatoes, cucumbers, bell peppers, onions, garlic, olive oil, vinegar, salt.\n\n4. Churros con Chocolate \nDescription: Fried dough pastries dusted with sugar and served with a rich chocolate dipping sauce. \nIngredients: Flour, water, sugar, cinnamon, dark chocolate, milk.\n\n5. Tortilla Española \nDescription: A classic Spanish omelette made with potatoes and onions. \nIngredients: Eggs,