In [22]:
import pandas as pd
# Load the top-rated wines dataset (path is relative to this notebook).
df = pd.read_csv('../../top_rated_wines.csv')
# Drop rows with a missing 'variety' (NaN blows up serialization) and rows with a
# missing 'notes': the notes text is what gets embedded further down, and the
# encoder needs an actual string, not a float NaN.
df = df.dropna(subset=['variety', 'notes'])
data = df.to_dict('records')  # one dict per wine; reused below as the point payload
df

Unnamed: 0,name,region,variety,rating,notes
0,Abreu Vineyards Cappella 2010,"Napa Valley, California",Red Wine,98.0,Cappella is one of the oldest vineyard sites i...
1,Abreu Vineyards Howell Mountain 2009,"Howell Mountain, Napa Valley, California",Red Wine,98.0,"As a set of wines, it is hard to surpass the f..."
2,Abreu Vineyards Las Posadas Howell Mountain 2012,"Howell Mountain, Napa Valley, California",Red Wine,99.0,"At about 2000 feet elevation, Las Posadas sits..."
3,Abreu Vineyards Madrona Ranch 1996,"Napa Valley, California",Red Wine,98.0,Abreu Madrona Ranch is a blend of Cabernet Sau...
4,Abreu Vineyards Madrona Ranch 2005,"Napa Valley, California",Red Wine,98.0,Abreu Madrona Ranch is a blend of Cabernet Sau...
...,...,...,...,...,...
357,L'Aventure Estate Cuvee 2016,"Paso Robles, Central Coast, California",Red Wine,99.0,"Blend: 52% Syrah, 32% Cabernet Sauvignon, 16% ..."
358,Le Dome 2016,"St. Emilion, Bordeaux, France",Red Wine,98.0,"Blend: 80% Cabernet Franc, 20% Merlot"
359,Leeuwin Estate Art Series Chardonnay 2001,"Margaret River, Western Australia, Australia",White Wine,98.0,Number 24 on
360,Lewelling Wight Vineyard Cabernet Sauvignon 2008,"Napa Valley, California",Red Wine,98.0,"Sumptuous aromas of blackberry, cassis, Bing c..."


In [9]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [10]:
# Embedding model: every text passed to `encoder.encode` becomes a fixed-size vector
encoder = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [11]:
# Vector database client, backed by an in-memory Qdrant instance
qdrant = QdrantClient(location=":memory:")

In [12]:
# Create the collection that will hold the wine embeddings
vector_params = models.VectorParams(
    size=encoder.get_sentence_embedding_dimension(),  # vector size is dictated by the embedding model
    distance=models.Distance.COSINE,  # compare vectors by cosine distance
)
qdrant.recreate_collection(collection_name="top_wines", vectors_config=vector_params)

True

In [23]:
# Embed each wine's tasting notes and upload one point per wine.
# The point id is the wine's position in `data`; the full record rides along as payload.
wine_points = [
    models.PointStruct(
        id=point_id,
        vector=encoder.encode(wine["notes"]).tolist(),
        payload=wine,
    )
    for point_id, wine in enumerate(data)
]
qdrant.upload_points(collection_name="top_wines", points=wine_points)

In [24]:
# Semantic search: embed a free-text query and fetch the three closest wines.
# NOTE: only the "notes" text was embedded at upload time, so phrases such as
# "99 points" or "Napa Valley" act as semantic hints, not as filters on the
# rating/region payload fields.
query_text = "99 points Cabernet Sauvignon from Napa Valley"
hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(query_text).tolist(),
    limit=3,
)
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'name': "Anderson's Conn Valley Vineyards Cabernet Sauvignon Estate Reserve 2008", 'region': 'Napa Valley, California', 'variety': 'Red Wine', 'rating': 98.0, 'notes': '100% Cabernet Sauvignon, 100% Estate grown.'} score: 0.7232318357735968
{'name': "Anderson's Conn Valley Vineyards Cabernet Sauvignon Reserve (1.5 Liter Magnum) 2008", 'region': 'Napa Valley, California', 'variety': 'Red Wine', 'rating': 98.0, 'notes': '100% Cabernet Sauvignon, 100% Estate grown.'} score: 0.7232318357735968
{'name': "L'Aventure Estate Cuvee 2014", 'region': 'Central Coast, California', 'variety': 'Red Wine', 'rating': 98.0, 'notes': 'Blend: 50% Cabernet Sauvignon, 35% Syrah, 15% Petit Verdot'} score: 0.6844836784212434
