In [1]:
import pandas as pd
df = pd.read_csv('../../top_rated_wines.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
#data = df.sample(700).to_dict('records') # Get only 700 records. More records will make it slower to index
data = df.to_dict('records')
len(data)

1347

In [2]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [3]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [5]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [6]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

True

In [6]:
# NOTE dchesnokov: doesn't work on WSL with default packages versions from master repo; see below variant
# vectorize!
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

AttributeError: 'QdrantClient' object has no attribute 'upload_points'

In [7]:
# vectorize!
from qdrant_client.models import Record
qdrant.upload_records(
    collection_name="top_wines",
    records=[
        Record(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [17]:
#note='"The single-vineyard 2004 Malbec Argentino Vineyard spent 17 months in new French oak. Remarkably fragrant and complex aromatically, it offers up aromas of wood smoke, creosote, pepper, clove, black cherry, and blackberry. Made in a similar, elegant style, it is the most structured of the three single vineyard wines, needing a minimum of a decade of additional cellaring. It should easily prove to be a 25-40 year wine. It is an exceptional achievement in Malbec. When all is said and done, Catena Zapata is the Argentina winery of reference – the standard of excellence for comparing all others. The brilliant, forward-thinking Nicolas Catena remains in charge, with his daughter, Laura, playing an increasingly large role. The Catena Zapata winery is an essential destination for fans of both architecture and wine in Mendoza. It is hard to believe, given the surge in popularity of Malbec in recent years, that Catena Zapata only began exporting Malbec to the United States in 1994."'
note='The single-vineyard 2004 Malbec Argentino Vineyard spent 17 months in new French oak.'
vector=encoder.encode(note).tolist()
print(len(vector))
vector

384


[0.09278937429189682,
 -0.03437332063913345,
 -0.023236652836203575,
 0.03904687613248825,
 0.07786499708890915,
 -0.05631914362311363,
 -0.08247490227222443,
 0.0124586783349514,
 -0.009760531596839428,
 0.0089627830311656,
 0.05113108828663826,
 -0.033050794154405594,
 -0.08469725400209427,
 -0.01742677576839924,
 -0.06163802742958069,
 -0.027323570102453232,
 -0.014462834224104881,
 -0.07182099670171738,
 0.0899398922920227,
 -0.032412659376859665,
 0.02396087720990181,
 -0.05288434401154518,
 -0.03707241639494896,
 -0.001883350545540452,
 0.07434549182653427,
 -0.10071758925914764,
 -0.05894303321838379,
 0.07110700756311417,
 -0.019971609115600586,
 -0.03508472070097923,
 0.06790073215961456,
 0.15782441198825836,
 0.0488547682762146,
 0.014963299036026001,
 -0.07964525371789932,
 -0.024129023775458336,
 0.02818286046385765,
 -0.07700701057910919,
 0.008323095738887787,
 -0.02085087262094021,
 -0.012773080728948116,
 0.025347668677568436,
 -0.059976305812597275,
 0.073944739997386

In [8]:
user_prompt = "Suggest me an amazing Malbec wine from Argentina"

In [9]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'name': 'Catena Zapata Argentino Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 98.0, 'notes': '"The single-vineyard 2004 Malbec Argentino Vineyard spent 17 months in new French oak. Remarkably fragrant and complex aromatically, it offers up aromas of wood smoke, creosote, pepper, clove, black cherry, and blackberry. Made in a similar, elegant style, it is the most structured of the three single vineyard wines, needing a minimum of a decade of additional cellaring. It should easily prove to be a 25-40 year wine. It is an exceptional achievement in Malbec. When all is said and done, Catena Zapata is the Argentina winery of reference – the standard of excellence for comparing all others. The brilliant, forward-thinking Nicolas Catena remains in charge, with his daughter, Laura, playing an increasingly large role. The Catena Zapata winery is an essential destination for fans of both architecture and wine in Mendoza. It is hard to believe, given the surge i

In [10]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]
print(str(search_results))

[{'name': 'Catena Zapata Argentino Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 98.0, 'notes': '"The single-vineyard 2004 Malbec Argentino Vineyard spent 17 months in new French oak. Remarkably fragrant and complex aromatically, it offers up aromas of wood smoke, creosote, pepper, clove, black cherry, and blackberry. Made in a similar, elegant style, it is the most structured of the three single vineyard wines, needing a minimum of a decade of additional cellaring. It should easily prove to be a 25-40 year wine. It is an exceptional achievement in Malbec. When all is said and done, Catena Zapata is the Argentina winery of reference – the standard of excellence for comparing all others. The brilliant, forward-thinking Nicolas Catena remains in charge, with his daughter, Laura, playing an increasingly large role. The Catena Zapata winery is an essential destination for fans of both architecture and wine in Mendoza. It is hard to believe, given the surge 

In [14]:
# NOTE dchesnokov: this doesn't work on WSL; see below variant

# Now time to connect to the local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://127.0.0.1:8080/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": "Suggest me an amazing Malbec wine from Argentina"},
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)

APIConnectionError: Connection error.

In [22]:
# NOTE dchesnokov for windows WSL:
# - start LLama with --host 0.0.0.0
# - find WSL 'proxy' "ip route show | grep -i default | awk '{ print $3}'" -> e.g. 172.17.128.1
# - use above found address to connect to LLama via OpenAI client

# Now time to connect to the local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://172.17.128.1:8080/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": "Suggest me an amazing Malbec wine from Argentina"},
        #{"role": "assistant", "content": str(search_results)}
        {"role": "assistant", "content": "Certainly! One of my favorite Malbecs from Argentina is the 2018 Bodega Norton Malbec. It's made from grapes grown in the Mendoza region of Argentina and has a rich, full-bodied flavor with notes of blackberry, plum, and chocolate. The tannins are smooth and well-balanced, making it an excellent choice for those who enjoy bold, full-bodied wines. If you're looking for something a bit different, you might also enjoy the 2018 Catena Zapata Malbec, which has a lighter body and bright acidity, making it a great option for pairing with grilled meats or fish."},
        {"role": "user", "content": "Can you tell me more about that? Any ratings available?"},
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content='Of course! The 2018 Bodega Norton Malbec received a rating of 94 points from Wine Spectator, which called it "a full-bodied red with dark fruit, chocolate and spice flavors, and a long, velvety finish." It also received a rating of 93 points from Robert Parker\'s Wine Advocate, which noted that it "offers a great balance between ripe fruit and acidity, with a long and persistent finish." These ratings indicate that this wine is of exceptional quality and worth trying if you\'re a fan of full-bodied Malbecs from Argentina.', role='assistant', function_call=None, tool_calls=None)
