In [None]:
############# Load data from the CSV file ######################

In [14]:
import pandas as pd

In [15]:
# Read the top-rated wines dataset into a DataFrame and preview the first rows.
df = pd.read_csv("top_rated_wines.csv")
df.head()

Unnamed: 0,name,region,variety,rating,notes
0,3 Rings Reserve Shiraz 2004,"Barossa Valley, Barossa, South Australia, Aust...",Red Wine,96.0,Vintage Comments : Classic Barossa vintage con...
1,Abreu Vineyards Cappella 2007,"Napa Valley, California",Red Wine,96.0,Cappella is a proprietary blend of two clones ...
2,Abreu Vineyards Cappella 2010,"Napa Valley, California",Red Wine,98.0,Cappella is one of the oldest vineyard sites i...
3,Abreu Vineyards Howell Mountain 2008,"Howell Mountain, Napa Valley, California",Red Wine,96.0,When David purchased this Howell Mountain prop...
4,Abreu Vineyards Howell Mountain 2009,"Howell Mountain, Napa Valley, California",Red Wine,98.0,"As a set of wines, it is hard to surpass the f..."


In [16]:
# Summary statistics of the numeric columns (here: the wine ratings).
df.describe()

Unnamed: 0,rating
count,1365.0
mean,96.859341
std,0.995957
min,96.0
25%,96.0
50%,97.0
75%,98.0
max,99.0


In [17]:
# Keep only the rows that have a value in the `variety` column
# (other columns are not checked for missing values here).
df = df.dropna(subset=["variety"])

In [18]:
# Convert the DataFrame into a list of dictionaries, one dictionary per wine (row).
data = df.to_dict(orient="records")

In [5]:
!pip install qdrant-client==1.3.0 # New versions doesn't support typical search command. So Qdrant client 1.3.0 was used

Collecting qdrant-client==1.3.0
  Using cached qdrant_client-1.3.0-py3-none-any.whl.metadata (7.4 kB)
Using cached qdrant_client-1.3.0-py3-none-any.whl (132 kB)
Installing collected packages: qdrant-client
Successfully installed qdrant-client-1.3.0


In [6]:
# Create a vector database client that keeps everything in memory
# (nothing is persisted once the kernel stops).

from qdrant_client import QdrantClient

client = QdrantClient(location=":memory:")

In [None]:
########### Encoding data to numeric values to store in the vector database #################

In [7]:
from sentence_transformers import SentenceTransformer

In [8]:
# Sentence-embedding model used to turn text into vectors.
encoder = SentenceTransformer("all-MiniLM-L6-v2")



Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [11]:
from qdrant_client import models

In [12]:
# Create a collection (comparable to a table in a relational database) that
# will hold the wine records in the vector db.
# - The vector size must match the embedding size of the encoding model, so it
#   is read from the encoder.
# - The distance metric decides how the query vector is compared with the
#   stored vectors. Cosine distance is chosen here; dot product and Euclidean
#   distance are other common choices.
vector_params = models.VectorParams(
    size=encoder.get_sentence_embedding_dimension(),
    distance=models.Distance.COSINE,
)

client.create_collection(
    collection_name="top_wines",
    vectors_config=vector_params,
)


True

In [19]:
# Upload the wine records to the vector database.
#
# All tasting notes are embedded in a single batched `encode` call instead of
# one call per wine: the encoder processes the list in batches internally,
# which is considerably faster than encoding the notes one by one.
note_vectors = encoder.encode([doc["notes"] for doc in data])

client.upload_records(
    collection_name="top_wines",
    records=[
        models.PointStruct(
            id=idx,                  # position of the wine in `data`
            vector=vector.tolist(),  # embedding of the wine's tasting notes
            payload=doc,             # full wine record, returned with search hits
        )
        for idx, (doc, vector) in enumerate(zip(data, note_vectors))
    ],
)

In [21]:
# Example: embed a free-text query and fetch the 3 most similar wines
# from the vector database.
hits = client.search(
    collection_name="top_wines",
    query_vector=encoder.encode("A wine from Mendoza Argentina").tolist(),
    limit=3,
)

# Show each matching wine record together with its similarity score.
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'name': 'Catena Zapata Nicasia Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 96.0, 'notes': '"The single-vineyard 2004 Malbec Nicasia Vineyard is located in the Altamira district of Mendoza. It was aged for 18 months in new French oak. Opaque purple-colored, it exhibits a complex perfume of pain grille, scorched earth, mineral, licorice, blueberry, and black cherry. Thick on the palate, bordering on opulent, it has layers of fruit, silky tannins, and a long, fruit-filled finish. It will age effortlessly for another 6-8 years and provide pleasure through 2025. When all is said and done, Catena Zapata is the Argentina winery of reference – the standard of excellence for comparing all others. The brilliant, forward-thinking Nicolas Catena remains in charge, with his daughter, Laura, playing an increasingly large role. The Catena Zapata winery is an essential destination for fans of both architecture and wine in Mendoza. It is hard to believe, given the su

In [23]:
# Embed the user's prompt and retrieve the closest wines for it.
user_prompt = "Suggest me an amazing Malbec wine from Argentina"
query_vector = encoder.encode(user_prompt).tolist()

hits = client.search(
    collection_name="top_wines",
    query_vector=query_vector,
    limit=3,  # return the 3 closest points
)

# Show each matching wine record together with its similarity score.
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'name': 'Catena Zapata Argentino Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 98.0, 'notes': '"The single-vineyard 2004 Malbec Argentino Vineyard spent 17 months in new French oak. Remarkably fragrant and complex aromatically, it offers up aromas of wood smoke, creosote, pepper, clove, black cherry, and blackberry. Made in a similar, elegant style, it is the most structured of the three single vineyard wines, needing a minimum of a decade of additional cellaring. It should easily prove to be a 25-40 year wine. It is an exceptional achievement in Malbec. When all is said and done, Catena Zapata is the Argentina winery of reference – the standard of excellence for comparing all others. The brilliant, forward-thinking Nicolas Catena remains in charge, with his daughter, Laura, playing an increasingly large role. The Catena Zapata winery is an essential destination for fans of both architecture and wine in Mendoza. It is hard to believe, given the surge i

In [24]:
# RAG
# Keep only the payloads (the wine records) of the retrieved hits; they are
# passed to the LLM as additional context.
search_results = [match.payload for match in hits]

In [25]:
# Take the user prompt, append the search results retrieved from the vector
# database to improve the context, and send both to the LLM.
# A locally running lightweight LLM is used - Llama-3.2-3B-Instruct.Q6_K.
# It was downloaded and is served on this machine.

import requests
import json

# --- Your local LLaMA API server ---
url = "http://127.0.0.1:8080/v1/chat/completions"  # your server URL

# Local generation can be slow, but the request should not hang forever if the
# server is down or stuck.
REQUEST_TIMEOUT_SECONDS = 300

# --- prompt ---
messages = [
    {"role": "system", "content": "You are a chatbot, a wine specialist. Your top priority is to help guide users select amazing wine and guide them with their requests."},
    # Reuse the prompt that was used for the vector search, so the question
    # and the retrieved context always belong together.
    {"role": "user", "content": user_prompt},
    {"role": "assistant", "content": str(search_results)}  # search results from Qdrant
]

# Named `request_body` (not `data`) so the list of wine records stored in
# `data` is not overwritten; the upload cell iterates over that list and
# would break if it were re-run after this cell.
request_body = {
    "model": "LLaMA_CPP",  # name your running model (depends on server config)
    "messages": messages,
    "temperature": 0.7,
    "max_tokens": 300
}

# --- Call the API ---
response = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    data=json.dumps(request_body),
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Fail with a clear HTTP error instead of a confusing KeyError further down
# when the server answers with an error status.
response.raise_for_status()
result = response.json()

# --- Print model output ---
print(result['choices'][0]['message']['content'])

Based on my knowledge, I highly recommend the **Catena Zapata Adrianna Vineyard Malbec 2004**. This wine is a masterpiece from one of Argentina's most renowned wineries, and it's a great representation of the country's Malbec style.

This wine is known for its:

* Inky purple color
* Aromas of wood smoke, pencil lead, game, black cherry, and blackberry liqueur
* Full-bodied and opulent texture
* Complex flavors that balance richness with finesse
* Aging potential of at least a decade, with the potential to evolve for 25-40 years

The Adrianna Vineyard is a single-vineyard site that showcases the best of Malbec's terroir, and the 2004 vintage is considered one of the greatest in the region.

If you're looking for a truly exceptional Malbec experience, I think this wine is an excellent choice. Would you like to know more about the winemaking process, the vineyard, or perhaps some pairing suggestions?<|eot_id|>
