In [None]:
!pip install -qU \
  transformers==4.31.0 \
  sentence-transformers==2.2.2 \
  pinecone-client==2.2.2 \
  langchain==0.0.240

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.1/179.1 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m75.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.1/311.1 kB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m63.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m65.3 MB/s[0m 

# Chunking

In [None]:
import pandas as pd

# Load the wine dataset from a CSV file located in the working directory.
wine = pd.read_csv('wine_cleaned_rev_concat.csv')

In [None]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
wine.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84502 entries, 0 to 84501
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              84502 non-null  int64  
 1   country         84502 non-null  object 
 2   description     84502 non-null  object 
 3   designation     84502 non-null  object 
 4   points          84502 non-null  float64
 5   price           78719 non-null  float64
 6   province        84502 non-null  object 
 7   title           84502 non-null  object 
 8   variety         84502 non-null  object 
 9   winery          84502 non-null  object 
 10  region_cleaned  84502 non-null  object 
 11  style1          84502 non-null  object 
 12  style2          84502 non-null  object 
dtypes: float64(2), int64(1), object(10)
memory usage: 8.4+ MB


We need to split long descriptions into chunks, because most text embedding models have a cap on the length of the input they can handle. Many cap the input at 256 tokens, so to stay safely within that limit we split long descriptions into chunks no larger than 250 characters.

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Define a custom text splitter.
# Chunk size and overlap are measured in characters because length_function=len.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=250,
    chunk_overlap=100,
    length_function=len,
    # Separators, from coarsest to finest: blank line, line break, the position
    # right after ". " (sentence end), a single space, and finally no separator.
    # The sentence-end pattern is a raw string: "\." is not a valid Python
    # escape sequence, so the non-raw literal triggers an invalid-escape
    # warning. The pattern text itself is unchanged.
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""]
)

Let's see how it works on an individual paragraph:

In [None]:
# Preview the chunks produced for one sample description (row position 8474)
text_splitter.split_text(wine['description'].iloc[8474])

['Offering a more traditional California Zinfandel style than some of the other 2011s, this takes off with scents of a warm, butter-crusted cherry pie, unfolding on the palate with cranberry jelly.',
 "There's a slight herbal character and quite of bit of tannic grip, so expect this one to improve in the years to come."]

Now let's chunk all descriptions, keeping track of the indices:

In [None]:
from tqdm import tqdm

def _chunk_records(position, row):
    """Yield one record per chunk of ``row.description``.

    Each record carries the chunk text, an id of the form
    ``"<row position>-<chunk number>"`` and the wine attributes copied
    from ``row``.
    """
    for chunk_no, chunk_text in enumerate(text_splitter.split_text(row.description)):
        yield {
            'id': f'{position}-{chunk_no}',
            'description': chunk_text,
            'title': row.title,
            'region': row.region_cleaned,
            'winery': row.winery,
            'variety': row.variety,
            'province': row.province,
            'price': row.price,
            'designation': row.designation,
            'country': row.country,
            'style1': row.style1,
            'style2': row.style2
        }


# Collect the chunk records of every wine, in row order
documents = []
for idx, row in tqdm(enumerate(wine.itertuples()), total=len(wine)):
    documents.extend(_chunk_records(idx, row))

# Report how many chunks were produced overall
total_documents = len(documents)
print(f"Total split documents: {total_documents}")


100%|██████████| 84502/84502 [00:02<00:00, 41706.95it/s]

Total split documents: 126862





Look at what a single 'document' looks like:

In [None]:
# Inspect one chunk record (list position 19) to check the fields it carries.
documents[19]

{'id': '16-0',
 'description': 'This wine from the Geneseo district offers aromas of sour plums and just enough cigar box to tempt the nose.',
 'title': 'Bianchi 2011 Signature Selection Merlot (Paso Robles)',
 'region': 'Paso Robles',
 'winery': 'Bianchi',
 'variety': 'Merlot',
 'province': 'California',
 'price': 22.0,
 'designation': 'Signature Selection',
 'country': 'US',
 'style1': 'Merlot - California',
 'style2': 'Merlot - Paso Robles'}

Now let's create a DataFrame for the split chunks (documents):

In [None]:
# One DataFrame row per chunk record; the columns come from the record keys.
split_wine = pd.DataFrame(documents)

In [None]:
# Summarise the chunk-level frame: row count, columns, non-null counts, dtypes.
split_wine.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126862 entries, 0 to 126861
Data columns (total 12 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   id           126862 non-null  object 
 1   description  126862 non-null  object 
 2   title        126862 non-null  object 
 3   region       126862 non-null  object 
 4   winery       126862 non-null  object 
 5   variety      126862 non-null  object 
 6   province     126862 non-null  object 
 7   price        119376 non-null  float64
 8   designation  126862 non-null  object 
 9   country      126862 non-null  object 
 10  style1       126862 non-null  object 
 11  style2       126862 non-null  object 
dtypes: float64(1), object(11)
memory usage: 11.6+ MB


Here's an illustration of how a long description got split:

In [None]:
# Show every chunk that belongs to one (long) wine description
example_title = 'Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma County)'
split_wine[split_wine['title'].eq(example_title)]

Unnamed: 0,id,description,title,region,winery,variety,province,price,designation,country,style1,style2
3272,2236-0,"Creamy, lush and somewhat robust, this dry spa...",Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3273,2236-1,"Made predominantly from Pinot Noir, this is an...",Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3274,2236-2,It also has a rich inviting mousse embedded wi...,Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3275,2236-3,"Easy to like for its array of raspberries, lim...",Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3276,2236-4,"Made from mostly Pinot Noir grapes, with an ad...",Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3277,2236-5,The finish offers a taste of toast with ginger...,Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3278,2236-6,"The blend is mostly Pinot Noir, with about 9% ...",Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3279,2236-7,It's an easy bubbly to drink at parties. A goo...,Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3280,2236-8,This is a great price for a sparkling wine of ...,Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma
3281,2236-9,"The finish ends sweet, begging for spicy food ...",Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma...,Sonoma,Gloria Ferrer,Sparkling Blend,California,21.333333,Sonoma Brut,US,Sparkling Blend - California,Sparkling Blend - Sonoma


Lastly, we need to fill NaN values, because Pinecone cannot handle them.

In [None]:
# Replace every missing value with the placeholder string "unknown"
split_wine = split_wine.fillna("unknown")

In [None]:
# Save the chunk-level frame to a CSV file in the working directory;
# index=False keeps the row index out of the file.
split_wine.to_csv('split_wine.csv', index=False)

# Setting up Embeddings Model

We will use the `all-MiniLM-L6-v2` model to create embeddings.

In [None]:
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Identifier of the sentence-transformers model used for all embeddings
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Run on the currently selected CUDA device when one is available, else on CPU
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# The same device is passed both for loading the model and for encoding
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

(…)f3d3c277d6e90027e55de9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

(…)7d6e90027e55de9125/1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

(…)e2f80f3d3c277d6e90027e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

(…)f80f3d3c277d6e90027e55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

(…)de9125/config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

(…)d3c277d6e90027e55de9125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

(…)90027e55de9125/sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

(…)6e90027e55de9125/special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

(…)f3d3c277d6e90027e55de9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

(…)7d6e90027e55de9125/tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

(…)3d3c277d6e90027e55de9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

(…)e2f80f3d3c277d6e90027e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

(…)80f3d3c277d6e90027e55de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

See how it works:

In [None]:
# Two toy documents, just to check what the embedding model returns
docs = ["this is one document", "and another document"]

embeddings = embed_model.embed_documents(docs)

# Report how many vectors came back and how long each one is
summary = (
    f"We have {len(embeddings)} doc embeddings, each with "
    f"a dimensionality of {len(embeddings[0])}."
)
print(summary)

We have 2 doc embeddings, each with a dimensionality of 384.


# Indexing and Querying with Pinecone

Initialize access to Pinecone:

In [None]:
import os
import pinecone

from getpass import getpass

# Get the API key from app.pinecone.io and the environment name from the console.
# Both are read from environment variables; anything missing is asked for
# interactively instead of falling back to a placeholder string, so no
# credential has to be written into the notebook.
pinecone_api_key = os.environ.get('PINECONE_API_KEY') or getpass('Pinecone API key: ')
pinecone_environment = (
    os.environ.get('PINECONE_ENVIRONMENT') or input('Pinecone environment: ')
)

pinecone.init(
    api_key=pinecone_api_key,
    environment=pinecone_environment
)

See what indexes exist already:

In [None]:
# List the names of the Pinecone indexes that already exist.
pinecone.list_indexes()

['rag']

Create a new index:

In [None]:
import time

index_name = 'rag'

# Create the index only when no index with this name exists yet.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        index_name,
        # vector size is taken from the sample embeddings computed earlier
        dimension=len(embeddings[0]),
        metric='cosine'
    )
    # Wait for the index to finish initialization, but give up after a bounded
    # time instead of looping forever if it never reports ready.
    ready_timeout_s = 600
    ready_deadline = time.monotonic() + ready_timeout_s
    while not pinecone.describe_index(index_name).status['ready']:
        if time.monotonic() > ready_deadline:
            raise TimeoutError(
                f"Pinecone index '{index_name}' was not ready "
                f"after {ready_timeout_s} seconds"
            )
        time.sleep(1)

Now connect to the index:

In [None]:
# Open a handle to the index and display its statistics
# (dimension, fullness and vector counts).
index = pinecone.Index(index_name)
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.4,
 'namespaces': {'': {'vector_count': 126862}},
 'total_vector_count': 126862}

Create embeddings for 'description' and send them to Pinecone (if not already):

In [None]:
# Number of chunks embedded and sent to Pinecone per request
batch_size = 32

# Columns stored as metadata next to each vector (the chunk text itself is
# stored separately under the key 'text')
metadata_fields = [
    'title', 'region', 'winery', 'variety', 'province',
    'price', 'designation', 'country', 'style1', 'style2',
]

# Embedding and uploading every chunk is slow, so only do it when the index
# does not yet hold at least one vector per chunk.
vectors_in_index = index.describe_index_stats()['total_vector_count']

if vectors_in_index >= len(split_wine):
    print(f"Index already holds {vectors_in_index} vectors; skipping upsert.")
else:
    for i in tqdm(range(0, len(split_wine), batch_size)):
        # iloc slicing stops at the end of the frame, so the last batch may be shorter
        batch = split_wine.iloc[i:i + batch_size]

        # Convert the batch to plain dicts once instead of iterating its rows
        # separately for ids, texts and metadata
        records = batch.to_dict('records')

        ids = [str(record['id']) for record in records]
        texts = [record['description'] for record in records]
        metadata = [
            {
                'text': record['description'],
                **{field: record[field] for field in metadata_fields}
            }
            for record in records
        ]

        # Embed the 'texts' using 'embed_model'
        embeds = embed_model.embed_documents(texts)

        # Pinecone accepts vectors as (id, values, metadata) tuples
        vectors = list(zip(ids, embeds, metadata))

        # Add the vectors to Pinecone using index.upsert()
        index.upsert(vectors=vectors)


100%|██████████| 3965/3965 [21:15<00:00,  3.11it/s]


We can now query the indexed data like this. Here, we used the filter `"country": 'France'`, so only wines from France will appear.



In [None]:
query = "Fruity rich wine"

# Embed the query text with the same model that embedded the chunks
embedding = embed_model.embed_query(query)

# Ask for the five closest chunks whose 'country' metadata is France,
# and have the stored metadata returned with each match
query_options = dict(
    vector=embedding,
    filter={"country": 'France'},
    top_k=5,
    include_metadata=True,
)
result = index.query(**query_options)

The plain result looks like this:

In [None]:
# Display the raw query response (one entry per match, with its id and metadata).
result

{'matches': [{'id': '17616-0',
              'metadata': {'country': 'France',
                           'designation': 'Flower Label',
                           'price': 13.0,
                           'province': 'Beaujolais',
                           'region': 'Beaujolais-Villages',
                           'style1': 'Gamay - Beaujolais',
                           'style2': 'Gamay - Beaujolais',
                           'text': 'With rich fruits, this is a ripe and '
                                   'full-bodied wine. With ease and '
                                   'concentration, it handles considerable '
                                   'youthful acidity plus berry fruits and '
                                   'structure. It should be fully integrated '
                                   'by the beginning of 2015.',
                           'title': 'Georges Duboeuf 2013 Flower Label  '
                                    '(Beaujolais-Villages)',
             

We can also make it look more presentable:

In [None]:
def _flatten_match(match):
    """Return one flat dict per match: id and score first, then its metadata."""
    flat = {'id': match['id'], 'score': match['score']}
    flat.update(match['metadata'])  # metadata fields become top-level keys
    return flat


# One flat record per match, in the order the matches were returned
data = list(map(_flatten_match, result['matches']))

# Tabulate the records
result_df = pd.DataFrame(data)

# Show the first rows of the table
result_df.head()

Unnamed: 0,id,score,country,designation,price,province,region,style1,style2,text,title,variety,winery
0,17616-0,0.818701,France,Flower Label,13.0,Beaujolais,Beaujolais-Villages,Gamay - Beaujolais,Gamay - Beaujolais,"With rich fruits, this is a ripe and full-bodi...",Georges Duboeuf 2013 Flower Label (Beaujolais...,Gamay,Georges Duboeuf
1,6140-0,0.806644,France,En Combe,36.0,Burgundy,Saint-Véran,Chardonnay - Burgundy,Chardonnay - Burgundy,"Big and fruity, this rich wine offers warm tro...",Bret Brothers 2012 En Combe (Saint-Véran),Chardonnay,Bret Brothers
2,9467-0,0.801874,France,Pêche au Carrelet,33.0,Bordeaux,Graves,Bordeaux-style White Blend - Bordeaux,Bordeaux-style White Blend - Bordeaux,"A big, fruity bold wine, packed with the ripes...",Château Haut-Peyrous 2008 Pêche au Carrelet (...,Bordeaux-style White Blend,Château Haut-Peyrous
3,67304-0,0.800019,France,Vieilles Vignes,18.0,Southwest France,Madiran,Tannat - Southwest France,Tannat - Southwest France,A delicious wine whose richness is matched by ...,Château Peyros 2005 Vieilles Vignes Tannat (Ma...,Tannat,Château Peyros
4,24392-0,0.797473,France,Les Puechs,17.0,Provence,Côtes de Provence,Rosé - Provence,Rosé - Provence,"Produced only in top years, this rich wine is ...",Château la Vivonne 2016 Les Puechs Rosé (Côtes...,Rosé,Château la Vivonne


Recall that we split long descriptions into chunks, and so in the result above, full descriptions will not appear unless they were short enough and didn't get chunked. To get full descriptions, we can do this:

In [None]:
# Chunk ids have the form "<row position in wine>-<chunk number>", so the part
# before the dash identifies the wine a chunk came from. Several chunks of the
# same wine may match, hence the de-duplication (first occurrence is kept).
wine_positions = (
    result_df['id']
    .str.split('-', n=1)
    .str[0]
    .astype(int)
    .drop_duplicates()
)

# Look the wines up by position. This keeps them in the order of the query
# results (best match first) and does not depend on the index labels of wine.
# result_df itself is left untouched.
filtered_wine = wine.iloc[wine_positions.to_numpy()]

# Display the resulting DataFrame
filtered_wine


Unnamed: 0,id,country,description,designation,points,price,province,title,variety,winery,region_cleaned,style1,style2,n_value
6140,8797,France,"Big and fruity, this rich wine offers warm tro...",En Combe,89.0,36.0,Burgundy,Bret Brothers 2012 En Combe (Saint-Véran),Chardonnay,Bret Brothers,Saint-Véran,Chardonnay - Burgundy,Chardonnay - Burgundy,8797
9467,13418,France,"A big, fruity bold wine, packed with the ripes...",Pêche au Carrelet,90.0,33.0,Bordeaux,Château Haut-Peyrous 2008 Pêche au Carrelet (...,Bordeaux-style White Blend,Château Haut-Peyrous,Graves,Bordeaux-style White Blend - Bordeaux,Bordeaux-style White Blend - Bordeaux,13418
17616,25036,France,"With rich fruits, this is a ripe and full-bodi...",Flower Label,88.0,13.0,Beaujolais,Georges Duboeuf 2013 Flower Label (Beaujolais...,Gamay,Georges Duboeuf,Beaujolais-Villages,Gamay - Beaujolais,Gamay - Beaujolais,25036
24392,34992,France,"Produced only in top years, this rich wine is ...",Les Puechs,92.0,17.0,Provence,Château la Vivonne 2016 Les Puechs Rosé (Côtes...,Rosé,Château la Vivonne,Côtes de Provence,Rosé - Provence,Rosé - Provence,34992
67304,101619,France,A delicious wine whose richness is matched by ...,Vieilles Vignes,92.0,18.0,Southwest France,Château Peyros 2005 Vieilles Vignes Tannat (Ma...,Tannat,Château Peyros,Madiran,Tannat - Southwest France,Tannat - Southwest France,101619


# Embeddings with Faiss

Let's create a tensor file with description embeddings. If you use GPU, it takes only 20 minutes.

In [None]:
import math
import torch

# All chunk texts, in the row order of split_wine
descriptions = split_wine['description'].tolist()

# Chunks embedded per progress-bar step
batch_size = 100

# Number of steps needed to cover every chunk (the last one may be shorter)
num_batches = math.ceil(len(descriptions) / batch_size)

# Embed batch by batch, collecting the vectors in input order
description_embeddings = []
batch_starts = range(0, len(descriptions), batch_size)
for start_idx in tqdm(batch_starts, total=num_batches, desc="Processing Batches"):
    # Slicing stops at the end of the list, so no explicit upper bound is needed
    batch_descriptions = descriptions[start_idx:start_idx + batch_size]
    description_embeddings += embed_model.embed_documents(batch_descriptions)

# Stack the vectors into a single tensor and write it to disk
description_tensor = torch.tensor(description_embeddings)
torch.save(description_tensor, 'description_embeddings.pt')


Processing Batches: 100%|██████████| 1269/1269 [01:41<00:00, 12.44it/s]


In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


Now load our tensor file and initiate a list of labels:

In [None]:
# Chunk ids as strings, in the row order of split_wine
knn_labels = split_wine['id'].astype(str).tolist()

# Embedding matrix saved earlier by torch.save
knn_vectors = torch.load("description_embeddings.pt")

Now we can create a Faiss index using our tensor file:

In [None]:
import faiss

# Convert the data to a NumPy array for use with faiss, which works on
# C-contiguous float32 matrices with one vector per row. float() and
# contiguous() return the tensor itself when it already has that layout.
vectors_np = knn_vectors.float().contiguous().numpy()

# Determine the dimension of the vectors
dimension = vectors_np.shape[1]

# IndexFlatIP ranks by inner product, which equals cosine similarity only for
# unit-length vectors, so scale every row to unit L2 norm first.
# normalize_L2 works in place; the array may share memory with knn_vectors.
faiss.normalize_L2(vectors_np)

# Build the Faiss index.
# NOTE: this rebinds the name `index`, which earlier referred to the Pinecone index.
index = faiss.IndexFlatIP(dimension)
index.add(vectors_np)  # Add the data vectors to the index


And we can query the indexed data like this:

In [None]:
import numpy as np

# Define the text query
query_text = "Fruity rich wine"

# Embed the text query to obtain a vector using the embedding model
query_vector = embed_model.embed_query(query_text)

# faiss works on float32 matrices with one query per row; without an explicit
# dtype NumPy would build a float64 array from Python floats.
query_matrix = np.array([query_vector], dtype='float32')

# Scale the query to unit length so inner products behave like cosine similarities
faiss.normalize_L2(query_matrix)

# Specify the number of neighbors to return
k = 5

# Perform a nearest neighbor search to find the closest neighbors to the query vector.
# D holds the similarity scores between the query vector and its neighbors.
# I holds the row positions of the nearest neighbors in the indexed data.
D, I = index.search(query_matrix, k)

# faiss pads with -1 when fewer than k neighbors exist; drop those entries
neighbor_positions = I[0][I[0] >= 0]

# Get labels of the neighbors
neighbor_labels = [knn_labels[i] for i in neighbor_positions]

# Extract the matching rows from the split_wine DataFrame (same row order as the index)
faiss_result = split_wine.iloc[neighbor_positions]

If you check, the result is exactly the same as with Pinecone when no metadata filter is applied, which is expected.

In [None]:
# Display the rows of split_wine returned by the Faiss search.
faiss_result

Unnamed: 0,id,description,title,region,winery,variety,province,price,designation,country,style1,style2
74004,49227-0,"This is a rich, concentrated while also fruity...",Dona Maria-Júlio Bastos 2010 Grande Reserva Re...,Alentejano,Dona Maria-Júlio Bastos,Portuguese Red,Alentejano,45.0,Grande Reserva,Portugal,Portuguese Red - Alentejano,Portuguese Red - Alentejano
26617,17616-0,"With rich fruits, this is a ripe and full-bodi...",Georges Duboeuf 2013 Flower Label (Beaujolais...,Beaujolais-Villages,Georges Duboeuf,Gamay,Beaujolais,13.0,Flower Label,France,Gamay - Beaujolais,Gamay - Beaujolais
60107,39820-0,"Fruity, full-bodied wine, with tropical fruits...",J. Portugal Ramos 2007 Marques de Borba White ...,Alentejo,J. Portugal Ramos,Portuguese White,Alentejo,12.0,Marques de Borba,Portugal,Portuguese White - Alentejo,Portuguese White - Alentejo
113864,75917-0,"This is a big, fruity wine that has a fine bal...",Quinta Vale Dona Maria 2011 Late Bottled Vinta...,Port,Quinta Vale Dona Maria,Port,Port,,Late Bottled Vintage,Portugal,Port - Port,Port - Port
9252,6140-0,"Big and fruity, this rich wine offers warm tro...",Bret Brothers 2012 En Combe (Saint-Véran),Saint-Véran,Bret Brothers,Chardonnay,Burgundy,36.0,En Combe,France,Chardonnay - Burgundy,Chardonnay - Burgundy
