# Imports

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
import pycountry

# API Testing

In [32]:
r = requests.get(
    "https://www.vivino.com/api/explore/explore",
    params = {
        "country_code": "FR",
        "country_codes[]":"pt",
        "currency_code":"EUR",
        "grape_filter":"varietal",
        "min_rating":"1",
        "order_by":"price",
        "order":"asc",
        "page": 1,
        "price_range_max":"500",
        "price_range_min":"0",
        "wine_type_ids[]":"1"
    },
    headers= {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    }
)
results = [
    (
        t["vintage"]["wine"]["winery"]["name"], 
        f'{t["vintage"]["wine"]["name"]} {t["vintage"]["year"]}',
        t["vintage"]["statistics"]["ratings_average"],
        t["vintage"]["statistics"]["ratings_count"],
    )
    for t in r.json()["explore_vintage"]["matches"]
]
dataframe = pd.DataFrame(results,columns=['Winery','Wine','Rating','num_review'])

print(dataframe)


                     Winery                                  Wine  Rating  \
0                  Graça 28                          Reserva 2022     4.1   
1                 Vila Real                    Vale do Corgo 2021     3.8   
2    Casa Ermelinda Freitas      Sandstone Castelão - Shiraz 2023     4.0   
3                   Cartuxa                         EA Tinto 2021     3.8   
4   Quinta de São Sebastião         Miradouro da Vinha Tinto 2022     3.8   
5   Quinta de São Sebastião         Miradouro da Vinha Tinto 2021     3.8   
6         Casal das Freiras                   Colheita Tinto 2022     3.5   
7   Quinta de São Sebastião  Janela Branca Special Selection 2022     3.7   
8                 Vila Real                Cancellus Reserva 2017     3.8   
9       Quinta Dona Mafalda                   Little Mafalda 2022     3.8   
10            Monte do Além                     Petit Verdot 2012     3.8   
11                Vila Real              Cancellus Signature 2019     3.9   

# Vivino API Call

In [2]:
# Define function

def get_vivino_data(
    name=None,
    country_code=None,
    currency_code=None,
    min_rating=0,
    ratings_count_min=0,
    price_range_max=100000,
    price_range_min=0,
    wine_type_ids=1,
):
    r = requests.get(
        "https://www.vivino.com/api/explore/explore",
        params={
            "search_query": name,
            "country_code": country_code,
            "currency_code": currency_code,
            "min_rating": min_rating,
            "min_ratings": ratings_count_min,
            "order_by": "price",
            "order": "asc",
            "page": 1,
            "price_range_max": price_range_max,
            "price_range_min": price_range_min,
            "wine_type_ids[]": wine_type_ids,
        },
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
        },
    )


    results = [
        {
            "name": t["vintage"]["wine"]["winery"]["name"],
            "id": t["vintage"]["id"],
            "wine": f'{t["vintage"]["wine"]["name"]} {t["vintage"]["year"]}',
            "region": t["vintage"]["wine"]["region"]["name"],
            "country": t["vintage"]["wine"]["region"]["country"]["name"],
            "rating": t["vintage"]["statistics"]["ratings_average"],
            "flavors": [x["group"] for x in t["vintage"]["wine"]["taste"]["flavor"]],
            "foods": [x["name"] for x in t["vintage"]["wine"]["style"]["food"]],
            "rating_count": t["vintage"]["statistics"]["ratings_count"],
        }
        for t in r.json()["explore_vintage"]["matches"]
    ]

    # Get lists
    wineries = [x["name"] for x in results]
    links = ["https://vivino.com/wines/" + str(x["id"]) for x in results] # Will expand to a bigger link when searched
    wines = [x["wine"] for x in results]
    ratings = [x["rating"] for x in results]
    flavors = [x["flavors"] for x in results]
    foods = [x["foods"] for x in results]
    country_code = [x["country"] for x in results]
    region = [x["region"] for x in results]

    # Clean flavors by removing _ and capitalizing
    flavors = [[y.replace("_", " ").title() for y in x] for x in flavors]

    # Get number of reviews
    num_reviews = [x["rating_count"] for x in results]

    return pd.DataFrame(
        {
            "winery": wineries,
            "wine": wines,
            "link": links,
            "region": region,
            "country": country_code,
            "rating": ratings,
            "flavors": flavors,
            "foods": foods,
            "num_reviews": num_reviews,
        }
    )


# Get data
dataframe = get_vivino_data(
    name="",
    country_code="FR",
    currency_code="USD",
    min_rating=1,
    ratings_count_min=1000,
    price_range_max=500,
    price_range_min=0,
    wine_type_ids=1,
)

dataframe.head()

Unnamed: 0,winery,wine,link,region,country,rating,flavors,foods,num_reviews
0,Château de Lavagnac,Bordeaux 2015,https://vivino.com/wines/93384618,Bordeaux,France,3.6,"[Red Fruit, Black Fruit, Earth, Oak, Non Oak, ...","[Beef, Veal, Game (deer, venison), Poultry]",76
1,Château de Callac,Graves Rouge 2011,https://vivino.com/wines/2137427,Graves,France,3.6,"[Earth, Oak, Black Fruit, Non Oak, Red Fruit, ...","[Beef, Lamb, Game (deer, venison), Poultry]",221
2,Château Lacoste Garzac,Bordeaux 2022,https://vivino.com/wines/171614480,Bordeaux,France,3.4,"[Oak, Black Fruit, Earth, Non Oak, Red Fruit, ...","[Beef, Veal, Game (deer, venison), Poultry]",2054
3,Château Haut Bernin,Bordeaux 2015,https://vivino.com/wines/146357367,Bordeaux,France,3.3,"[Black Fruit, Red Fruit, Non Oak, Oak, Vegetal...","[Beef, Veal, Game (deer, venison), Poultry]",69
4,Château La Fleur Saint-Jean,Bordeaux 2009,https://vivino.com/wines/1667151,Bordeaux,France,3.6,"[Red Fruit, Spices, Oak, Black Fruit, Microbio]","[Beef, Veal, Game (deer, venison), Poultry]",168


In [3]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    """
    Extract text from a PDF file.
    
    Args:
        pdf_path (str): Path to the PDF file
        
    Returns:
        dict: Dictionary containing page numbers and their corresponding text
    """
    # Dictionary to store text from each page
    text_by_page = {}
    
    try:
        # Open the PDF file in binary read mode
        with open(pdf_path, 'rb') as file:
            # Create a PDF reader object
            pdf_reader = PyPDF2.PdfReader(file)
            
            # Get the number of pages
            num_pages = len(pdf_reader.pages)
            
            # Extract text from each page
            for page_num in range(num_pages):
                # Get the page object
                page = pdf_reader.pages[page_num]
                
                # Extract text from the page
                text = page.extract_text()
                
                # Store the text in our dictionary
                text_by_page[page_num + 1] = text
                
        return text_by_page
    
    # Error Messaging
    except FileNotFoundError:
        print(f"Error: The file {pdf_path} was not found.")
        return None
    except PyPDF2.PdfReadError:
        print("Error: Invalid or corrupted PDF file.")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        return None



In [4]:
print(extract_text_from_pdf("menus/coucou-wine.pdf"))

{1: "SPARKLING  |  FRANCE  |  CHAMPAGNE\nNON-VINTAGE\n1148 Alexandre Bonnet / 'La Geande' 7 Cepages' / Extra Brut NV 310\n1147 Alexandre Filaine / 'Cuvée Spéciale' / Brut NV 235\n1159 Alexandre Filaine / 'DMY' / Brut NV 295\n1116 Georges Laval / 'Cumières' 1er cru / Brut Nature NV 288\n1199 Laurent Perrier / 'Grand Siècle - No. 29' / Brut NV 685\nBLANC DE BLANCS\n1100 Pascal Agrapart / 'Minèral' / Extra Brut 2018 350\n1187 Pascal Agrapart / 'Vénus' / Extra Brut 2018 650\n1124 Raphaël et Vincent Bérêche  / 'Côte' /  Grand Cru /  Extra Brut 2005 498\n1203 Billecart-Salmon / 'Cuvée Louis Salmon' / Brut 2008 585\n1146 Franck Bonville / 'Les Belles Voyes' / Brut 2016 298\n1115 Ulysse Collin / 'Les Roises' / Extra Brut 2011 1450\n1168 Ulysse Collin / 'Les Pierrières' /  Brut 2018 725\n1106 Ulysse Collin / 'Les Pierrières' /  Brut 2011 1095\n1144 Emilien Feneuil / 'Cuvée Totum' / Extra Brut chardonnay -  petit meslier 2018 495\n1178 Emilien Feneuil / 'Cuvée Mix' / Extra Brut chardonnay -  pet

# Create PDF Parser

In [36]:
from dotenv import load_dotenv
import os

load_dotenv(dotenv_path='config.env')
google_key = os.getenv('GOOGLE_KEY')

if google_key is None:
    raise ValueError("GOOGLE_KEY not found in .env file")

In [37]:
import google.generativeai as genai
from typing import List, Dict
import json
import PyPDF2

# Made with Claude 3.5

class GeminiWineParser:
    def __init__(self, api_key: str):
        """Initialize the Gemini parser with API key"""
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-1.5-pro')
        
    def parse_wine_list(self, text: str) -> List[Dict]:
        """
        Parse wine list text using Gemini 1.5
        
        Args:
            text (str): The wine list text to parse
            
        Returns:
            List[Dict]: List of parsed wine entries
        """
        prompt = f"""Extract wine information from the text below into a structured format.
        For each wine entry, extract:
        - ID number
        - Producer
        - Wine name
        - Type (e.g., NON-VINTAGE, BLANC DE BLANCS)
        - Main Type (e.g., SPARKLING, WHITE, RED, ROSE)
        - Region
        - Vintage (if available)
        - Price
        - Size (glass, bottle, half bottle, magnum)
        
        Format as JSON with missing fields as null but get as many wines as possible even if some fields are missing.
        
        Text to parse:
        {text}
        
        Respond with only valid JSON in this exact format:
        {{
            "wines": [
                {{
                    "id": "1234",
                    "producer": "Producer Name",
                    "name": "Wine Name",
                    "type": "Wine Type",
                    "region": "Region",
                    "country": "Country",
                    "vintage": "2020",
                    "price": "123",
                    "size": "bottle"
                }}
            ]
        }}"""

        try:
            response = self.model.generate_content(
                prompt,
                generation_config={
                    'temperature': 0.0,  # Use deterministic output
                    'top_p': 1.0,
                    'top_k': 1
                }
            )
            
            # Find the JSON in the response
            response_text = response.text
            # Look for JSON between ```json and ``` if present
            if '```json' in response_text:
                json_str = response_text.split('```json')[1].split('```')[0].strip()
            else:
                json_str = response_text.strip()
                
            # Parse the JSON response
            json_response = json.loads(json_str)
            return json_response["wines"]
            
        except Exception as e:
            print(f"Error parsing wine list: {str(e)}")
            return []

    def parse_pdf_and_wine_list(self, pdf_path: str, page_number: int = 1) -> List[Dict]:
        """
        Extract text from PDF and parse wine list
        
        Args:
            pdf_path (str): Path to PDF file
            page_number (int): Page number to parse (default: 1)
            
        Returns:
            List[Dict]: List of parsed wine entries
        """
        try:
            with open(pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                if page_number <= len(reader.pages):
                    text = reader.pages[page_number - 1].extract_text()
                    return self.parse_wine_list(text)
                else:
                    raise ValueError(f"PDF has only {len(reader.pages)} pages")
        except Exception as e:
            print(f"Error processing PDF: {str(e)}")
            return []
    
    def save_to_json(self, wines: List[Dict], output_file: str):
        """Save parsed wines to a JSON file"""
        try:
            with open(output_file, 'w') as f:
                json.dump({'wines': wines}, f, indent=2)
            print(f"Successfully saved to {output_file}")
        except Exception as e:
            print(f"Error saving to file: {str(e)}")

# Add API Key
parser = GeminiWineParser(google_key)

# Example wine list text
sample_text = """SPARKLING  |  FRANCE  |  CHAMPAGNE
NON-VINTAGE
1148 Alexandre Bonnet / 'La Geande' 7 Cepages' / Extra Brut NV 310
1147 Alexandre Filaine / 'Cuvée Spéciale' / Brut NV 235
BLANC DE BLANCS
1100 Pascal Agrapart / 'Minèral' / Extra Brut 2018 350"""

# Parse text directly
results = parser.parse_wine_list(sample_text)

# Print results
print(json.dumps(results, indent=2))

[
  {
    "id": "1148",
    "producer": "Alexandre Bonnet",
    "name": "'La Geande' 7 Cepages'",
    "type": "Extra Brut",
    "main_type": "SPARKLING",
    "region": "CHAMPAGNE",
    "country": "FRANCE",
    "vintage": "NV",
    "price": "310",
    "size": "bottle"
  },
  {
    "id": "1147",
    "producer": "Alexandre Filaine",
    "name": "'Cuv\u00e9e Sp\u00e9ciale'",
    "type": "Brut",
    "main_type": "SPARKLING",
    "region": "CHAMPAGNE",
    "country": "FRANCE",
    "vintage": "NV",
    "price": "235",
    "size": "bottle"
  },
  {
    "id": "1100",
    "producer": "Pascal Agrapart",
    "name": "'Min\u00e8ral'",
    "type": "Extra Brut",
    "main_type": "SPARKLING",
    "region": "CHAMPAGNE",
    "country": "FRANCE",
    "vintage": "2018",
    "price": "350",
    "size": "bottle"
  }
]


In [38]:
text = extract_text_from_pdf("menus/rake-wine.pdf")
pages = [text[i] for i in text.keys()]

In [39]:
from tqdm import tqdm

parser = GeminiWineParser(google_key)


# Try with full page
for page in tqdm(pages[:1]):
    results = parser.parse_wine_list(page)
    
    
# Create dataframe
df = pd.DataFrame(results)
df

100%|██████████| 1/1 [00:35<00:00, 35.11s/it]


Unnamed: 0,id,producer,name,type,main_type,region,country,vintage,price,size
0,,Chëpìka,Petnat Catawba,PETNAT,WHITE,Finger Lakes,USA,2021.0,64.0,bottle
1,,La Ferme du Vert,L' Angelou Blanc Bulle,,WHITE,Southwest,FRA,2021.0,56.0,bottle
2,,Guiborat,Prisme - Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU EXTRA BRUT",SPARKLING,Champagne,FRA,,144.0,bottle
3,,Pierre Moncuit,Delos Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,,117.0,bottle
4,,Robert Moncuit,Millésime Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,2013.0,196.0,bottle
5,,Azienda Agricola Monban,Questo Neanche,COL FONDO,SPARKLING,Veneto,ITA,2021.0,45.0,bottle
6,,Diletta Tonello,Marachelle,FRIZZANTE,WHITE,Veneto,ITA,,53.0,bottle
7,,Aldo Viola,Brutto,,WHITE,Sicily,ITA,2022.0,49.0,bottle
8,,Joao Pato,Ducking,PETNAT,WHITE,Bairrada,PRT,2022.0,61.0,bottle
9,,Familie Bauer,Unsprung,PETNAT,WHITE,Wagram,AUT,2021.0,52.0,bottle


In [40]:
df.type.value_counts()

PETNAT                                   4
BLANC DE BLANCS, GRAND CRU BRUT          2
BLANC DE BLANCS, GRAND CRU EXTRA BRUT    1
COL FONDO                                1
FRIZZANTE                                1
FRIZZANTE ROSATO                         1
ROSÉ DE SAIGNÉE                          1
BRUT NATURE                              1
METODO TRADICIONAL                       1
METODE ANCESTRAL                         1
Name: type, dtype: int64

In [22]:
def get_vivino_table(wine_df):
    # Run get_vivino_data for each wine in the dataframe

    countries = wine_df["country"].unique()
    print(f"Getting data for {countries} countries")
    country_mapping = {country.name : country.alpha_2 for country in pycountry.countries if country.name in countries}

    wine_df["country_code"] = wine_df["country"].map(country_mapping)

    return wine_df

# Call the function
get_vivino_table(df)


Getting data for ['FRA' 'USA' 'DEU' 'SVN' 'CHL' 'ESP'] countries


Unnamed: 0,id,producer,name,type,main_type,region,country,vintage,price,country_code
0,,Pierre Gimonnet & Fils,"""BdB"" 1er Cru Brut",BLANC DE BLANCS NV,SPARKLING,Champagne,FRA,,68,
1,,Ployez-Jacquemart,Extra Brut Rosé,ROSÉ NV,SPARKLING,Champagne,FRA,,62,
2,,Domaine Moreau-Naudet,Chablis,CHARDONNAY,WHITE,Burgundy,FRA,2020.0,52,
3,,Domaine Roger & Didier Raimbault,Sancerre,SAUVIGNON BLANC,WHITE,Loire,FRA,2020.0,33,
4,,Kistler,"""Les Noisetiers"", Sonoma Coast",CHARDONNAY,WHITE,California,USA,2020.0,71,
5,,Dashe Cellars,Dry Creek Valley Zinfandel,ZINFANDEL,RED,California,USA,2017.0,41,
6,,Corison,Napa Valley Cabernet Sauvignon,CABERNET SAUVIGNON,RED,California,USA,2018.0,116,
7,,O. Horiot,"""Métisse-Les Riceys"", Brut Nature",BRUT NATURE,SPARKLING,Champagne,FRA,,550,
8,,Domaine Benoit Courault,"""Gilbourg"", VDF",CHENIN BLANC,WHITE,Loire Valley,FRA,2020.0,180,
9,,Weingut J. J. Prüm,"""Graacher Himmelreich"", Kabinett",RIESLING,WHITE,Mosel,DEU,2020.0,254,


In [42]:
import requests
from bs4 import BeautifulSoup
import re

# Define the search now that we know the names
def vivino_search(name, producer, type,	region,	country, vintage, menu_price):

    # Define the base URL
    base_url = "https://www.vivino.com/search/wines"

    # Create the search query
    query = f"{name} {producer} {type} {vintage} {region} {country}"

    # Send request to Vivino search page
    params = {"q": query}
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    }

    # Send GET request
    response = requests.get(base_url, params=params, headers=headers)

    # Check if request was successful
    if response.status_code != 200:
        print("Failed to fetch data")
        return None

    # Parse the HTML response
    soup = BeautifulSoup(response.text, "html.parser")

    # Find first wine result
    first_result = soup.select_one(".card.card-lg")
    if not first_result:
        print("No results found.")
        return None

    # Extract wine details
    try:
        wine_name = first_result.select_one(".wine-card__name").text.strip()
        link = "https://www.vivino.com" + first_result.select_one("a")["href"]
        country = first_result.select_one(".wine-card__region [data-item-type='country']").text.strip()
        region = first_result.select_one(".wine-card__region .link-color-alt-grey").text.strip()
        rating = first_result.select_one(".average__number").text.strip() if first_result.select_one(".average__number") else "N/A"
        num_ratings = first_result.select_one(".text-micro").text.split(' ratings')[0].strip() if first_result.select_one(".text-micro") else "N/A"
        price = first_result.select_one(".wine-price-value").text.strip() if first_result.select_one(".wine-price-value") else "N/A"

    except AttributeError:
        print("Error extracting data")
        return None
    

    print("Result found:", wine_name)

    print("Checking link:", link)

    link_response = requests.get(link, headers=headers)
    if link_response.status_code != 200:
        print("Failed to fetch data")
        return None
    link_soup = BeautifulSoup(link_response.text, "html.parser")
    
    # Save the data as link.txt
    with open("link.txt", "w") as f:
        f.write(str(link_soup))

    try:
        food_container = link_soup.select_one(".foodPairing__foodContainer--1bvxM")

        # Extract food pairing names
        food_pairings = [str(a).split('aria-label="')[1].split('"')[0] for a in food_container.find_all("a")]

    except AttributeError:
        print("Error extracting food pairings")
        food_pairings = []

    # Extract price if not available
    if len(price) <= 1:
        # print("Price not available. Extracting from page")
        try:
            script_tag = link_soup.find("script", {"type": "application/ld+json"})

            # Load the JSON data
            json_data = json.loads(script_tag.string)

            # Extract the price
            price = json_data.get("offers", {}).get("price")
            
            if price is None:
                # Find the price element
                price_element = link_soup.find("span", class_="purchaseAvailabilityPPC__amount--2_4GT")

                # Extract the text and clean it
                price = price_element.text.strip() if price_element else "N/A"

        except AttributeError:
            print("Error extracting price")

    # Check if price is a number
    if price != "N/A" and price != "-":
        price = price.replace("$", "")
        price = price.replace(" ", "")
        price = price.replace("€", "")
        price = price.replace("£", "")
        price = price.replace("¥", "")
        price = price.replace("₩", "")
        price = price.replace("₹", "")

        try:
            price = float(price)
            price_multiplier = menu_price / price
        except ValueError:
            print("Error converting price to float")
            price_multiplier = "N/A"
    else:
        price_multiplier = "N/A"

    # Create output
    # Return wine data
    data = {
        "name": wine_name,
        "link": link,
        "country": country,
        "region": region,
        "rating": rating,
        "num_ratings": num_ratings,
        "price": price,
        "price_multiplier": price_multiplier,
        "food_pairings": food_pairings
    }

    return data


In [20]:
import requests
from bs4 import BeautifulSoup
import re

# Define the search now that we know the names
def vivino_search_mp(wine_tuple):
    name, producer, type, region, country, vintage, menu_price = wine_tuple

    # Define the base URL
    base_url = "https://www.vivino.com/search/wines"

    # Create the search query
    query = f"{name} {producer} {type} {vintage} {region} {country}"

    # Send request to Vivino search page
    params = {"q": query}
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
    }

    # Send GET request
    response = requests.get(base_url, params=params, headers=headers)

    # Check if request was successful
    if response.status_code != 200:
        print("Failed to fetch data")
        return None

    # Parse the HTML response
    soup = BeautifulSoup(response.text, "html.parser")

    # Find first wine result
    first_result = soup.select_one(".card.card-lg")
    if not first_result:
        print("No results found.")
        return None

    # Extract wine details
    try:
        wine_name = first_result.select_one(".wine-card__name").text.strip()
        link = "https://www.vivino.com" + first_result.select_one("a")["href"]
        country = first_result.select_one(".wine-card__region [data-item-type='country']").text.strip()
        region = first_result.select_one(".wine-card__region .link-color-alt-grey").text.strip()
        rating = first_result.select_one(".average__number").text.strip() if first_result.select_one(".average__number") else "N/A"
        num_ratings = first_result.select_one(".text-micro").text.split(' ratings')[0].strip() if first_result.select_one(".text-micro") else "N/A"
        price = first_result.select_one(".wine-price-value").text.strip() if first_result.select_one(".wine-price-value") else "N/A"

    except AttributeError:
        print("Error extracting data")
        return None
    
    # Check if price is a number
    if price != "N/A":
        price = float(price)
        price_multiplier = menu_price / price

    # Return wine data
    data = {
        "name": wine_name,
        "link": link,
        "country": country,
        "region": region,
        "rating": rating,
        "num_ratings": num_ratings,
        "price": price,
        "price_multiplier": price_multiplier
    }

    print("Result found:", wine_name)

    print("Checking link:", link)

    link_response = requests.get(link, headers=headers)
    if link_response.status_code != 200:
        print("Failed to fetch data")
        return None
    link_soup = BeautifulSoup(link_response.text, "html.parser")
    
    # Save the data as link.txt
    with open("link.txt", "w") as f:
        f.write(str(link_soup))

    try:
        food_container = link_soup.select_one(".foodPairing__foodContainer--1bvxM")

        # Extract food pairing names
        food_pairings = [str(a).split('aria-label="')[1].split('"')[0] for a in food_container.find_all("a")]

        data["food_pairings"] = food_pairings

    except AttributeError:
        print("Error extracting food pairings")

    # Extract price if not available
    if len(price) <= 1:
        # print("Price not available. Extracting from page")
        try:
            script_tag = link_soup.find("script", {"type": "application/ld+json"})

            # Load the JSON data
            json_data = json.loads(script_tag.string)

            # Extract the price
            price = json_data.get("offers", {}).get("price")
            
            if price is None:
                # Find the price element
                price_element = link_soup.find("span", class_="purchaseAvailabilityPPC__amount--2_4GT")

                # Extract the text and clean it
                price = price_element.text.strip() if price_element else "N/A"


            data["price"] = price

        except AttributeError:
            print("Error extracting price")

    return data


In [51]:
# Test the function

# Select random row from the dataframe
row = df.sample(1).iloc[0]

print(row)

# Get wine data
wine_data = vivino_search(row["name"], row["producer"], row["type"], row["region"], row["country"], row["vintage"])

wine_data

id                              None
producer     Azienda Agricola Monban
name                  Questo Neanche
type                       COL FONDO
main_type                  SPARKLING
region                        Veneto
country                          ITA
vintage                         2021
price                             45
size                          bottle
Name: 5, dtype: object
Result found: Inama Azienda Agricola Vulcaia Fume Sauvignon del Veneto
Checking link: https://www.vivino.com/US/en/wines/1912147
Error extracting taste data


{'name': 'Inama Azienda Agricola Vulcaia Fume Sauvignon del Veneto',
 'link': 'https://www.vivino.com/US/en/wines/1912147',
 'country': 'Italy',
 'region': 'Veneto',
 'rating': '4.2',
 'num_ratings': '5051',
 'price': '$48.99',
 'food_pairings': ['Shellfish',
  'Pasta',
  'Mature and hard cheese',
  'Lean fish',
  'Cured Meat']}

In [25]:
df = pd.read_csv("menus/rake-wine.csv")

df.head()

Unnamed: 0,id,producer,name,type,main_type,region,country,vintage,price,size
0,,Chëpìka,Petnat Catawba,PETNAT,WHITE,Finger Lakes,USA,2021.0,64.0,bottle
1,,La Ferme du Vert,L' Angelou Blanc Bulle,,WHITE,Southwest,FRA,2021.0,56.0,bottle
2,,Guiborat,Prisme - Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU EXTRA BRUT",SPARKLING,Champagne,FRA,,144.0,bottle
3,,Pierre Moncuit,Delos Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,,117.0,bottle
4,,Robert Moncuit,Millésime Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,2013.0,196.0,bottle


In [43]:
from tqdm import tqdm
import time
import multiprocessing

# Get wine data for all wines in the dataframe
def vivino_search_all(df):
    # Create a copy of the dataframe
    new_df = df.copy()

    # Create lists to store the results
    food_pairings = []
    prices = []
    price_multipliers = []
    ratings = []
    num_ratings = []
    links = []

    # Set fail count to quite if 5 fails in a row
    fail_count = 0

    # Iterate over each row in the dataframe
    for index, row in tqdm(new_df.iterrows(), total=len(new_df)):
        # Get wine data
        wine_data = vivino_search(name=row["name"], producer=row["producer"], type=row["type"], region=row["region"], country=row["country"], vintage=row["vintage"], menu_price=row["price"])
        
        # Append the data to the list
        if wine_data:
            food_pairings.append(wine_data["food_pairings"])
            prices.append(wine_data["price"])
            ratings.append(wine_data["rating"])
            links.append(wine_data["link"])
            price_multipliers.append(wine_data["price_multiplier"])
            num_ratings.append(wine_data["num_ratings"])
            fail_count = 0
            
        else:
            food_pairings.append("N/A")
            prices.append("N/A")
            ratings.append("N/A")
            links.append("N/A")
            price_multipliers.append("N/A")
            num_ratings.append("N/A")
            fail_count += 1

        if fail_count >= 5:
            print("Failed 5 times in a row. Pausing for 3 minutes.")
            time.sleep(180)
            
        
        # Pause for a half second to avoid rate limiting
        time.sleep(0.51)

    # Add the lists to the dataframe
    new_df["food_pairings"] = food_pairings
    new_df["vivino_price"] = prices
    new_df["price_multiplier"] = price_multipliers
    new_df["rating"] = ratings
    new_df["link"] = links
    new_df["num_ratings"] = num_ratings
    
    # Rename the price column to menu_price
    new_df.rename(columns={"price": "menu_price"}, inplace=True)

    return new_df

# # Call the function
new_df = vivino_search_all(df)

new_df.head(27)

  0%|          | 0/166 [00:00<?, ?it/s]

Result found: Chëpìka Delaware Pet Nat
Checking link: https://www.vivino.com/US/en/wines/155265081
Error extracting food pairings


  1%|          | 1/166 [00:01<04:38,  1.69s/it]

Result found: Ferme du Vert Jérôme Galaup L'AngeLou Blanc Bulle
Checking link: https://www.vivino.com/US/en/wines/37976367


  1%|          | 2/166 [00:03<04:30,  1.65s/it]

Result found: François Gonet Extra Brut Blanc de Blancs Champagne Grand Cru 'Le Mesnil-sur-Oger'
Checking link: https://www.vivino.com/US/en/wines/163391155


  2%|▏         | 3/166 [00:05<04:32,  1.67s/it]

Result found: Pierre Moncuit Delos Blanc de Blancs Brut Champagne Grand Cru 'Le Mesnil-sur-Oger'
Checking link: https://www.vivino.com/US/en/wines/3765256


  2%|▏         | 4/166 [00:07<05:12,  1.93s/it]

Result found: Robert Moncuit Blanc de Blancs Brut Champagne Grand Cru 'Le Mesnil-sur-Oger' Millésime
Checking link: https://www.vivino.com/US/en/wines/165673198


  3%|▎         | 5/166 [00:09<05:03,  1.89s/it]

Result found: Inama Azienda Agricola Vulcaia Fume Sauvignon del Veneto
Checking link: https://www.vivino.com/US/en/wines/1912147


  4%|▎         | 6/166 [00:10<04:58,  1.86s/it]

Result found: Tonello Marachelle Frizzante
Checking link: https://www.vivino.com/US/en/wines/174221101


  4%|▍         | 7/166 [00:12<04:59,  1.88s/it]

Result found: Aldo Viola Brutto Catarratto
Checking link: https://www.vivino.com/US/en/wines/160956141


  5%|▍         | 8/166 [00:14<04:55,  1.87s/it]

Result found: Luis Pato João Pato Branco
Checking link: https://www.vivino.com/US/en/wines/167366463


  5%|▌         | 9/166 [00:16<05:06,  1.95s/it]

Result found: Familie Bauer Barig Alte Reben
Checking link: https://www.vivino.com/US/en/wines/164244051


  6%|▌         | 10/166 [00:18<04:52,  1.87s/it]

Result found: Annesanti Raspato Pet Nat Rosé
Checking link: https://www.vivino.com/US/en/wines/161193783


  7%|▋         | 11/166 [00:20<04:52,  1.89s/it]

Result found: Laherte Freres Les Beaudiers Rosé de Saignée Vieilles Vignes de Pinot Meunier Extra-Brut Champagne
Checking link: https://www.vivino.com/US/en/wines/3477991


  7%|▋         | 12/166 [00:22<05:01,  1.96s/it]

Result found: Weszeli We Pét Nat Rosé
Checking link: https://www.vivino.com/US/en/wines/167794791


  8%|▊         | 13/166 [00:24<04:46,  1.87s/it]

Result found: Dulcinea Santos Ferreira Sidónio de Sousa Rosé Brut Nature
Checking link: https://www.vivino.com/US/en/wines/154894172


  8%|▊         | 14/166 [00:26<05:01,  1.98s/it]

Result found: Filipa Pato 3B Metodo Tradicional Rosé
Checking link: https://www.vivino.com/US/en/wines/1612574


  9%|▉         | 15/166 [00:28<04:56,  1.97s/it]

Result found: Mas Foraster Josep Foraster Trepat
Checking link: https://www.vivino.com/US/en/wines/1607472


 10%|▉         | 16/166 [00:30<04:52,  1.95s/it]

Result found: Nevio Scala Gargànte
Checking link: https://www.vivino.com/US/en/wines/155338314


 10%|█         | 17/166 [00:32<04:49,  1.94s/it]

Result found: Folicello Il Rosso Lambrusco Emilia
Checking link: https://www.vivino.com/US/en/wines/151048544


 11%|█         | 18/166 [00:34<04:44,  1.92s/it]

Result found: Escala Humana Livverá Malvasia
Checking link: https://www.vivino.com/US/en/wines/147702744


 11%|█▏        | 19/166 [00:35<04:36,  1.88s/it]

Result found: Heinrich Naked White
Checking link: https://www.vivino.com/US/en/wines/159554043


 12%|█▏        | 20/166 [00:37<04:30,  1.85s/it]

Result found: Matthias Warnung Basis Grüner Veltliner
Checking link: https://www.vivino.com/US/en/wines/26999350


 13%|█▎        | 21/166 [00:39<04:19,  1.79s/it]

Result found: Hager Matthias Grüner Veltliner
Checking link: https://www.vivino.com/US/en/wines/2985184


 13%|█▎        | 22/166 [00:41<04:26,  1.85s/it]

Result found: Au Bon Climat Chardonnay Los Alamos Vineyard
Checking link: https://www.vivino.com/US/en/wines/2187482


 14%|█▍        | 23/166 [00:43<04:19,  1.82s/it]

Result found: Arnot-Roberts Watson Ranch Vineyard Chardonnay
Checking link: https://www.vivino.com/US/en/wines/4736270


 14%|█▍        | 24/166 [00:45<04:26,  1.88s/it]

Result found: Martha Stoumen Nero d'Avola
Checking link: https://www.vivino.com/US/en/wines/154847452
Error extracting food pairings


 15%|█▌        | 25/166 [00:46<04:15,  1.81s/it]

Result found: Rogue Vine Grand Itata Tinto
Checking link: https://www.vivino.com/US/en/wines/4157682
Error extracting food pairings


 16%|█▌        | 26/166 [00:50<05:39,  2.43s/it]

Result found: Voirin-Jumel Coteaux Champenois Blanc Chardonnay de Cramant
Checking link: https://www.vivino.com/US/en/wines/9039780


 16%|█▋        | 27/166 [00:54<06:25,  2.77s/it]

Result found: Domaine de la Touraize Savagnin Sous Voile Arbois
Checking link: https://www.vivino.com/US/en/wines/176807180


 17%|█▋        | 28/166 [00:56<05:45,  2.51s/it]

Result found: Pierre Richard Les Marnes Chardonnay
Checking link: https://www.vivino.com/US/en/wines/167669890


 17%|█▋        | 29/166 [00:58<05:34,  2.44s/it]

Result found: Jean et Sébastien Dauvissat Chablis Grand Cru 'Les Preuses'
Checking link: https://www.vivino.com/US/en/wines/4941747


 18%|█▊        | 30/166 [01:01<05:48,  2.56s/it]

Result found: Domaine de l'Ecu Nobis
Checking link: https://www.vivino.com/US/en/wines/157874853
Error extracting food pairings


 19%|█▊        | 31/166 [01:02<05:11,  2.31s/it]

Result found: Manuel Pineau Le Jardin de La Hyonniére Melon de Bourgogne
Checking link: https://www.vivino.com/US/en/wines/171487343
Error extracting food pairings


 19%|█▉        | 32/166 [01:04<04:42,  2.11s/it]

Result found: Domaine du Closel - Château des Vaults Belle Dame Chenin Blanc
Checking link: https://www.vivino.com/US/en/wines/160770397


 20%|█▉        | 33/166 [01:06<04:22,  1.97s/it]

Result found: Nicolas Idiart Chenin Blanc
Checking link: https://www.vivino.com/US/en/wines/157388895


 20%|██        | 34/166 [01:08<04:12,  1.92s/it]

Result found: Lucien Crochet Le Chêne Marchand Sancerre Blanc
Checking link: https://www.vivino.com/US/en/wines/2600109


 21%|██        | 35/166 [01:10<04:14,  1.95s/it]

Result found: Mann Vignoble des 3 Terres La Lluna Pinot Noir
Checking link: https://www.vivino.com/US/en/wines/176095443


 22%|██▏       | 36/166 [01:12<04:41,  2.17s/it]

Result found: Franck Balthazar Côtes du Rhône
Checking link: https://www.vivino.com/US/en/wines/2045972


 22%|██▏       | 37/166 [01:14<04:22,  2.04s/it]

Result found: Vigneti Massa Derthona
Checking link: https://www.vivino.com/US/en/wines/1923172


 23%|██▎       | 38/166 [01:16<04:07,  1.93s/it]

Result found: Fratelli Ponte Roero Arneis
Checking link: https://www.vivino.com/US/en/wines/3981423


 23%|██▎       | 39/166 [01:17<04:01,  1.90s/it]

Result found: De Forville Moscato d'Asti
Checking link: https://www.vivino.com/US/en/wines/142245163


 24%|██▍       | 40/166 [01:20<04:07,  1.96s/it]

Result found: Nevio Scala Blanko
Checking link: https://www.vivino.com/US/en/wines/175326649


 25%|██▍       | 41/166 [01:21<03:57,  1.90s/it]

Result found: Anselmi San Vincenzo
Checking link: https://www.vivino.com/US/en/wines/1662726


 25%|██▌       | 42/166 [01:23<03:49,  1.85s/it]

Result found: Luisa I Ferretti Collezione Friulano
Checking link: https://www.vivino.com/US/en/wines/143564437


 26%|██▌       | 43/166 [01:25<03:56,  1.92s/it]

Result found: Regno di Bacco Friuli Colli Orientali Ribolla Gialla
Checking link: https://www.vivino.com/US/en/wines/3774335


 27%|██▋       | 44/166 [01:27<03:52,  1.90s/it]

Result found: Bruno Verdi Pinot Grigio
Checking link: https://www.vivino.com/US/en/wines/4985386


 27%|██▋       | 45/166 [01:29<03:49,  1.89s/it]

Result found: Tiberio Pecorino
Checking link: https://www.vivino.com/US/en/wines/1577902


 28%|██▊       | 46/166 [01:31<03:49,  1.91s/it]

Result found: Ferracane Guanciabianca Catarratto
Checking link: https://www.vivino.com/US/en/wines/8568902


 28%|██▊       | 47/166 [01:33<03:39,  1.84s/it]

Result found: Castellucci Miano Miano Bianco
Checking link: https://www.vivino.com/US/en/wines/4966645


 29%|██▉       | 48/166 [01:35<04:02,  2.06s/it]

Result found: Joh. Jos. Prüm Graacher Himmelreich Riesling Kabinett
Checking link: https://www.vivino.com/US/en/wines/4758356


 30%|██▉       | 49/166 [01:37<04:02,  2.07s/it]

Result found: Joh. Jos. Prüm Edition Weinhaus Wehlener Sonnenuhr Riesling Spätlese
Checking link: https://www.vivino.com/US/en/wines/171420718


 30%|███       | 50/166 [01:39<03:49,  1.98s/it]

Result found: Domaine Tetta Chardonnay Perlant
Checking link: https://www.vivino.com/US/en/wines/167284615


 31%|███       | 51/166 [01:41<03:38,  1.90s/it]

Result found: Dr. Konstantin Frank Riesling Dry
Checking link: https://www.vivino.com/US/en/wines/16426421


 31%|███▏      | 52/166 [01:43<04:03,  2.13s/it]

Result found: Usonia Cayuga
Checking link: https://www.vivino.com/US/en/wines/175645468
Error extracting food pairings


 32%|███▏      | 53/166 [01:45<03:43,  1.98s/it]

Result found: Távora Terras do Demo Verdelho
Checking link: https://www.vivino.com/US/en/wines/158321125


 33%|███▎      | 54/166 [01:47<03:34,  1.91s/it]

Result found: The Portugal Wine Firm Ayres
Checking link: https://www.vivino.com/US/en/wines/170490117


 33%|███▎      | 55/166 [01:49<03:29,  1.88s/it]

Result found: Niepoort V.V. Bical - Maria Gomes Vinhas Velhas
Checking link: https://www.vivino.com/US/en/wines/37935368


 34%|███▎      | 56/166 [01:51<03:47,  2.07s/it]

Result found: Joao Portugal Ramos Loios Medium-Sweet Branco
Checking link: https://www.vivino.com/US/en/wines/164772769


 34%|███▍      | 57/166 [01:53<03:30,  1.93s/it]

Result found: Eulogio Pomares O Estranxeiro Tinto
Checking link: https://www.vivino.com/US/en/wines/167110787


 35%|███▍      | 58/166 [01:55<03:40,  2.04s/it]

Result found: César Márquez La Salvación Godello
Checking link: https://www.vivino.com/US/en/wines/152878113
Error extracting food pairings


 36%|███▌      | 59/166 [01:57<03:28,  1.95s/it]

Result found: Iria Otero Sacabeira Albariño
Checking link: https://www.vivino.com/US/en/wines/154373922


 36%|███▌      | 60/166 [01:58<03:21,  1.90s/it]

Result found: Iria Otero Alebub
Checking link: https://www.vivino.com/US/en/wines/170464803
Error extracting food pairings


 37%|███▋      | 61/166 [02:00<03:11,  1.82s/it]

Result found: Bodegas Gallegas Gran Alanis Treixadura - Godello
Checking link: https://www.vivino.com/US/en/wines/154319144
Error extracting food pairings


 37%|███▋      | 62/166 [02:02<03:13,  1.86s/it]

Result found: Orto Vins Les Argiles d'Orto Blanc
Checking link: https://www.vivino.com/US/en/wines/39562813
Error extracting food pairings


 38%|███▊      | 63/166 [02:04<03:07,  1.82s/it]

Result found: Basa-Lore Hondarrabi Zuri
Checking link: https://www.vivino.com/US/en/wines/167487228


 39%|███▊      | 64/166 [02:06<03:06,  1.83s/it]

Result found: Talaiot Blanc Pla I Llevant Mallorca Premsal Macabeu Chardonnay Parellada Moscatel
Checking link: https://www.vivino.com/US/en/wines/9233335


 39%|███▉      | 65/166 [02:08<03:11,  1.90s/it]

Result found: Ruben Diaz Finca La Coronilla Chassellas Dore - Albillo
Checking link: https://www.vivino.com/US/en/wines/159358442
Error extracting food pairings


 40%|███▉      | 66/166 [02:09<03:06,  1.86s/it]

Result found: Victoria Ordoñez La Ola del Melillero Pedro Ximenez - Moscatel de Alejandría
Checking link: https://www.vivino.com/US/en/wines/144648944
Error extracting food pairings


 40%|████      | 67/166 [02:12<03:11,  1.93s/it]

Result found: Meinklang Graupert Pinot Gris
Checking link: https://www.vivino.com/US/en/wines/8371024


 41%|████      | 68/166 [02:14<03:12,  1.97s/it]

Result found: Curran Grenache Blanc
Checking link: https://www.vivino.com/US/en/wines/4736142


 42%|████▏     | 69/166 [02:15<03:06,  1.92s/it]

Result found: Villard JCV Charlie Villard Ramato Pinot Grigio
Checking link: https://www.vivino.com/US/en/wines/157388507
Error extracting food pairings


 42%|████▏     | 70/166 [02:17<02:56,  1.83s/it]

Result found: Les Vins Pirouettes Le Brutal de Jean-Marc
Checking link: https://www.vivino.com/US/en/wines/165681804


 43%|████▎     | 71/166 [02:19<02:58,  1.87s/it]

Result found: Les Solstices (FR) Cuvée Tradition Blanc de France
Checking link: https://www.vivino.com/US/en/wines/151405188
Error extracting food pairings


 43%|████▎     | 72/166 [02:21<03:01,  1.93s/it]

Result found: Marks & Spencer Val de Loire Sauvignon Blanc
Checking link: https://www.vivino.com/US/en/wines/6162517
Error extracting food pairings


 44%|████▍     | 73/166 [02:23<02:52,  1.86s/it]

Result found: Gabriel's Wine Kakhuri Mtsvane - Rkatsiteli
Checking link: https://www.vivino.com/US/en/wines/160549112


 45%|████▍     | 74/166 [02:25<02:54,  1.89s/it]

Result found: Dila-o Rkatsiteli - Mtsvane Amber Dry
Checking link: https://www.vivino.com/US/en/wines/153291990


 45%|████▌     | 75/166 [02:27<03:00,  1.99s/it]

Result found: Qvevri Wine Cellar Mtsvane Qvevri
Checking link: https://www.vivino.com/US/en/wines/154783616


 46%|████▌     | 76/166 [02:29<02:51,  1.90s/it]

Result found: Kiketi Farm Wine (კიკეთის ფერმის ღვინო) Khikhvi Qvevri
Checking link: https://www.vivino.com/US/en/wines/174046883


 46%|████▋     | 77/166 [02:30<02:41,  1.81s/it]

Result found: Nevio Scala Cóntame
Checking link: https://www.vivino.com/US/en/wines/158707779


 47%|████▋     | 78/166 [02:32<02:41,  1.84s/it]

Result found: Podere Pradarolo Velius Rosso Asciutto
Checking link: https://www.vivino.com/US/en/wines/2400430


 48%|████▊     | 79/166 [02:34<02:38,  1.82s/it]

Result found: Az. Agr. Cauda Giuseppe Terre Alfieri Nebbiolo
Checking link: https://www.vivino.com/US/en/wines/76495672


 48%|████▊     | 80/166 [02:36<02:48,  1.95s/it]

Result found: Mas del Périé - Fabien Jouves Skin-Contact Maceracion
Checking link: https://www.vivino.com/US/en/wines/163771968
Error extracting food pairings


 49%|████▉     | 81/166 [02:38<02:47,  1.97s/it]

Result found: Bojo do Luar Deu Bode
Checking link: https://www.vivino.com/US/en/wines/165412068


 49%|████▉     | 82/166 [02:40<02:46,  1.98s/it]

Result found: Bojo do Luar Doralice
Checking link: https://www.vivino.com/US/en/wines/167978638


 50%|█████     | 83/166 [02:42<02:40,  1.94s/it]

Result found: Celler Sanromà We've Made Contact
Checking link: https://www.vivino.com/US/en/wines/175114853
Error extracting food pairings


 51%|█████     | 84/166 [02:45<02:54,  2.13s/it]

Result found: Costador Metamorphika Xarel·lo Orange (Brisat)
Checking link: https://www.vivino.com/US/en/wines/157654106
Error extracting food pairings


 51%|█████     | 85/166 [02:47<02:54,  2.15s/it]

Result found: Alonso & Pedrajo Nauda Blanco
Checking link: https://www.vivino.com/US/en/wines/172638266


 52%|█████▏    | 86/166 [02:49<02:42,  2.03s/it]

Failed to fetch data


 52%|█████▏    | 87/166 [02:49<02:06,  1.60s/it]

Failed to fetch data


 53%|█████▎    | 88/166 [02:50<01:41,  1.30s/it]

Failed to fetch data


 54%|█████▎    | 89/166 [02:50<01:24,  1.10s/it]

Failed to fetch data


 54%|█████▍    | 90/166 [02:51<01:12,  1.05it/s]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 55%|█████▍    | 91/166 [03:02<04:49,  3.86s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 55%|█████▌    | 92/166 [03:12<07:16,  5.90s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 56%|█████▌    | 93/166 [03:23<08:54,  7.32s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 57%|█████▋    | 94/166 [03:34<09:59,  8.32s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 57%|█████▋    | 95/166 [03:44<10:40,  9.02s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 58%|█████▊    | 96/166 [03:55<11:05,  9.51s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 58%|█████▊    | 97/166 [04:06<11:18,  9.84s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 59%|█████▉    | 98/166 [04:16<11:25, 10.08s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 60%|█████▉    | 99/166 [04:27<11:26, 10.24s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 60%|██████    | 100/166 [04:37<11:23, 10.36s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 61%|██████    | 101/166 [04:48<11:18, 10.44s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 61%|██████▏   | 102/166 [04:59<11:11, 10.50s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 62%|██████▏   | 103/166 [05:09<11:04, 10.54s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 63%|██████▎   | 104/166 [05:20<10:55, 10.57s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 63%|██████▎   | 105/166 [05:31<10:45, 10.59s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 64%|██████▍   | 106/166 [05:41<10:35, 10.60s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 64%|██████▍   | 107/166 [05:52<10:25, 10.61s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 65%|██████▌   | 108/166 [06:02<10:15, 10.61s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 66%|██████▌   | 109/166 [06:13<10:04, 10.61s/it]

Result found: Arnot-Roberts Fellom Ranch Cabernet Sauvignon
Checking link: https://www.vivino.com/US/en/wines/4736264


 66%|██████▋   | 110/166 [06:15<07:28,  8.02s/it]

Result found: Matthiasson Cabernet Sauvignon
Checking link: https://www.vivino.com/US/en/wines/4885711


 67%|██████▋   | 111/166 [06:17<05:43,  6.25s/it]

Result found: Bedrock Wine Co. Cabernet Sauvignon
Checking link: https://www.vivino.com/US/en/wines/160010627


 67%|██████▋   | 112/166 [06:19<04:25,  4.92s/it]

Result found: Sky Vineyards Late Harvest Zinfandel
Checking link: https://www.vivino.com/US/en/wines/3314505


 68%|██████▊   | 113/166 [06:21<03:30,  3.96s/it]

Result found: Erggelet Brothers Zinfandel
Checking link: https://www.vivino.com/US/en/wines/166029741


 69%|██████▊   | 114/166 [06:23<02:57,  3.42s/it]

Result found: A Los Viñateros Bravos Granítico País
Checking link: https://www.vivino.com/US/en/wines/157410486
Error extracting food pairings


 69%|██████▉   | 115/166 [06:25<02:30,  2.95s/it]

Result found: A Los Viñateros Bravos Granítico Cinsault
Checking link: https://www.vivino.com/US/en/wines/152773611
Error extracting food pairings


 70%|██████▉   | 116/166 [06:27<02:11,  2.62s/it]

Result found: A Los Viñateros Bravos El Túnel
Checking link: https://www.vivino.com/US/en/wines/152772560
Error extracting food pairings


 70%|███████   | 117/166 [06:29<02:04,  2.54s/it]

Result found: Roberto Henriquez País Verde
Checking link: https://www.vivino.com/US/en/wines/158175790
Error extracting food pairings


 71%|███████   | 118/166 [06:31<01:53,  2.37s/it]

Result found: Vinogorje Brac SV.Vid Plavac Mali Kvalitetno Vino
Checking link: https://www.vivino.com/US/en/wines/4954897


 72%|███████▏  | 119/166 [06:33<01:44,  2.23s/it]

Result found: Domaine Les Grandes Vignes - Vaillant Le P'tit Vaillant Blanc
Checking link: https://www.vivino.com/US/en/wines/177224874
Error extracting food pairings


 72%|███████▏  | 120/166 [06:35<01:45,  2.29s/it]

Result found: Domaine Breton - Catherine & Pierre Breton Clos Sénéchal
Checking link: https://www.vivino.com/US/en/wines/4774002


 73%|███████▎  | 121/166 [06:37<01:40,  2.24s/it]

Result found: Couly-Dutheil Retour au Franc Chinon Cabernet Franc
Checking link: https://www.vivino.com/US/en/wines/43406158
Error extracting food pairings


 73%|███████▎  | 122/166 [06:39<01:29,  2.03s/it]

Result found: Pierre Richard Hermétique Trousseau
Checking link: https://www.vivino.com/US/en/wines/170698054


 74%|███████▍  | 123/166 [06:41<01:23,  1.95s/it]

Result found: Domaine Chevrot Santenay 1er Cru 'Clos Rousseau'
Checking link: https://www.vivino.com/US/en/wines/4769574


 75%|███████▍  | 124/166 [06:43<01:23,  1.99s/it]

Result found: Château Cambon Beaujolais
Checking link: https://www.vivino.com/US/en/wines/3660665


 75%|███████▌  | 125/166 [06:45<01:20,  1.97s/it]

Result found: Clusel-Roch Les Schistes Côte-Rôtie
Checking link: https://www.vivino.com/US/en/wines/158840078


 76%|███████▌  | 126/166 [06:47<01:19,  1.99s/it]

Result found: Charles et Francois Tardy Domaine des Entrefaux Crozes-Hermitage
Checking link: https://www.vivino.com/US/en/wines/1502011


 77%|███████▋  | 127/166 [06:49<01:18,  2.00s/it]

Result found: Domaine André Mathieu Châteauneuf-du-Pape
Checking link: https://www.vivino.com/US/en/wines/1487589


 77%|███████▋  | 128/166 [06:51<01:15,  1.99s/it]

Result found: Château Tour de Bonnet Grande Cuvée Merlot - Cabernet Sauvignon
Checking link: https://www.vivino.com/US/en/wines/36343673


 78%|███████▊  | 129/166 [06:53<01:15,  2.04s/it]

Result found: Château Peyredon Lagravette Haut-Médoc Rouge
Checking link: https://www.vivino.com/US/en/wines/4824864


 78%|███████▊  | 130/166 [06:55<01:11,  1.99s/it]

Result found: Matassa Coume de l'Olla Rouge
Checking link: https://www.vivino.com/US/en/wines/30923686
Error extracting food pairings


 79%|███████▉  | 131/166 [06:57<01:08,  1.95s/it]

Result found: Clos d'Audhuy Toujours Plus Malbec
Checking link: https://www.vivino.com/US/en/wines/175280909


 80%|███████▉  | 132/166 [06:58<01:03,  1.87s/it]

Result found: Naberauli Wines (ნაბერაული) Saperavi - Dzelshavi (საფერავი - ძელშავი წითელი მშრალი)
Checking link: https://www.vivino.com/US/en/wines/169317194


 80%|████████  | 133/166 [07:00<01:02,  1.89s/it]

Result found: Trullo di Pezza Licurti Primitivo di Manduria
Checking link: https://www.vivino.com/US/en/wines/5818920


 81%|████████  | 134/166 [07:02<01:03,  1.99s/it]

Result found: Emidio Pepe Montepulciano d'Abruzzo
Checking link: https://www.vivino.com/US/en/wines/1640203


 81%|████████▏ | 135/166 [07:06<01:14,  2.41s/it]

Result found: De Angelis Corvi Fonte Raviliano Montepulciano d'Abruzzo Colline Teramane
Checking link: https://www.vivino.com/US/en/wines/155607401


 82%|████████▏ | 136/166 [07:08<01:06,  2.22s/it]

Result found: Istine Chianti Classico
Checking link: https://www.vivino.com/US/en/wines/4993003


 83%|████████▎ | 137/166 [07:09<00:59,  2.05s/it]

Result found: Ombretta Agricola Fermo Rosso
Checking link: https://www.vivino.com/US/en/wines/161862994


 83%|████████▎ | 138/166 [07:12<00:59,  2.12s/it]

Result found: Fuso Tèh Rosso
Checking link: https://www.vivino.com/US/en/wines/172637975


 84%|████████▎ | 139/166 [07:13<00:54,  2.03s/it]

Result found: Cascina Gasparda Da Nord Rosso
Checking link: https://www.vivino.com/US/en/wines/172251976


 84%|████████▍ | 140/166 [07:15<00:52,  2.01s/it]

Result found: Fabio Gea La Msòira e'l Rastel Pino Dolcetto d'Alba Superiore
Checking link: https://www.vivino.com/US/en/wines/131219095


 85%|████████▍ | 141/166 [07:17<00:50,  2.01s/it]

Result found: Rivella Serafino Montestefano Barbaresco
Checking link: https://www.vivino.com/US/en/wines/1582904


 86%|████████▌ | 142/166 [07:19<00:46,  1.94s/it]

Result found: Marziano E Enrico Abbona d'Alba Nebbiolo
Checking link: https://www.vivino.com/US/en/wines/15558554


 86%|████████▌ | 143/166 [07:21<00:42,  1.84s/it]

Result found: Viberti Giacomo e Figli Palotu Langhe Nebbiolo
Checking link: https://www.vivino.com/US/en/wines/4982454


 87%|████████▋ | 144/166 [07:22<00:39,  1.81s/it]

Result found: Ap Vino Rosso
Checking link: https://www.vivino.com/US/en/wines/1468844


 87%|████████▋ | 145/166 [07:24<00:37,  1.77s/it]

Result found: Feudo Montoni Vigna Lagnusa Nero d'Avola
Checking link: https://www.vivino.com/US/en/wines/10417723


 88%|████████▊ | 146/166 [07:27<00:39,  1.98s/it]

Result found: Bichi Gordo Guapo
Checking link: https://www.vivino.com/US/en/wines/176312865
Error extracting food pairings


 89%|████████▊ | 147/166 [07:28<00:35,  1.89s/it]

Result found: Ravines Cabernet Franc
Checking link: https://www.vivino.com/US/en/wines/2452978


 89%|████████▉ | 148/166 [07:31<00:37,  2.07s/it]

Result found: Portal do Minho Branco
Checking link: https://www.vivino.com/US/en/wines/1585652


 90%|████████▉ | 149/166 [07:33<00:34,  2.05s/it]

Result found: Bojo do Luar Duplo
Checking link: https://www.vivino.com/US/en/wines/171676424


 90%|█████████ | 150/166 [07:35<00:32,  2.06s/it]

Result found: Hugo Mendes Castelão
Checking link: https://www.vivino.com/US/en/wines/165588160


 91%|█████████ | 151/166 [07:37<00:31,  2.09s/it]

Result found: Casa de Saima Grande Reserva Baga da Corga
Checking link: https://www.vivino.com/US/en/wines/3491716


 92%|█████████▏| 152/166 [07:39<00:27,  1.99s/it]

Result found: Maurer Crazylud
Checking link: https://www.vivino.com/US/en/wines/164547547
Error extracting food pairings


 92%|█████████▏| 153/166 [07:40<00:24,  1.90s/it]

Result found: Iria Otero A Seara Castes Tintas
Checking link: https://www.vivino.com/US/en/wines/168047540


 93%|█████████▎| 154/166 [07:43<00:23,  1.97s/it]

Result found: Iria Otero Teixugo
Checking link: https://www.vivino.com/US/en/wines/164855876


 93%|█████████▎| 155/166 [07:44<00:20,  1.82s/it]

Result found: César Márquez Pico Ferreira
Checking link: https://www.vivino.com/US/en/wines/154558785


 94%|█████████▍| 156/166 [07:46<00:17,  1.80s/it]

Result found: R. López de Heredia Viña Tondonia Reserva
Checking link: https://www.vivino.com/US/en/wines/1970246


 95%|█████████▍| 157/166 [07:49<00:19,  2.17s/it]

Result found: Marques del Atrio Tempranillo - Graciano
Checking link: https://www.vivino.com/US/en/wines/161225568


 95%|█████████▌| 158/166 [07:51<00:17,  2.14s/it]

Result found: Vega Sicilia Valbuena 5º (Reserva)
Checking link: https://www.vivino.com/US/en/wines/1491028


 96%|█████████▌| 159/166 [07:53<00:15,  2.17s/it]

Failed to fetch data


 96%|█████████▋| 160/166 [07:54<00:10,  1.71s/it]

Failed to fetch data


 97%|█████████▋| 161/166 [07:54<00:06,  1.38s/it]

Failed to fetch data


 98%|█████████▊| 162/166 [07:55<00:04,  1.15s/it]

Failed to fetch data


 98%|█████████▊| 163/166 [07:56<00:02,  1.01it/s]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 99%|█████████▉| 164/166 [08:06<00:07,  3.87s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


 99%|█████████▉| 165/166 [08:17<00:05,  5.99s/it]

Failed to fetch data
Failed 5 times in a row. Pausing for 10 seconds.


100%|██████████| 166/166 [08:28<00:00,  3.06s/it]


Unnamed: 0,id,producer,name,type,main_type,region,country,vintage,menu_price,size,food_pairings,vivino_price,price_multiplier,rating,link
0,,Chëpìka,Petnat Catawba,PETNAT,WHITE,Finger Lakes,USA,2021.0,64.0,bottle,[],30.15,2.12272,3.9,https://www.vivino.com/US/en/wines/155265081
1,,La Ferme du Vert,L' Angelou Blanc Bulle,,WHITE,Southwest,FRA,2021.0,56.0,bottle,"[Shellfish, Poultry, Appetizers and snacks, Le...",21.59,2.593793,3.9,https://www.vivino.com/US/en/wines/37976367
2,,Guiborat,Prisme - Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU EXTRA BRUT",SPARKLING,Champagne,FRA,,144.0,bottle,"[Pork, Shellfish, Rich fish (salmon, tuna etc)...",23.99,6.002501,4.0,https://www.vivino.com/US/en/wines/163391155
3,,Pierre Moncuit,Delos Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,,117.0,bottle,"[Pork, Shellfish, Rich fish (salmon, tuna etc)...",,,4.1,https://www.vivino.com/US/en/wines/3765256
4,,Robert Moncuit,Millésime Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,2013.0,196.0,bottle,"[Pork, Shellfish, Rich fish (salmon, tuna etc)...",,,4.2,https://www.vivino.com/US/en/wines/165673198
5,,Azienda Agricola Monban,Questo Neanche,COL FONDO,SPARKLING,Veneto,ITA,2021.0,45.0,bottle,"[Shellfish, Pasta, Mature and hard cheese, Lea...",48.99,0.918555,4.2,https://www.vivino.com/US/en/wines/1912147
6,,Diletta Tonello,Marachelle,FRIZZANTE,WHITE,Veneto,ITA,,53.0,bottle,"[Appetizers and snacks, Aperitif]",,,4.0,https://www.vivino.com/US/en/wines/174221101
7,,Aldo Viola,Brutto,,WHITE,Sicily,ITA,2022.0,49.0,bottle,"[Appetizers and snacks, Aperitif]",,,4.0,https://www.vivino.com/US/en/wines/160956141
8,,Joao Pato,Ducking,PETNAT,WHITE,Bairrada,PRT,2022.0,61.0,bottle,"[Shellfish, Rich fish (salmon, tuna etc), Appe...",,,3.9,https://www.vivino.com/US/en/wines/167366463
9,,Familie Bauer,Unsprung,PETNAT,WHITE,Wagram,AUT,2021.0,52.0,bottle,"[Pork, Shellfish, Poultry, Rich fish (salmon, ...",22.99,2.261853,4.1,https://www.vivino.com/US/en/wines/164244051


In [22]:
def vivino_search_multiprocess(df):
    """
    Process DataFrame rows in parallel using multiprocessing.

    Args:
        df (pd.DataFrame): Input DataFrame containing wine information

    Returns:
        pd.DataFrame: DataFrame with added Vivino search results
    """
    # Convert DataFrame to list of dictionaries for easier serialization
    records = df.to_dict("records")

    # Create a pool of processes
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() - 1)

    # Run the search for each record in parallel
    results = list(tqdm(pool.imap(vivino_search_mp, records), total=len(records)))

    # Close the pool
    pool.close()
    pool.join()

    # Create lists to store the results
    food_pairings = []
    prices = []
    price_multipliers = []
    ratings = []
    links = []

    # Extract the results from the list
    for result in results:
        if result:
            food_pairings.append(result.get("food_pairings", []))
            prices.append(result.get("price", "N/A"))
            price_multipliers.append(result.get("price_multiplier", 1.0))
            ratings.append(result.get("rating", "N/A"))
            links.append(result.get("link", "N/A"))
        else:
            # Handle None results
            food_pairings.append([])
            prices.append("N/A")
            price_multipliers.append(1.0)
            ratings.append("N/A")
            links.append("N/A")

    # Create a new DataFrame with the results
    new_df = df.copy()
    new_df["food_pairings"] = food_pairings
    new_df["vivino_price"] = prices
    new_df["price_multiplier"] = price_multipliers
    new_df["rating"] = ratings
    new_df["link"] = links

    # Rename the price column to menu_price
    new_df.rename(columns={"price": "menu_price"}, inplace=True)

    return new_df

# Test the function
vivino_df = vivino_search_multiprocess(df.head(10))

vivino_df.head()

  0%|          | 0/10 [00:00<?, ?it/s]Process SpawnPoolWorker-26:
Process SpawnPoolWorker-23:
Process SpawnPoolWorker-22:
Process SpawnPoolWorker-25:
Process SpawnPoolWorker-24:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/Austin/opt/anaconda3/envs/ANLY501/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/Austin/opt/anaconda3/en

KeyboardInterrupt: 

In [3]:
# Test functions imports
from functions import *
from dotenv import load_dotenv
import os
import pandas as pd
import requests
import json
import PyPDF2
import google.generativeai as genai
import re
import numpy as np

load_dotenv(dotenv_path='config.env')
google_key = os.getenv('GOOGLE_KEY')

if google_key is None:
    raise ValueError("GOOGLE_KEY not found in .env file")

parser = GeminiWineParser(google_key)

# Read the PDF
text = extract_text_from_pdf("menus/rake-wine.pdf")
pages = [text[i] for i in text.keys()]

# View the first page
pages[0]

'White\nChëpìka, Petnat Catawba 2021 Finger Lakes, NY 64\nLa Ferme du Vert, "L\' Angelou Blanc Bulle" Mauzac Blanc 2021 Southwest, FRA 56\nGuiborat, "Prisme - Blanc de Blancs", Grand Cru Extra Brut Chardonnay NV Champagne, FRA 144\nPierre Moncuit, "Delos Blanc de Blancs" Grand Cru Brut Chardonnay NV Champagne, FRA 117\nRobert Moncuit, "Millésime Blanc de Blancs", Grand Cru Brut Chardonnay 2013 Champagne, FRA 196\nAzienda Agricola Monban, "Questo Neanche", Col Fondo Glera 2021 Veneto, ITA 45\nDiletta Tonello, "Marachelle", Frizzante Durella blend NV Veneto, ITA 53\nAldo Viola, "Brutto" Catarratto 2022 Sicily, ITA 49\nJoao Pato, "Ducking" Petnat Sercialinho 2022 Bairrada, PRT 61\nFamilie Bauer, "Unsprung" Petnat Gruner + Roter V. + Riesling 2021 Wagram, AUT 52\nRosé\nAnnesanti, "Raspato", Frizzante Rosato Sangiovese + Aleatico 2020 Umbria, ITA 57\nLaherte Frères, "Les Beaudiers", Rosé de Saignée Pinot Meunier NV Champagne, FRA 166\nWeszeli, "We Love Petnat" Zweigelt 2020 Kamptal, AUT 57\

In [8]:
# Parse the first page
results = parser.parse_wine_list(pages[0])

print(json.dumps(results, indent=2))

[
  {
    "id": null,
    "producer": "Ch\u00ebp\u00ecka",
    "name": "Petnat Catawba",
    "type": "PETNAT",
    "main_type": "WHITE",
    "region": "Finger Lakes",
    "country": "USA",
    "vintage": "2021",
    "price": "64",
    "size": "bottle"
  },
  {
    "id": null,
    "producer": "La Ferme du Vert",
    "name": "L' Angelou Blanc Bulle",
    "type": null,
    "main_type": "WHITE",
    "region": "Southwest",
    "country": "FRA",
    "vintage": "2021",
    "price": "56",
    "size": "bottle"
  },
  {
    "id": null,
    "producer": "Guiborat",
    "name": "Prisme - Blanc de Blancs",
    "type": "BLANC DE BLANCS, GRAND CRU EXTRA BRUT",
    "main_type": "SPARKLING",
    "region": "Champagne",
    "country": "FRA",
    "vintage": null,
    "price": "144",
    "size": "bottle"
  },
  {
    "id": null,
    "producer": "Pierre Moncuit",
    "name": "Delos Blanc de Blancs",
    "type": "BLANC DE BLANCS, GRAND CRU BRUT",
    "main_type": "SPARKLING",
    "region": "Champagne",
    "c

In [34]:
from functions import *
import pandas as pd

# Read the csv
df = pd.read_csv("menus/rake-wine.csv")

print(df.shape)
df.head()

(166, 10)


Unnamed: 0,id,producer,name,type,main_type,region,country,vintage,price,size
0,,Chëpìka,Petnat Catawba,PETNAT,WHITE,Finger Lakes,USA,2021.0,64.0,bottle
1,,La Ferme du Vert,L' Angelou Blanc Bulle,,WHITE,Southwest,FRA,2021.0,56.0,bottle
2,,Guiborat,Prisme - Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU EXTRA BRUT",SPARKLING,Champagne,FRA,,144.0,bottle
3,,Pierre Moncuit,Delos Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,,117.0,bottle
4,,Robert Moncuit,Millésime Blanc de Blancs,"BLANC DE BLANCS, GRAND CRU BRUT",SPARKLING,Champagne,FRA,2013.0,196.0,bottle


In [16]:
# Get vivino data
vivino_df = vivino_search_multiprocess(df)

vivino_df.head()

  0%|          | 0/166 [00:00<?, ?it/s]


PicklingError: Can't pickle <class 'pandas.core.frame.Pandas'>: attribute lookup Pandas on pandas.core.frame failed

In [13]:
smaller_df = vivino_df.head(75)

smaller_df.shape

(75, 14)

In [14]:
smaller_df.to_csv("rake-wine-75.csv", index=False)

In [46]:
new_df.to_csv("rake-wine-full.csv", index=False)