In [2]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize Chrome WebDriver
driver = webdriver.Chrome()

# URL to scrape
base_url = 'https://www.vivino.com/explore?e=eJwNyTEOgCAMBdDbdMbB8Y96ABJnU6EQEgFTCJHb61tfViyUU4GhzC9WY8hN7JYctsPS828MGKxJOt9U1cNLc1SvCeWeSmwnD1GO8gGLohme'

# List to store all wine information
wines = []

def scrape_page():
    time.sleep(10)  
    # Find all wine elements
    try:
        wine_elements = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div[class^='wineCard__wineCard']"))
        )
        print(f"Found {len(wine_elements)} wine elements on the page.")
        
        for wine_element in wine_elements:
            try:
                wine_name = wine_element.find_element(By.CSS_SELECTOR, "div[class^='wineInfoVintage__vintage']").text
                wine_producer = wine_element.find_element(By.CSS_SELECTOR, "div[class^='wineInfoVintage__truncate']").text
                rating = wine_element.find_element(By.CSS_SELECTOR, "div[class^='vivinoRating_averageValue']").text if wine_element.find_elements(By.CSS_SELECTOR, "div[class^='vivinoRating_averageValue']") else "N/A"
                country = wine_element.find_element(By.CSS_SELECTOR, "div[class^='wineInfoLocation__regionAndCountry']").text if wine_element.find_elements(By.CSS_SELECTOR, "div[class^='wineInfoLocation__regionAndCountry']") else "N/A"
                price = wine_element.find_element(By.CSS_SELECTOR, "div[class^='addToCartButton__price']").text if wine_element.find_elements(By.CSS_SELECTOR, "div[class^='addToCartButton__price']") else "N/A"
                #price_info = wine_element.find_element(By.CSS_SELECTOR, "div[class^='addToCartButton__price']").find_element(By.TAG, "div")
                
                wines.append({
                    'wine_name': wine_name,
                    'wine_producer': wine_producer,
                    'rating': rating,
                    'country': country,
                    'price': price[2:]
                })
                
                if len(wines) >= 100:
                    return True
                
            except Exception as e:
                print(f"Error scraping wine data: {e}")
                
    except Exception as e:
        print(f"Error finding wine elements: {e}")
    return False

# Start scraping from the first page
driver.get(base_url)
if scrape_page():
    driver.quit()
    exit()

# Pagination logic (Vivino loads more results as you scroll down)
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)  # Wait for new page segment to load
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
    if scrape_page():
        break

# Create a DataFrame from the collected wine data
wine_df = pd.DataFrame(wines)

# Print the DataFrame
print(wine_df)



# Close the driver
driver.quit()



Found 25 wine elements on the page.
Found 50 wine elements on the page.
Found 75 wine elements on the page.
                                            wine_name      wine_producer  \
0                           Unico (Gran Reserva) 2010       Vega Sicilia   
1                                      Sauternes 2015    Château d'Yquem   
2                      Don PX Convento Selección 1946        Toro Albalá   
3                                      Sauternes 2017    Château d'Yquem   
4                                  Viña El Pison 2018             Artadi   
..                                                ...                ...   
95                                     Sauternes 1997    Château d'Yquem   
96  Vigneto Il Fornetto Amarone della Valpolicella...  Stefano Accordini   
97                                     Sauternes 2005    Château d'Yquem   
98                         Valbuena 5º (Reserva) 2018       Vega Sicilia   
99  Amarone della Valpolicella Classico Vigneto Al...   

In [3]:
wine_df

Unnamed: 0,wine_name,wine_producer,rating,country,price
0,Unico (Gran Reserva) 2010,Vega Sicilia,48,"Ribera del Duero, Espagne",45820
1,Sauternes 2015,Château d'Yquem,48,"Sauternes, France",440
2,Don PX Convento Selección 1946,Toro Albalá,48,"Montilla-Moriles, Espagne",49940
3,Sauternes 2017,Château d'Yquem,48,"Sauternes, France",43130
4,Viña El Pison 2018,Artadi,48,"Vino de España, Espagne",39433
...,...,...,...,...,...
95,Sauternes 1997,Château d'Yquem,47,"Sauternes, France",390
96,Vigneto Il Fornetto Amarone della Valpolicella...,Stefano Accordini,47,"Amarone della Valpolicella Classico, Italie",28290
97,Sauternes 2005,Château d'Yquem,47,"Sauternes, France",404
98,Valbuena 5º (Reserva) 2018,Vega Sicilia,47,"Ribera del Duero, Espagne",465


In [4]:
wine_df['rating'] = wine_df['rating'].str.replace(',', '.').astype(float)


In [5]:

wine_df['price'] = wine_df['price'].str.replace(',', '.').astype(float)


In [6]:
wine_df.dtypes

wine_name         object
wine_producer     object
rating           float64
country           object
price            float64
dtype: object

In [7]:

wine_df[['region', 'country']] = wine_df['country'].str.split(',', expand=True)


In [9]:
wine_df

Unnamed: 0,wine_name,wine_producer,rating,country,price,region
0,Unico (Gran Reserva) 2010,Vega Sicilia,4.8,Espagne,458.20,Ribera del Duero
1,Sauternes 2015,Château d'Yquem,4.8,France,440.00,Sauternes
2,Don PX Convento Selección 1946,Toro Albalá,4.8,Espagne,499.40,Montilla-Moriles
3,Sauternes 2017,Château d'Yquem,4.8,France,431.30,Sauternes
4,Viña El Pison 2018,Artadi,4.8,Espagne,394.33,Vino de España
...,...,...,...,...,...,...
95,Sauternes 1997,Château d'Yquem,4.7,France,390.00,Sauternes
96,Vigneto Il Fornetto Amarone della Valpolicella...,Stefano Accordini,4.7,Italie,282.90,Amarone della Valpolicella Classico
97,Sauternes 2005,Château d'Yquem,4.7,France,404.00,Sauternes
98,Valbuena 5º (Reserva) 2018,Vega Sicilia,4.7,Espagne,465.00,Ribera del Duero


In [13]:
# Save the DataFrame to a CSV file
output_path = "/Users/Barbara/Desktop/Ironhack/Final_Project/final-project/Webscraping/vivino_scraping.csv"  # Replace with your desired file path
wine_df.to_csv(output_path, index=False)

In [11]:
wine_df['country'].value_counts()

country
France        38
Italie        29
Espagne       23
Allemagne      3
Australie      3
États-Unis     3
Argentine      1
Name: count, dtype: int64