In [1]:
# Import Libraries
import time
from urllib.parse import quote_plus

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Start a headless Firefox session with image loading disabled

# Firefox
options = Options()  # the class imported from selenium.webdriver.firefox.options
options.add_argument("--headless")  # Run without opening a browser window
options.set_preference("permissions.default.image", 2)  # 2 = do not load images
driver = webdriver.Firefox(options=options)
# Implicit wait applied to element lookups on this driver.
# NOTE(review): has_next_page() also uses an explicit WebDriverWait; Selenium's
# documentation advises against mixing both kinds of wait - confirm this is intended.
driver.implicitly_wait(3)

# chrome
#options = webdriver.ChromeOptions()
#options.add_argument("--headless=new")
#options.add_argument('--blink-settings=imagesEnabled=false')
#driver = webdriver.Chrome(options=options)
#driver.implicitly_wait(3)

In [3]:
# Open the Lider supermarket search results for the chosen keyword
search_keyword = "pisco"
# URL-encode the keyword so that spaces, '&', '#', accents, etc. stay inside
# the value of the `query` parameter instead of breaking the URL.
driver.get("https://www.lider.cl/supermercado/search?query=" + quote_plus(search_keyword))


# TODO: review the sold-out `for` loop and its `continue`

In [4]:
# Scrape the products listed on the page currently loaded in the driver
def _node_text(node, strip=True):
    """Return the text of a BeautifulSoup node, or None when the node is missing."""
    return node.get_text(strip=strip) if node is not None else None


def _parse_int(text, strip_chars):
    """Convert scraped text such as "$4.990" or "31%" to an int.

    Every character listed in ``strip_chars`` is removed before conversion.
    Returns None when ``text`` is None or what remains is not an integer.
    """
    if text is None:
        return None
    for char in strip_chars:
        text = text.replace(char, '')
    try:
        return int(text)
    except ValueError:
        return None


def scrape_page(skip_sold_out=True):
    """Extract product data from the search results currently shown by ``driver``.

    Scrolls the window to the bottom of the document, parses the page source
    and builds one dictionary per ``li.ais-Hits-item`` element.

    Args:
        skip_sold_out: When True (default), results containing the
            ``span.w_Df.w_Dl.w_Do.tags`` marker are left out.
            NOTE(review): the notebook treats this span as the "sold out"
            tag - confirm the class is not used for other tags as well.

    Returns:
        list[dict]: one dict per product with the keys 'Descripción',
        'Precio Venta', 'Precio Referencia' and 'Porcentaje Descuento'.
        Values are None when the element is missing. 'Precio Venta' keeps
        the raw text when it cannot be converted to an int; the other
        numeric fields become None in that case.
    """
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    soup = BeautifulSoup(driver.page_source, "html.parser")

    data_list = []

    for result in soup.find_all("li", class_="ais-Hits-item"):
        # The check must live inside the loop that builds the rows:
        # a `continue` in a separate loop skips nothing.
        if skip_sold_out and result.find('span', class_='w_Df w_Dl w_Do tags') is not None:
            continue

        product_data = {}

        product_description = result.find('h2', class_='d-flex text-left product-description mb-0')
        product_data['Descripción'] = _node_text(product_description, strip=False)

        # Sale price: int when parseable, otherwise the raw text (or None when absent)
        sale_price = result.find('div', class_='product-card__sale-price')
        sale_text = _node_text(sale_price.find('span') if sale_price is not None else None)
        sale_value = _parse_int(sale_text, '$.')
        product_data['Precio Venta'] = sale_value if sale_value is not None else sale_text

        # Reference price: prefer the regular unit price, fall back to the reference-price span
        regular_price = result.find('div', class_='regular-unit-price__price-product-card')
        reference_value = _parse_int(
            _node_text(regular_price.find('span') if regular_price is not None else None), '$.'
        )
        if reference_value is None:
            reference_price = result.find('span', class_='reference-price__price')
            reference_value = _parse_int(_node_text(reference_price), '$.')
        product_data['Precio Referencia'] = reference_value

        discount_percentage = result.find('div', class_='discount-percentage__percentage-saved')
        product_data['Porcentaje Descuento'] = _parse_int(_node_text(discount_percentage), '%')

        data_list.append(product_data)

    return data_list
    

In [5]:
# Function to check if there is a next page
def has_next_page(timeout=10):
    """Report whether the pagination control offers a further page.

    Waits up to ``timeout`` seconds for the ``.ais-Pagination-item--nextPage``
    element to become clickable, then checks that its ``class`` attribute
    does not contain ``'disabled'``.

    Args:
        timeout: Maximum number of seconds to wait for the button (default 10).

    Returns:
        bool: True when the button is clickable and not marked as disabled;
        False when it is disabled or Selenium raised while locating it
        (for example because the wait timed out).
    """
    try:
        next_page_button = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.ais-Pagination-item--nextPage'))
        )
        # `or ''` keeps the membership test valid if the attribute comes back as None
        return 'disabled' not in (next_page_button.get_attribute('class') or '')
    except WebDriverException:
        # Catch Selenium errors only (TimeoutException is a subclass), so that
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return False

# Loop through pages, accumulating the rows scraped from each one
page_number = 1
data_list = []

try:
    while True:
        print(f"Scraping data from page {page_number}")
        data_list += scrape_page()

        if not has_next_page():
            break

        # Click the next page button
        # NOTE(review): nothing here waits for the new results to render after
        # the click; confirm scrape_page() does not read the previous page's DOM.
        next_page_button = driver.find_element(By.CSS_SELECTOR, '.ais-Pagination-item--nextPage')
        next_page_button.click()
        page_number += 1
finally:
    # Close webdriver even if scraping raised, so the headless browser
    # process is not left running.
    driver.quit()

Scraping data from page 1
Scraping data from page 2
Scraping data from page 3
Scraping data from page 4
Scraping data from page 5


In [9]:
# Build the results table and rank the products by discount, largest first
df = pd.DataFrame(data_list).sort_values(by=['Porcentaje Descuento'], ascending=False)
# Display the 20 top-ranked rows
df.head(20)

Unnamed: 0,Descripción,Precio Venta,Precio Referencia,Porcentaje Descuento
5,"Alto del Carmen Pisco especial 35° Botella, 75...",4990,7190.0,31.0
64,,3690,4890.0,25.0
41,"Tres Erres Pisco 35° 750 ml Botella, 750 ml",5390,6990.0,23.0
33,Pisco Diablo Pisco Diablo Reservado Transparen...,8490,10890.0,22.0
56,"Capel Cóctel Pisco Sour Light 12° Botella, 700 ml",3990,5090.0,22.0
55,"Capel Cóctel Pisco sour 14° Botella, 700 ml",3990,5090.0,22.0
10,Alto del Carmen Pisco Roble del Sur 35° Botell...,5790,7190.0,19.0
36,"Malpaso Pisco Mal Paso 35°, 750 cc",7990,9890.0,19.0
1,Alto del Carmen Pisco reservado transparente 4...,6990,8490.0,18.0
15,Alto del Carmen Pisco Alto del Carmen Barricas...,6990,8490.0,18.0
