In [6]:
# import dependencies
import selenium
import pandas as pd
import datetime
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from twilio.rest import Client
import time


# Timestamp for this scrape run, formatted once so every scraped row shares it
dt = datetime.datetime.today().strftime("%Y-%m-%d %I:%M:%S %p")

print(f'Starting Scrape: {dt}')

# ------------------------------ Configuration ------------------------------

# define url to start scrape (whiskey listing ordered by price, descending)
url = 'https://shop.bearcreekspirits.com/shop/?subtype=whiskey&order=price+desc'

# XPath selectors used while scraping
age_button_xpath = "//button[@class='age-check-yes']"
bottle_xpath = "//div[@class='ch-product-name']"
price_xpath = "//span[@class='ch-single-product-price'] | //div[@class='price-range']"
next_button_xpath = "//a[@class='button ch-btn nav-button'][last()]"

# number of result pages to scrape (page 1 plus two clicks on "next")
pages_to_scrape = 3

# seconds to wait for a button to appear before the attempt is considered failed
wait_seconds = 20

# define max attempts when the scrape fails
max_attempts = 3


def _clean_price(raw_text):
    """Return the first space-separated token of raw_text without '$' or ','.

    The value is returned as a string; no numeric conversion is done here.
    """
    # presumably a price range is rendered as "low - high", so the first token
    # is the low end -- NOTE(review): confirm against the live page markup
    return raw_text.split(' ')[0].replace('$', '').replace(',', '')


def _scrape_current_page(driver, ts):
    """Scrape the bottle names and prices from the page `driver` is showing.

    Parameters:
        driver: Selenium WebDriver positioned on a results page.
        ts: timestamp string stored in the 'ts' column of every row.

    Returns:
        A new DataFrame with the columns 'bottles', 'price' and 'ts'.

    Raises:
        ValueError: (from pandas) when the number of names and prices differ.
    """
    bottle_list = [bottle.text for bottle in driver.find_elements(By.XPATH, bottle_xpath)]
    price_list = [_clean_price(price.text) for price in driver.find_elements(By.XPATH, price_xpath)]

    # Build a fresh frame per page: re-using one frame across pages fails as
    # soon as a page holds a different number of products than the first one.
    df_page = pd.DataFrame({'bottles': bottle_list, 'price': price_list})
    df_page['ts'] = ts
    return df_page


# ------------------------------ Scrape with retries ------------------------------

# master dataframe; stays empty unless one attempt scrapes every page successfully
df = pd.DataFrame()

# define counter for attempts (1-based)
attempt = 1

while attempt <= max_attempts:

    # reset per attempt so the cleanup below only quits a browser this attempt opened
    driver = None

    # try because sometimes the chrome webdriver randomly fails
    try:

        # Set up Selenium WebDriver
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        driver = webdriver.Chrome(options=chrome_options)

        # Navigate to the URL
        driver.get(url)

        # -------------------------- Age Verification -------------------------------

        # Wait for the age confirmation button to be clickable, then click it
        age_button = WebDriverWait(driver, wait_seconds).until(
            EC.element_to_be_clickable((By.XPATH, age_button_xpath))
        )
        age_button.click()

        # -------------------------- Pages 1..N -------------------------------

        # Collect pages locally and publish them only when all pages succeeded,
        # so a retry after a mid-run failure cannot duplicate rows in df.
        page_frames = []

        for page_number in range(1, pages_to_scrape + 1):

            # every page after the first is reached through the "next" button
            if page_number > 1:
                next_button = WebDriverWait(driver, wait_seconds).until(
                    EC.presence_of_element_located((By.XPATH, next_button_xpath))
                )
                next_button.click()
                # NOTE(review): nothing waits for the new page to render before
                # scraping; if rows ever repeat the previous page, add an
                # explicit wait here -- confirm against the live site.

            page_frames.append(_scrape_current_page(driver, dt))

        df = pd.concat(page_frames, ignore_index=True)

        # all pages were scraped: move attempt past the limit so the loop ends
        attempt = max_attempts + 1

        # print out that this attempt was successful
        print('Success')

    # if the attempt fails
    except Exception as e:

        # if this is the final attempt print the final failure message
        if attempt >= max_attempts:
            print('Final Failure')

        # if this isn't the final attempt print a failure message and try again
        else:
            print(f'Failed Attempt #{attempt}')

        # report the cause instead of silently swallowing it
        print(f'    Error: {type(e).__name__}: {e}')

        # always count the failure, otherwise the final failure would loop forever
        attempt += 1

    finally:

        # Close the browser opened by this attempt (if any), success or failure
        if driver is not None:
            driver.quit()

Starting Scrape: 2023-12-10 10:39:33 AM
Success


In [7]:
# Display the scraped results (columns: bottles, price, ts)
df

Unnamed: 0,bottles,price,ts
0,Glenfiddich 23 Year Old Grand Cru Single Malt ...,429.99,2023-12-10 10:39:33 AM
1,Widow Jane The Vaults 14 Year Old Straight Bou...,291.99,2023-12-10 10:39:33 AM
2,Garrison Brothers Cowboy Bourbon® Whiskey,271.99,2023-12-10 10:39:33 AM
3,Crown Royal 23 Year Golden Apple Canadian Whisky,269.99,2023-12-10 10:39:33 AM
4,Dalmore Cigar Malt Reserve,213.99,2023-12-10 10:39:33 AM
5,The Macallan 15 Year Double Cask Single Malt S...,202.99,2023-12-10 10:39:33 AM
6,WhistlePig Old World 12 Year Rye Whiskey,189.99,2023-12-10 10:39:33 AM
7,Highland Park Texas Edition No,179.99,2023-12-10 10:39:33 AM
8,Redbreast 15YR Irish Whiskey,173.99,2023-12-10 10:39:33 AM
9,Mitchell & Son Yellow Spot 12 Year Old Single ...,166.99,2023-12-10 10:39:33 AM
