In [27]:
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

from bs4 import BeautifulSoup
import pandas as pd
import re
load_dotenv()

True

In [28]:
# grab the zip code we've saved as an environment variable
zip_code = os.environ.get('ZIP')

driver = webdriver.Firefox()
driver.maximize_window()
# access Whole Foods' products that are on sale via selenium
driver.get("https://www.wholefoodsmarket.com/products/all-products?featured=on-sale")
assert "Whole Foods" in driver.title

# wait for the page to load
wait = WebDriverWait(driver, 3)

# try/except in case selenium opens us logged in for some reason
try:
    # we create a variable to grab the search field
    store = driver.find_element(by=By.ID, value="pie-store-finder-modal-search-field")
    # ensure it's empty
    store.clear()
    # selenium adds our zip code into the text field
    store.send_keys(zip_code)
    store.send_keys(Keys.RETURN)
    
    # wait until a nearby Whole Foods shows up as a clickable option
    closest = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'li.wfm-search-bar--list_item:nth-child(1)')))
    closest.click()
except:
    NoSuchElementException('Either you already have the lcoation or there\'s an error')

# the on sale products page does not show all of the products
# in fact, there are so many deals on sale that I have no idea
# what the exact amount is, so I limit the amount of extra
# product pages that we load more to 5

# we wait for the load more button to load since it doesn't initially
load_more = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.w-button:nth-child(5)")))
for _ in range(5):
    # we click 5 times
    load_more.click()

In [None]:
# grabbing the html source after loading more deals
# than we would have gotten after initially loading
# the page
content = driver.page_source
driver.close()

# make working with the html easier with BeautifulSoup
dom = BeautifulSoup(content, 'html.parser')
body = dom.body

WebDriverException: Message: Failed to decode response from marionette


In [None]:
# select the products by grabbing all content in the body
# that have the class for a product item

# we need to use a dictionary to select this css atrribute
# because python already has a class keyword

# EDIT: after doing a ton of work with this dictionary-type
# attribute search style, I learned BeautifulSoup can just
# use `class_` in order to search by CSS class :/
products = body.find_all(attrs={'class': 'w-pie--product-tile'})

In [None]:
data = []

# loop through every product
for i in products:
    # search for elements with the pricing information
    info = i.find(attrs={'class': 'w-pie--product-tile__content'})
    
    brand = info.find(attrs={'class': 'w-cms--font-disclaimer'}).text
    item = info.find(attrs={'class': 'w-cms--font-body__sans-bold'}).text
    
    # need to use regex to find numbers following dollar sign
    # or if there's nothing following the numbers
    # or if there's `/lb` following the numbers
    regular = info.find(attrs={'class': 'regular_price has_sale'}).text
    # print(regular)
    
    sale = info.find(attrs={'class': 'sr-only'}).next_sibling
    re_sale = re.search(r'(\$)(.*)', sale)[2]
    # print(re_sale)
    re_regular = re.search(r'(\$)(.*)', regular)[2]
    # print(re_regular)
    try:
        content_prime = info.find(attrs={'class': 'prime_price'})
        prime = content_prime.find(class_='sr-only').next_sibling
        re_prime = re.search(r'(\$)(.*)', prime)[2]
        
    except:
        AttributeError('There may not be a prime price, so lets set it to the sale price')
        print("No prime price")
        re_prime = re_sale
        
    try:
        lb = re_sale.index('/lb')
        re_sale = re_sale[:lb]
    except:
        ValueError("Looks like this isn't measured in lb")
    try:    
        lb2 = re_prime.index('/lb')
        re_prime = re_prime[:lb2]
    except:
        ValueError("Looks like this isn't measured in lb")
    try:    
        lb3 = re_regular.index('/lb')
        re_regular = re_regular[:lb3]
    except:
        ValueError("Looks like this isn't measured in lb")
    print("Regular:", re_regular)
    print("Sale:", re_sale)
    print("Prime:", re_prime)
    
    data.append({'Brand': brand,
                'Item': item,
               'Regular': float(re_regular),
               'Sale': float(re_sale),
               'Prime': float(re_prime)})

Regular: 5.99
Sale: 3.99
Prime: 3.59
Regular: 5.49
Sale: 4.49
Prime: 4.04
Regular: 2.99
Sale: 2.49
Prime: 2.24
Regular: 3.49
Sale: 2.99
Prime: 2.69
Regular: 7.49
Sale: 5.49
Prime: 4.94
Regular: 12.99
Sale: 8.99
Prime: 8.09
Regular: 5.79
Sale: 5.29
Prime: 4.76
Regular: 24.99
Sale: 19.99
Prime: 17.99
Regular: 2.79
Sale: 2.49
Prime: 2.24
Regular: 9.99
Sale: 9.49
Prime: 8.54
Regular: 7.99
Sale: 14
Prime: 12.60
Regular: 8.99
Sale: 8.49
Prime: 7.64
Regular: 6.49
Sale: 4.99
Prime: 4.49
Regular: 11.79
Sale: 9.79
Prime: 8.81
Regular: 8.99
Sale: 8.49
Prime: 7.64
Regular: 7.99
Sale: 6.99
Prime: 6.29
Regular: 4.39
Sale: 3.49
Prime: 3.14
Regular: 6.49
Sale: 4.99
Prime: 4.49
Regular: 3.99
Sale: 2.99
Prime: 2.69
Regular: 5.79
Sale: 5.29
Prime: 4.76
Regular: 5.29
Sale: 4.99
Prime: 4.49
Regular: 5.99
Sale: 4.99
Prime: 4.49
Regular: 5.99
Sale: 4.99
Prime: 4.49
Regular: 6.69
Sale: 4.49
Prime: 4.04
Regular: 10.49
Sale: 9.99
Prime: 8.99
Regular: 10.99
Sale: 9.49
Prime: 8.54
Regular: 5.99
Sale: 4.79
Prime: 

In [None]:
df = pd.DataFrame(data)

In [None]:
df

Unnamed: 0,Brand,Item,Regular,Sale,Prime
0,PRODUCE,Organic Green Asparagus,5.99,3.99,3.59
1,Oatly,"Original Oatmilk, 64 fl oz",5.49,4.49,4.04
2,PRODUCE,Red Seedless Grapes,2.99,2.49,2.24
3,PRODUCE,Organic Tomato On The Vine,3.49,2.99,2.69
4,MEAT,Ground Beef 90% Lean/ 10% Fat,7.49,5.49,4.94
...,...,...,...,...,...
295,Cedar's Mediterranean Foods,"Organic Lemon Hommus, 10 oz",3.69,3.49,3.14
296,Justin's,"Honey Almond Butter, 16 oz",11.99,9.99,8.99
297,Brew Dr. Kombucha,"Clear Mind Kombucha, 4pk cans, 12oz each",11.99,8.99,8.09
298,One Degree Organic Foods,"Brown Rice Cacao Crisps, 10 oz",4.99,3.99,3.59
