In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Scrape the first ten pages of Epicurious search results for "cucumbers",
# building one dict per result card and then a single DataFrame from all of them.
rows = []


def _text_or_none(parent, class_name):
    """Return the stripped text of the first descendant with the given CSS class, or None if there is none."""
    node = parent.find(class_=class_name)
    return node.text.strip() if node is not None else None


for page_num in range(1, 11):
    url = f"https://www.epicurious.com/search/cucumbers?page={page_num}"

    # The timeout keeps one stalled connection from hanging the whole run;
    # raise_for_status() stops on an HTTP error instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so the parse does not depend on which
    # optional parsers happen to be installed.
    doc = BeautifulSoup(response.text, "html.parser")

    results = doc.find_all("article")
    # The last <article> on every page is skipped
    # (presumably it is not a search result - TODO confirm against the live page).
    for result in results[:-1]:
        row = {}
        row['Category'] = result.find(class_="tag").text.strip()
        row['Title'] = result.find("h4").text.strip()
        row['Summary'] = result.find(class_="dek").text.strip()
        # Rating and make-again percentage are optional on a card. Storing None
        # (instead of swallowing every exception and omitting the key) keeps both
        # columns present in the DataFrame even when no card on any page has them.
        row['Rating'] = _text_or_none(result, "rating")
        row['Would make again'] = _text_or_none(result, "make-again-percentage")
        row['URL'] = result.find("a")['href']
        rows.append(row)

df = pd.DataFrame(rows)

In [3]:
# Keep the part of the rating text that starts at the first digit and ends just before the slash.
# The pattern is a raw string so that \d reaches the regex engine instead of being read as an
# (invalid) string escape. Note: running this cell a second time turns the already-cleaned
# values into NaN, because they no longer contain a slash.
df['Rating'] = df['Rating'].str.extract(r"(\d.*)/.", expand=False)

In [4]:
# Put the columns in a fixed, readable order, then preview the first rows.
df = df.loc[:, [
    'Category',
    'Title',
    'Summary',
    'Rating',
    'Would make again',
    'URL',
]]
df.head()

Unnamed: 0,Category,Title,Summary,Rating,Would make again,URL
0,recipe,Spicy Lightly Pickled Cucumbers,These quick pickles have just the right amount...,4,100%,/recipes/food/views/spicy-lightly-pickled-cucu...
1,recipe,Cucumbers with Ajo Blanco Sauce,Think of this as an all-purpose garlic sauce. ...,4,100%,/recipes/food/views/cucumbers-with-ajo-blanco-...
2,recipe,Cold Beef Tenderloin with Tomatoes and Cucumbers,Beef tenderloin is precious enough to baby on ...,2,0%,/recipes/food/views/cold-beef-tenderloin-with-...
3,recipe,Fried Fish Sandwiches with Cucumbers and Tarta...,Take your cast-iron pan to the grill for this ...,3,100%,/recipes/food/views/fried-fish-sandwiches-with...
4,recipe,Grain Salad with Tomatoes and Cucumbers,We call for semi-pearled grains because they c...,3,100%,/recipes/food/views/farro-spelt-grain-salad-wi...


In [5]:
# Save the search results; the index is left out of the file.
df.to_csv(path_or_buf="cucumber-results.csv", index=False)

# Epicurious, Part 2: Once-per-row scraping

In [6]:
import pandas as pd

In [7]:
# Reload the saved search results and keep only the rows whose Category is 'recipe'.
df = pd.read_csv(filepath_or_buffer="cucumber-results.csv")
df2 = df.loc[df['Category'] == 'recipe']

In [8]:
def scrape_page(row):
    """Fetch one recipe page and extract its ingredients, directions and tags.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['URL']``, a site-relative path that is appended to
        ``https://www.epicurious.com``.

    Returns
    -------
    pandas.Series
        Keys ``Ingredients``, ``Directions`` and ``Tags``. Each value is the
        newline-joined, whitespace-stripped text of the matching elements;
        a section that is not found on the page yields an empty string.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = f"https://www.epicurious.com{row['URL']}"

    # The timeout keeps one stalled request from hanging a whole DataFrame.apply run;
    # raise_for_status() avoids silently scraping an error page into empty fields.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so the parse does not depend on which
    # optional parsers happen to be installed.
    doc = BeautifulSoup(response.text, "html.parser")

    def joined_text(nodes):
        # One stripped line of text per matched element.
        return '\n'.join(node.text.strip() for node in nodes)

    # Not every page is guaranteed to carry a tag section; find() returns None
    # in that case, so fall back to "no tags" rather than raising AttributeError.
    tags_section = doc.find(class_="menus-tags content")
    tags = tags_section.find_all('dt') if tags_section is not None else []

    page = {
        'Ingredients': joined_text(doc.find_all(class_='ingredient')),
        'Directions': joined_text(doc.find_all(class_="preparation-step")),
        'Tags': joined_text(tags),
    }
    return pd.Series(page)

In [9]:
# Call scrape_page once per recipe row (one HTTP request each), then preview the result.
scraped_df = df2.apply(scrape_page, axis='columns')
scraped_df.head()

Unnamed: 0,Ingredients,Directions,Tags
0,"2 lb. medium Persian cucumbers (about 12), cut...","Toss cucumbers in a large bowl with vinegar, s...",Bon Appétit\nPickles\nHors D'Oeuvre\nAppetizer...
1,5 mini seedless or Persian cucumbers (about 12...,"Preheat oven to 350°F. Cut cucumbers about 1"" ...",Bon Appétit\nSauce\nCondiment\nGarlic\nAlmond\...
2,"1/4 cup extra-virgin olive oil, plus more\n1 (...",Prepare a grill for 2-zone heat (for a charcoa...,Bon Appétit\nDinner\nBeef Tenderloin\nBeef\nTo...
3,1/2 cup mayonnaise\n1/4 cup finely grated lemo...,"Mix mayonnaise, lemon zest and juice, relish, ...",Bon Appétit\nSandwich\nFish\nSeafood\nFry\nBee...
4,2 cups semi-pearled farro or spelt\nKosher sal...,Preheat oven to 350°F. Cook farro in a large D...,Bon Appétit\nSalad\nSide\nGrains\nCucumber\nTo...


In [10]:
# Attach the scraped fields to their search-result rows by matching on the index.
df3 = pd.merge(df2, scraped_df, left_index=True, right_index=True)

In [11]:
# Write the combined recipe table to disk; the index is left out of the file.
df3.to_csv(path_or_buf="cucumber-recipes.csv", index=False)