# What's in this notebook?
This notebook collects the data for our pumpkin pie recipe analysis. We scrape allrecipes.com to get each recipe's rating, ingredients, directions, and nutrition facts.

In [128]:
import os
import time

import numpy as np
import pandas as pd
import selenium
from selenium.webdriver.common.by import By
from tqdm import tqdm_notebook

## Getting Links
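Because Allrecipes uses infinite scrolling, we use Selenium to drive a real browser and collect all of the recipes. There weren't too many recipes, so we manually scrolled to the bottom of the page in the Selenium-controlled browser window and then ran the link-collection loop below.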

In [120]:
from selenium import webdriver

# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to override; the fallback is the path this notebook was written with.
CHROMEDRIVER_PATH = os.environ.get('CHROMEDRIVER_PATH', '/Users/hannah/Downloads/chromedriver')

# Allrecipes category page listing the pumpkin pie recipes.
PUMPKIN_PIE_URL = 'https://www.allrecipes.com/recipes/819/desserts/pies/pumpkin-pie/?internalSource=hubcard&referringContentType=Search&clickId=cardslot%201'

# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention -- confirm it matches the installed Selenium version.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)

# Load the listing page. Running this cell starts a new browser session, so
# without this call the link-collection cell would scrape a blank page.
# Scroll the page to the bottom by hand afterwards so every recipe card loads.
driver.get(PUMPKIN_PIE_URL)

In [43]:
# Collect the recipe URL from every recipe card currently loaded in the browser.
# The listing page must already be scrolled to the bottom so all cards exist.
links = []
for card in driver.find_elements(By.CLASS_NAME, 'fixed-recipe-card__h3'):
    anchors = card.find_elements(By.TAG_NAME, 'a')
    # A card without an anchor carries no recipe link; skip it.
    if anchors:
        links.append(anchors[0].get_attribute('href'))

In [45]:
# Sanity check: how many recipe links were collected from the listing page.
len(links)

143

## Getting recipes and ratings

In [125]:
# itemprop names of the nutrition <span> elements read from each recipe page.
NUTRITION_FIELDS = ('calories', 'fatContent',
                    'carbohydrateContent', 'proteinContent',
                    'cholesterolContent', 'sodiumContent')

# Seconds to wait between page loads.
REQUEST_PAUSE_SECONDS = .35


def _scrape_recipe(driver, link):
    """Load one recipe page and return its scraped fields.

    Parameters
    ----------
    driver : selenium WebDriver used to load and query the page.
    link : str, URL of the recipe page.

    Returns
    -------
    dict with keys 'link', 'rating' (the raw 'data-ratingstars' attribute
    string), 'ingredients' (list of str), 'directions' (list of str) and
    'nutrition' (dict mapping each name in NUTRITION_FIELDS to the text of
    the matching element).

    Raises
    ------
    Whatever selenium raises when the page fails to load or a required
    element is missing; the caller decides how to handle it.
    """
    driver.get(link)

    # get ratings
    rating = driver.find_element(By.CLASS_NAME, 'rating-stars').get_attribute('data-ratingstars')

    # get ingredients -- the last checklist line is not an ingredient
    ingredients = [
        line.find_element(By.TAG_NAME, 'label').get_attribute('title')
        for line in driver.find_elements(By.CLASS_NAME, 'checkList__line')[:-1]
    ]

    # get directions
    directions = [
        step.text
        for step in driver.find_elements(By.CLASS_NAME, 'recipe-directions__list--item')
    ]

    # get nutrition information
    nutrition_facts = {
        fact: driver.find_element(By.XPATH, f"//span[@itemprop='{fact}']").text
        for fact in NUTRITION_FIELDS
    }

    return {'link': link, 'rating': rating, 'ingredients': ingredients,
            'directions': directions, 'nutrition': nutrition_facts}


data = []
redo = []  # links whose scrape failed, to retry
redo_errors = {}  # link -> repr of the exception that made the scrape fail
for link in tqdm_notebook(links):
    try:
        data.append(_scrape_recipe(driver, link))
    except Exception as err:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupt
        # still stops the loop, and keep the reason for later inspection.
        redo.append(link)
        redo_errors[link] = repr(err)

    time.sleep(REQUEST_PAUSE_SECONDS)

HBox(children=(IntProgress(value=0, max=143), HTML(value='')))

In [126]:
# Peek at the first few scraped records instead of dumping the whole list.
data[:3]

[{'link': 'https://www.allrecipes.com/recipe/16052/paradise-pumpkin-pie-i/?internalSource=rotd&referringId=819&referringContentType=Recipe%20Hub',
  'rating': '4.41304349899292',
  'ingredients': ['1 (9 inch) pie shell',
   '1 (8 ounce) package cream cheese, softened',
   '1/4 cup white sugar',
   '1/2 teaspoon vanilla extract',
   '1 egg, beaten',
   '1 1/4 cups pumpkin puree',
   '1 cup evaporated milk',
   '1/2 cup white sugar',
   '2 eggs, beaten',
   '1 teaspoon ground cinnamon',
   '1/4 teaspoon ground ginger',
   '1/4 teaspoon ground nutmeg',
   '1 pinch salt',
   '1/4 cup maple syrup'],
  'directions': ['Preheat oven to 350 degrees F (175 degrees C).',
   'In a small mixing bowl combine cream cheese, 1/4 cup sugar, and vanilla extract. Beat well, then add 1 egg and mix until thoroughly combined. Spread onto the bottom of pie shell and set aside.',
   'In a medium bowl combine pumpkin, evaporated milk, 1/2 cup sugar, 2 eggs, cinnamon, ginger, nutmeg, and salt. Mix well, then pou

In [130]:
# Build one row per scraped recipe; each dict key in `data` becomes a column.
df = pd.DataFrame(data)

In [149]:
# Ratings were scraped as strings: convert them to floats and keep one decimal place.
rating_rounded = df['rating'].astype(float).round(1)
df = df.assign(rating=rating_rounded)

In [150]:
# Show only the first ten rows rather than rendering the entire frame.
df.head(10)

Unnamed: 0,directions,ingredients,link,nutrition,rating
0,[Preheat oven to 350 degrees F (175 degrees C)...,"[1 (9 inch) pie shell, 1 (8 ounce) package cre...",https://www.allrecipes.com/recipe/16052/paradi...,"{'calories': '361 calories;', 'fatContent': '1...",4.4
1,[Preheat oven to 425 degrees F (220 degrees C....,[1 recipe pastry for a 9 inch single crust pie...,https://www.allrecipes.com/recipe/12141/moms-p...,"{'calories': '345 calories;', 'fatContent': '1...",4.5
2,[Preheat oven to 425 degrees F (220 degrees C)...,"[1 (15 ounce) can pumpkin puree, 3 egg yolks, ...",https://www.allrecipes.com/recipe/230132/chef-...,"{'calories': '320 calories;', 'fatContent': '1...",4.8
3,[Preheat oven to 350 degrees F (175 degrees C)...,"[2 prepared pie crusts, 3 eggs, divided, 1 (8 ...",https://www.allrecipes.com/recipe/219025/mini-...,"{'calories': '138 calories;', 'fatContent': '8...",4.8
4,[Line one 9 inch pie pan with whole gingersnap...,"[1 cup pumpkin puree, 3 eggs, 1/2 cup white su...",https://www.allrecipes.com/recipe/9121/dads-pu...,"{'calories': '343 calories;', 'fatContent': '1...",4.4
5,[Preheat oven to 375 degrees F (190 degrees C)...,[1 recipe pastry for a 9 inch single crust pie...,https://www.allrecipes.com/recipe/13504/gourme...,"{'calories': '310 calories;', 'fatContent': '1...",4.0
6,[Preheat oven to 375 degrees F (190 degrees C)...,"[1 (9 inch) unbaked pie crust, 3/4 cup toffee ...",https://www.allrecipes.com/recipe/24198/pumpki...,"{'calories': '504 calories;', 'fatContent': '2...",4.5
7,[Preheat oven to 350 degrees F (175 degrees C)...,"[1 (9 inch) pie shell, 1 (8 ounce) package cre...",https://www.allrecipes.com/recipe/15947/old-fa...,"{'calories': '430 calories;', 'fatContent': '2...",4.6
8,"[Preheat oven to 425 degrees F. Whisk pumpkin,...","[1 (15 ounce) can pumpkin, 1 (14 ounce) can EA...",https://www.allrecipes.com/recipe/23439/perfec...,"{'calories': '379 calories;', 'fatContent': '1...",4.7
9,"[Preheat oven to 425 degrees F., Combine sugar...","[1 (9 inch) unbaked deep dish pie crust, 3/4 c...",https://www.allrecipes.com/recipe/22755/libbys...,"{'calories': '283 calories;', 'fatContent': '1...",4.8
