## Notebook for testing out the NYT Scraper

In [1]:
from bs4 import BeautifulSoup
import requests
import numpy as np
from collections import OrderedDict

In [115]:
# Recipe page to scrape; the commented-out URL is an alternate test recipe.
#url = 'http://cooking.nytimes.com/recipes/1017728-spicy-fried-shrimp-with-green-chutney'
url = 'http://cooking.nytimes.com/recipes/1015865-tomato-bisque-with-fresh-goat-cheese'
if 'cooking.nytimes.com' in url:
    # Bound the wait: without a timeout, requests can block indefinitely.
    html = requests.get(url, timeout=10)
else:
    # Clear any response left over from an earlier run so that later cells
    # cannot silently parse a stale page.
    html = None
    print("URL is no good")

In [116]:
# Parse the downloaded page body using Python's built-in HTML parser backend.
soup = BeautifulSoup(markup=html.content, features="html.parser")

In [117]:
# Recipe title: stripped text of the title <h1>, or an empty list when the
# element is not present on the page.
title_tag = soup.find('h1', {'class': 'recipe-title title name'})
if title_tag is None:
    recipe_name = []
else:
    recipe_name = title_tag.text.strip()

print(recipe_name)

Tomato Bisque With Fresh Goat Cheese


In [118]:
# Example of the markup being targeted:
# <span class="byline-name" itemprop="author">Martha Rose Shulman</span>

# Author byline: stripped text of the byline <span>, or an empty list when
# the element is not present.
author_tag = soup.find('span', {'class': 'byline-name', 'itemprop': 'author'})
recipe_author = author_tag.text.strip() if author_tag is not None else []

print(recipe_author)

Suzanne Lenzer


In [119]:
# Lead image URL: the `src` of the first <img> inside the recipe intro block,
# or None when any part of that chain is missing.
try:
    img_url = soup.find('div', {'class': 'recipe-intro'}).find('img')['src']
except (AttributeError, TypeError, KeyError):
    # AttributeError: the intro <div> is missing (find() returned None).
    # TypeError: the <div> has no <img>, so None was subscripted.
    # KeyError: the <img> exists but carries no `src` attribute.
    img_url = None

print(img_url)

http://graphics8.nytimes.com/images/2014/01/01/dining/01CHEESE4_SPAN/01CHEESE4-articleLarge.jpg


In [120]:
# Time / yield entries: stripped text of every <li> in the time-yield list,
# or an empty list when that <ul> is not present.
time_yield_list = soup.find('ul', {'class': 'recipe-time-yield'})
if time_yield_list is None:
    time_yield = []
else:
    time_yield = [entry.text.strip() for entry in time_yield_list.findAll('li')]

print(time_yield)

['Time45 minutes', 'Yield4 servings']


In [121]:
# Recipe description: stripped text of the description <div>, or an empty
# string when the element is not present.
description_tag = soup.find('div', {'itemprop': 'description'})
description = '' if description_tag is None else description_tag.text.strip()

print(description)

A classic crumbler like Vermont Creamery’s fresh goat cheese can be swirled into a creamy tomato bisque just before serving, enriching the soup and preserving the occasional lemony crumb. 

Featured in: 
Turning Cheese Into A Cook’s Assistant.


In [122]:
# Category tags: text of every link in the tag block, or an empty list when
# the block is not present.
tag_block = soup.find('p', {'class': 'special-diets tag-block'})
if tag_block is None:
    categories = []
else:
    categories = [link.text for link in tag_block.findAll('a')]

print(categories)

['Vegetarian', 'Cheese', 'Tomato']


In [123]:
# Locate the ingredients section and collect its sub-section headings,
# stripped of surrounding whitespace and of colons.
rec_in_wrap = soup.find('section', {'class': 'recipe-ingredients-wrap'})
part_tags = rec_in_wrap.findAll('h4', {'class': 'part-name'})
headers = []
for part_tag in part_tags:
    headers.append(part_tag.text.strip().replace(':', ''))
print(headers, len(headers))

[] 0


In [124]:
# Display how many ingredient section headers were collected.
len(headers)

0

In [125]:
# Build `ing_dict`, an ordered mapping of section header -> ingredient lines.
# Recipes without section headers get a single 'main' entry.

# Slice bound for the <ul> lists found below: with headers, keep one list per
# header; without headers, drop the last list.
# NOTE(review): the -1 presumably skips a trailing non-ingredient list in the
# page markup -- confirm against the live page.
hix = len(headers) if headers else -1

# Query the ingredient lists once and reuse the result for both extractions.
ingredient_lists = rec_in_wrap.findAll('ul', {'class': 'recipe-ingredients'})[:hix]

# Full ingredient lines (quantity + name), one inner list per <ul>.
ingredients = [
    [item.text.strip().replace('\n', ' ')
     for item in ul.findAll('li', {'itemprop': 'recipeIngredient'})]
    for ul in ingredient_lists
]

# Bare ingredient names, parallel to `ingredients`.
ingredient_names = [
    [name.text.strip().replace('\n', ' ')
     for name in ul.findAll('span', {'itemprop': 'name'})]
    for ul in ingredient_lists
]

ing_dict = OrderedDict()
if headers:
    for head, ing in zip(headers, ingredients):
        ing_dict[head] = ing
else:
    # Guard the empty case so a page with no surviving ingredient list yields
    # an empty 'main' section instead of raising IndexError.
    ing_dict['main'] = ingredients[0] if ingredients else []

In [126]:
# Display the assembled header -> ingredient-lines mapping.
ing_dict

OrderedDict([('main',
              ['2 tablespoons olive oil',
               '1 tablespoon minced garlic',
               '1 tablespoon grated ginger',
               '1 cup diced onion',
               '1 28-ounce can crushed Italian plum tomatoes',
               '1 tablespoon sugar',
               '1 tablespoon fine sea salt, or to taste',
               '½ teaspoon red chile flakes, more to taste',
               'Pinch of cayenne pepper',
               '4 ounces good-quality fresh goat cheese, such as Vermont Creamery'])])

In [127]:
# Render each section as an upper-cased header followed by its ingredient
# lines, each indented by two spaces; sections are separated by a newline.
sections = []
for section_name, section_lines in ing_dict.items():
    indented = '  ' + '\n  '.join(section_lines)
    sections.append("{}\n{}".format(section_name.upper(), indented))

print('\n'.join(sections))

MAIN
  2 tablespoons olive oil
  1 tablespoon minced garlic
  1 tablespoon grated ginger
  1 cup diced onion
  1 28-ounce can crushed Italian plum tomatoes
  1 tablespoon sugar
  1 tablespoon fine sea salt, or to taste
  ½ teaspoon red chile flakes, more to taste
  Pinch of cayenne pepper
  4 ounces good-quality fresh goat cheese, such as Vermont Creamery


In [30]:
# First ingredient list only: full ingredient lines and bare ingredient names.
# Both fall back to empty lists when no ingredient <ul> is found.
first_list = soup.find('ul', {'class': 'recipe-ingredients'})
if first_list is None:
    ingredients_full = []
    ingredients_name = []
else:
    ingredients_full = [item.text.strip().replace('\n', ' ')
                        for item in first_list.findAll('li')]
    ingredients_name = [span.text
                        for span in first_list.findAll('span', {'itemprop': 'name'})]

print(ingredients_full)

[u'1 pound medium shrimp, peeled and deveined', u'1 teaspoon salt', u'\xbd teaspoon turmeric', u'\xbc teaspoon cayenne', u'\xbd teaspoon black pepper', u'\xbd teaspoon grated garlic', u'\xbd teaspoon grated ginger', u'\xbd teaspoon garam masala', u'1 teaspoon green or red chile, minced', u'2 tablespoons chopped cilantro', u'2 tablespoons chopped mint', u'2 tablespoons lemon juice', u'3 tablespoons chickpea flour (besan flour)', u'3 tablespoons rice flour', u'Vegetable oil for frying']


In [13]:
# Preparation steps: raw text of every <li> in the steps list, or an empty
# list when the <ol> is not present.
steps_list = soup.find('ol', {'class': 'recipe-steps'})
if steps_list is None:
    directions = []
else:
    directions = [step.text for step in steps_list.findAll('li')]

print(directions)

[u'Preheat oven to 450.', u'Scrub potatoes under running water; dry them, and rub the skin of each with the oil and a little salt. Pierce the skin of each in three or four places with the tines of a fork.', u'Place the potatoes in the oven, and roast for 45 minutes to an hour, depending on the size of the potatoes, until they offer no resistance when a knife is inserted in their centers.', u'Remove the potatoes from the oven, slice them open down the middle, apply a tablespoon of butter to each one and serve immediately.']


In [14]:
# Recipe notes: stripped text of every <li> in the notes list, or an empty
# list when the <ul> is not present.
notes_list = soup.find('ul', {'class': 'recipe-notes'})
if notes_list is None:
    notes = []
else:
    notes = [note.text.strip() for note in notes_list.findAll('li')]

print(notes)

[]


In [15]:
# Serving-size heading: text of the <h5> inside the nutrition tooltip, or an
# empty list when it is missing.
try:
    servings = soup.find('div', {'class': 'nutrition-tooltip'}).find('h5').text.strip()
except AttributeError:
    servings = []

# Nutrition summary text. Looked up in its own try block so that a missing
# summary does not discard a successfully found `servings`, and vice versa.
try:
    nutrition = soup.find('span', {'class': 'description', 'itemprop': 'nutrition'}).text.strip()
except AttributeError:
    nutrition = []

print(servings)
print(nutrition)

Nutritional analysis per serving (4 servings)
330 calories; 12 grams fat; 7 grams saturated fat; 0 grams trans fat; 3 grams monounsaturated fat; 0 grams polyunsaturated fat; 50 grams carbohydrates; 3 grams dietary fiber; 1 gram sugars; 6 grams protein; 30 milligrams cholesterol; 566 milligrams sodium


In [2]:
from recipe_parsers import NYTimesCooking

In [3]:
# Download the test recipe page for the NYTimesCooking parser.
url = 'http://cooking.nytimes.com/recipes/1017728-spicy-fried-shrimp-with-green-chutney'
# Bound the wait: without a timeout, requests can block indefinitely.
html = requests.get(url, timeout=10)

In [4]:
# Feed the downloaded page body to the NYTimesCooking parser imported from
# recipe_parsers, then print the parsed recipe.
nyt = NYTimesCooking(html.content)
print(nyt)

NAME
----
Spicy Fried Shrimp With Green Chutney

AUTHOR
------
David Tanis

DESCRIPTION
-----------
This highly seasoned Indian approach to fried shrimp elevates the concept. Perfect for snacking with drinks, it can be a meal with rice, dal and vegetables.

Featured in: 
Fried Shrimp That Welcome The Spice.

INGREDIENTS
-----------
FOR THE SHRIMP:
  1 pound medium shrimp, peeled and deveined
  1 teaspoon salt
  ½ teaspoon turmeric
  ¼ teaspoon cayenne
  ½ teaspoon black pepper
  ½ teaspoon grated garlic
  ½ teaspoon grated ginger
  ½ teaspoon garam masala
  1 teaspoon green or red chile, minced
  2 tablespoons chopped cilantro
  2 tablespoons chopped mint
  2 tablespoons lemon juice
  3 tablespoons chickpea flour (besan flour)
  3 tablespoons rice flour
  Vegetable oil for frying

FOR THE CHUTNEY:
  ¼ cup roughly chopped mint
  2 cups roughly chopped cilantro
  1 small garlic clove, minced
  1 tablespoon grated ginger
  2 tablespoons brown sugar
  ½ teaspoon salt
  ½ teaspoon ground cu

In [5]:
# Download a second test recipe page for the NYTimesCooking parser.
url = 'http://cooking.nytimes.com/recipes/1015865-tomato-bisque-with-fresh-goat-cheese'
# Bound the wait: without a timeout, requests can block indefinitely.
html = requests.get(url, timeout=10)

In [None]:
# Alternate test recipe (disabled):
#html = requests.get('http://cooking.nytimes.com/recipes/1014382-hazelnut-cheesecake-with-salted-caramel-glaze')
# Parse whichever response `html` currently holds.
nyt = NYTimesCooking(html.content)

In [10]:
# Save the parsed recipe's repr to recipe.txt. Recipe text can contain
# non-ASCII characters (e.g. fraction glyphs such as '½'), so write UTF-8
# explicitly instead of relying on the platform's default encoding.
with open('recipe.txt', 'w', encoding='utf-8') as f:
    f.write(repr(nyt))