## Food2Fork API ##
www.food2fork.com/about/api

In [57]:
# API access
import requests
import urllib2
from urllib import urlencode
import json
from bs4 import BeautifulSoup

# Plotting
from matplotlib import pyplot as plt

# Parsing ingredient lists
import nltk
import itertools
from itertools import compress

In [58]:
%matplotlib inline

### API credentials ###

In [59]:
def loadCredentials():
    """Read the Food2Fork API key from ``secrets.txt`` in the working directory.

    The file is scanned line by line; every line containing ``"API"`` is
    expected to look like ``<label>: <key>`` and the text after the first
    ``": "`` is taken as the key.  If several lines match, the last one wins.

    Returns:
        The API key with surrounding whitespace (including the line ending)
        removed.

    Raises:
        IOError: if ``secrets.txt`` cannot be opened.
        IndexError: if a matching line does not contain ``": "``.
        ValueError: if no line in the file contains ``"API"``.
    """
    filename = 'secrets.txt'
    api_key = None

    # `with` closes the handle even if parsing raises
    with open(filename) as secrets:
        for line in secrets:
            if "API" in line:
                # strip() works on both Python 2 and 3 (translate(None, ...) is
                # Python-2 only) and also drops '\r' / stray spaces around the key
                api_key = line.split(": ")[1].strip()

    if api_key is None:
        # Fail with a clear message instead of an UnboundLocalError on return
        raise ValueError("no line containing 'API' found in %s" % filename)

    return api_key

In [60]:
# Read the API key once, when this cell is executed
_API_KEY = loadCredentials()

# Food2Fork endpoints, each derived from the shared API root
_URL_API = "http://food2fork.com/api/"
_URL_SEARCH = ''.join((_URL_API, 'search?'))
_URL_GET = ''.join((_URL_API, 'get?'))

# Request headers passed along with every API call made below
_HEADER = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)',
}

### Search the Food2Fork API ###

In [61]:
def search(query, page=1, count=1):
    """Return a list of recipes from the Food2Fork.com database.

    Args:
        query: search text, sent as the ``q`` request parameter.
        page: result page, sent as the ``page`` request parameter.
        count: sent as the ``count`` request parameter; must satisfy
            ``0 < count <= 30``.

    Returns:
        The value stored under ``'recipes'`` in the decoded JSON response.

    Raises:
        AssertionError: if *count* is outside ``1..30``.
        KeyError: if the response has no ``'recipes'`` entry.
    """
    assert(0 < count <= 30), 'max 30 results per call, min 1' #https://github.com/davebshow/food2forkclient/

    # Format the request URL
    query_params = {"key":_API_KEY,"q":query, "page":page, "count":count}
    api_request = _URL_SEARCH + urlencode(query_params)

    # Make the request; always release the connection, even if reading fails
    request  = urllib2.Request(api_request, headers=_HEADER)
    response = urllib2.urlopen(request)
    try:
        raw = response.read().decode('utf-8')
    finally:
        response.close()

    json_obj = json.loads(raw)

    return json_obj['recipes']

In [None]:
# Search for cookie recipes
recipes = search('Chocolate chip cookies',count=5)
# Print the first hit's image URL, then display the whole first hit as the cell output
print(recipes[0]['image_url'])
recipes[0]

### Request a specific recipe from the API ###

In [71]:
def getRecipe(recipe_id):
    """Return the recipe specified by *recipe_id* from Food2Fork's database.

    Args:
        recipe_id: recipe identifier, sent as the ``rId`` request parameter.

    Returns:
        The value stored under ``'recipe'`` in the decoded JSON response.

    Raises:
        KeyError: if the response has no ``'recipe'`` entry.
    """

    # Format the request URL
    api_request = _URL_GET + urlencode({"key":_API_KEY, "rId":recipe_id})

    # Make the request; always release the connection, even if reading fails
    request  = urllib2.Request(api_request, headers=_HEADER)
    response = urllib2.urlopen(request)
    try:
        # Decode explicitly as UTF-8, the same way search() treats its response
        raw = response.read().decode('utf-8')
    finally:
        response.close()

    json_obj = json.loads(raw)

    return json_obj['recipe']

In [72]:
# Fetch one recipe by its Food2Fork id and display the full record as the cell output
r = getRecipe('35130')
r

{u'f2f_url': u'http://food2fork.com/view/35130',
 u'image_url': u'http://static.food2fork.com/BananasFosterPancakes15000eeed5d6.jpg',
 u'ingredients': [u'1 cup all purpose flour',
  u'1 teaspoon baking powder',
  u'1/4 teaspoon salt',
  u'1/4 teaspoon cinnamon',
  u'1 cup milk',
  u'1 egg',
  u'2 tablespoons brown sugar',
  u'2 tablespoons unsalted butter (melted)',
  u'2 overripe bananas (peeled and mashed)',
  u'2 tablespoons butter',
  u'2 tablespoons brown sugar',
  u'2 dashes of cinnamon',
  u'2 ripe bananas (peeled and sliced)',
  u'2 ounces dark rum\n'],
 u'publisher': u'Closet Cooking',
 u'publisher_url': u'http://closetcooking.com',
 u'recipe_id': u'35130',
 u'social_rank': 38.259222092243164,
 u'source_url': u'http://www.closetcooking.com/2010/05/bananas-foster-pancakes.html',
 u'title': u'Bananas Foster Pancakes'}

### Fetch a recipe's ingredients through the Food2Fork API ###

In [9]:
def getIngredients(json_obj):
    """Fetch the full recipe for *json_obj* and return its ingredient lines.

    ``json_obj['recipe_id']`` is passed to getRecipe; every entry of the
    returned recipe's ``'ingredients'`` has leading and trailing newline
    characters removed.
    """
    full_recipe = getRecipe(json_obj['recipe_id'])

    cleaned = []
    for line in full_recipe['ingredients']:
        cleaned.append(line.strip('\n'))
    return cleaned

In [10]:
# Ingredient lines for the first search hit; the bare name displays them as the cell output
# NOTE(review): execution counts in this notebook are out of order — confirm the saved
# output still matches after a fresh Restart & Run All
ingredients = getIngredients(recipes[0])
ingredients

[u'4 small chicken breasts, pounded thin',
 u'salt and pepper to taste',
 u'4 jalapenos, diced',
 u'4 ounces cream cheese, room temperature',
 u'1 cup cheddar cheese, shredded',
 u'8 slices bacon']

In [25]:
# search() returns a list of hits, so `recipe` is a list despite its singular name
recipe = search('Chocolate cake')

In [27]:
# Display the first hit of the 'Chocolate cake' search
recipe[0]

{u'f2f_url': u'http://food2fork.com/view/9089e3',
 u'image_url': u'http://static.food2fork.com/604133_mediumd392.jpg',
 u'publisher': u'BBC Good Food',
 u'publisher_url': u'http://www.bbcgoodfood.com',
 u'recipe_id': u'9089e3',
 u'social_rank': 100.0,
 u'source_url': u'http://www.bbcgoodfood.com/recipes/873655/cookie-monster-cupcakes',
 u'title': u'Cookie Monster cupcakes'}

## Use NLTK to separate amounts from ingredient names ##

In [11]:
from nltk import word_tokenize

In [12]:
# Element-by-element copy of `ingredients`, shown unchanged as the cell output
[ingredient for ingredient in ingredients]

[u'4 small chicken breasts, pounded thin',
 u'salt and pepper to taste',
 u'4 jalapenos, diced',
 u'4 ounces cream cheese, room temperature',
 u'1 cup cheddar cheese, shredded',
 u'8 slices bacon']

In [13]:
def getNounMask(ingredients):
    """Build one boolean mask per ingredient line marking its noun tokens.

    Each line is tokenized with word_tokenize and tagged with nltk.pos_tag;
    a token's flag is True when its tag contains ``'NN'``.  Returns a list of
    lists, one inner list per ingredient, aligned with that line's tokens.
    """
    noun_masks = []
    for line in ingredients:
        tagged_tokens = nltk.pos_tag(word_tokenize(line))
        noun_masks.append(['NN' in tag for _token, tag in tagged_tokens])
    return noun_masks

In [14]:
def parseIngredients(ingredients):
    """Print, one line per ingredient, only the tokens flagged by getNounMask.

    Nothing is returned; the kept tokens of each ingredient are joined with
    single spaces and written to standard output.
    """
    noun_masks = getNounMask(ingredients)
    for line, keep_flags in zip(ingredients, noun_masks):
        tokens = word_tokenize(line)
        # Keep a token only where the matching mask entry is truthy
        nouns = [token for token, keep in zip(tokens, keep_flags) if keep]
        print(' '.join(nouns))

In [15]:
# Rebinds `recipes` (previously the 'Chocolate chip cookies' results) to a new search
recipes = search('cookies')

In [16]:
# Index of the search hit to inspect
n=3
print(recipes[n]['title'])
# Fetch the full ingredient list for that hit (rebinds the global `ingredients`)
ingredients = getIngredients(recipes[n])
print(ingredients)
# Print only the noun tokens of each ingredient line
parseIngredients(ingredients)

Slutty Brownies
[u'10 tbsp unsalted butter', u'1 1/4 cups white sugar', u'3/4 cup cocoa powder', u'1/2 tsp salt', u'2 tsp vanilla extract', u'2 large eggs', u'1/2 cup AP flour', u'For the Oreo layer:1 package of Oreo (regular stuffed or double stuffed)', u'For the Cookie Dough layer:1/2 cup unsalted butter (at room temp)', u'1/4 cup brown sugar', u'3/4 cup white sugar', u'1 egg', u'1 1/4 tsp vanilla extract', u'1 1/4 cups AP flour', u'1/2 tsp salt', u'1/2 tsp baking soda', u'1/2 tsp baking powder', u'1 cup semi-sweet chocolate chips', u'InstructionsFor the Brownie layer:']
tbsp butter
cups sugar
cup cocoa powder
tsp salt
vanilla extract
eggs
cup AP flour
Oreo package Oreo stuffed stuffed
Cookie Dough cup butter room temp
cup sugar
sugar
egg
tsp vanilla extract
cups AP flour
tsp salt
tsp baking soda
tsp baking powder
chocolate chips
InstructionsFor Brownie layer


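This is tough! For example, I wanted to keep only the nouns, but then "brown sugar" and "white sugar" both collapse to plain "sugar", because the adjectives "brown" and "white" are thrown out and the two ingredients can no longer be told apart.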