## Food2Fork API ##
www.food2fork.com/about/api

In [1]:
# API access
import requests
import urllib2
from urllib import urlencode
import json
from bs4 import BeautifulSoup

# Plotting
from matplotlib import pyplot as plt

# Parsing ingredient lists
import re
import nltk
import itertools
from itertools import compress

In [2]:
%matplotlib inline

### API credentials ###

In [3]:
def loadCredentials(filename='secrets.txt'):
    """Return the Food2Fork API key stored in *filename*.

    The file is scanned for lines that contain "API"; the key is the text
    following the first ": " on such a line, with surrounding whitespace
    removed.  If several lines qualify, the last one wins.

    Args:
        filename: path of the secrets file (defaults to 'secrets.txt').

    Returns:
        The API key as a string.

    Raises:
        IOError: if *filename* cannot be opened.
        ValueError: if no line of the form "<label with API>: <key>" exists.
    """
    api_key = None

    # The context manager closes the file even if parsing fails.
    with open(filename) as secrets:
        for line in secrets:
            if "API" not in line:
                continue
            _label, separator, value = line.partition(": ")
            if separator:
                # strip() removes the trailing newline (and a stray carriage
                # return from Windows line endings) on Python 2 and 3 alike.
                api_key = value.strip()

    if api_key is None:
        raise ValueError('no "API...: <key>" line found in %s' % filename)
    return api_key

In [4]:
# Load the API Key
# NOTE: this runs when the cell/module is executed, so the secrets file read
# by loadCredentials() must be present at that point.
_API_KEY = loadCredentials()

# Food2Fork links
# Base URL plus the two endpoints used below; each endpoint ends in '?' so a
# url-encoded query string can be appended directly.
_URL_API = "http://food2fork.com/api/"
_URL_SEARCH = _URL_API + 'search?'
_URL_GET    = _URL_API + 'get?'
# Request headers sent with every API call (a browser-like User-Agent).
_HEADER = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'}

### Search the Food2Fork API ###

In [5]:
def search(query, page=1, count=1):
    """Return recipes matching *query* from the Food2Fork.com database.

    Args:
        query: search text, sent as the API's "q" parameter.
        page: results page, sent as the "page" parameter.
        count: number of results to request; must satisfy 0 < count <= 30.

    Returns:
        The decoded "recipes" entry of the JSON response.  When it holds
        exactly one recipe, that single recipe is returned on its own
        instead of a one-element list.

    Raises:
        ValueError: if *count* is outside 1..30.
    """
    # Validate with an exception rather than ``assert``: assert statements
    # are stripped when Python runs with -O.
    # Limit taken from https://github.com/davebshow/food2forkclient/
    if not 0 < count <= 30:
        raise ValueError('max 30 results per call, min 1')

    from contextlib import closing

    # Format the request URL
    query_params = {"key": _API_KEY, "q": query, "page": page, "count": count}
    api_request = _URL_SEARCH + urlencode(query_params)

    # Make the request; closing() releases the connection even if read() fails
    request = urllib2.Request(api_request, headers=_HEADER)
    with closing(urllib2.urlopen(request)) as response:
        raw = response.read()
    recipes = json.loads(raw)['recipes']

    if len(recipes) == 1:
        return recipes[0]
    return recipes

In [6]:
# Search for cookie recipes
# Several results are requested so that search() hands back a list (it
# returns a bare recipe when exactly one result comes back), which is what
# the [0] indexing below relies on.
recipes = search('Chocolate chip cookies',count=5)
print(recipes[0]['image_url'])
# Display the first hit as the cell output
recipes[0]

http://static.food2fork.com/508714d505.jpg


{u'f2f_url': u'http://food2fork.com/view/6868',
 u'image_url': u'http://static.food2fork.com/508714d505.jpg',
 u'publisher': u'All Recipes',
 u'publisher_url': u'http://allrecipes.com',
 u'recipe_id': u'6868',
 u'social_rank': 99.99999999999996,
 u'source_url': u'http://allrecipes.com/Recipe/Chewy-Chocolate-Chip-Oatmeal-Cookies/Detail.aspx',
 u'title': u'Chewy Chocolate Chip Oatmeal Cookies'}

### Request a specific recipe from the API ###

In [7]:
def getRecipe(recipe_id):
    """Return the recipe specified by *recipe_id* from Food2Fork's database.

    Args:
        recipe_id: identifier sent as the API's "rId" parameter.

    Returns:
        The "recipe" entry of the decoded JSON response.

    Raises:
        KeyError: if the response JSON has no "recipe" entry.
    """
    from contextlib import closing

    # Format the request URL
    api_request = _URL_GET + urlencode({"key": _API_KEY, "rId": recipe_id})

    # Make the request; closing() releases the connection even if read() fails
    request = urllib2.Request(api_request, headers=_HEADER)
    with closing(urllib2.urlopen(request)) as response:
        raw = response.read()

    return json.loads(raw)['recipe']

### Get a recipe's ingredients from the Food2Fork API ###

In [8]:
def getIngredients(json_obj):
    """Return the cleaned, lower-cased ingredient strings of a recipe.

    Args:
        json_obj: a recipe mapping.  If it already carries an 'ingredients'
            entry (as the value returned by getRecipe does), that list is
            used directly; otherwise the full recipe is fetched with
            getRecipe(json_obj['recipe_id']).

    Returns:
        A list with one entry per ingredient: surrounding newlines stripped,
        non-ASCII characters dropped, and text lower-cased.
    """
    if 'ingredients' in json_obj:
        # Already a full recipe: avoid a second, redundant API request.
        recipe = json_obj
    else:
        recipe = getRecipe(json_obj['recipe_id'])

    return [item.strip('\n').encode('ascii', 'ignore').lower()
            for item in recipe['ingredients']]

In [9]:
# ingredients = getIngredients(getRecipe(search("Chocolate chip cookies",count=5)[0]['recipe_id']))
# Take the second "Soup" search hit, fetch its full recipe by id, and extract
# its ingredient strings.
ingredients = getIngredients(getRecipe(search("Soup",count=5)[1]['recipe_id']))
# Display the result as the cell output
ingredients

['2 tablespoons butter',
 '2 whole large onions, halved and sliced thin',
 '1/4 cup beef broth',
 '7 dashes worcestershire sauce',
 'splash of red or white wine',
 '1/2 cup grated gruyere cheese (can use swiss)',
 'kosher salt',
 '24 whole white or crimini mushrooms, washed and stems removed',
 'minced parsley']

## Parse ingredient amounts using regular expressions and NLTK ##

In [10]:
from ingredient_parser.en import parse

In [11]:
# Re-display the raw ingredient strings that are about to be parsed
ingredients


['2 tablespoons butter',
 '2 whole large onions, halved and sliced thin',
 '1/4 cup beef broth',
 '7 dashes worcestershire sauce',
 'splash of red or white wine',
 '1/2 cup grated gruyere cheese (can use swiss)',
 'kosher salt',
 '24 whole white or crimini mushrooms, washed and stems removed',
 'minced parsley']

In [12]:
# Run the ingredient_parser `parse` function on every ingredient string; per
# the output below, each result is a dict with 'measure' and 'name' keys.
p = [parse(ingrd) for ingrd in ingredients]
# Display the parsed results as the cell output
p

[{'measure': '2 tablespoon', 'name': 'butter'},
 {'measure': '2 large', 'name': 'onions, halved and sliced thin'},
 {'measure': '1/4 cup', 'name': 'beef broth'},
 {'measure': '7 dash', 'name': 'worcestershire sauce'},
 {'measure': ' splash', 'name': 'red or white wine'},
 {'measure': '1/2 cup', 'name': 'grated gruyere cheese (can use swiss)'},
 {'measure': ' ', 'name': 'kosher salt'},
 {'measure': '24 whole',
  'name': 'white or crimini mushrooms, washed and stems removed'},
 {'measure': ' ', 'name': 'minced parsley'}]

#### Attempt to parse with regular expressions ####

In [14]:
# https://pypi.python.org/pypi/ingredient-parser/1.0.1
from fractions import Fraction
# from nltk import PorterStemmer

# def parseIngredients(ingredients):
p = [parse(ingrd) for ingrd in ingredients]
num_ingredients = len(p)

# num_words = ['zero','one','two','three','four','five','six','seven','eight','nine',
#            'ten','eleven','twelve','thirteen','fourteen','fifteen','sixteen',
#            'seventeen','eighteen','nineteen','twenty']
# NUMBERS = dict(zip(num_words,range(0,len(num_words))))

# Use RegEx to get the leading amount from each parsed 'measure' string.
# Alternatives, tried in order:
#   mixed number  "1 1/2"
#   fraction      "1/4"
#   decimal       "1.5" or ".5"
#   whole number  "24"
# The amount group is optional, so a measure without a number (" splash",
# " ") still matches with empty amount text.  The decimal point is escaped on
# purpose: a bare "." would match any character, letting letters into the
# amount text and making Fraction() raise ValueError.
expr = r'\s*(\d+\s+\d+/\d+|\d+/\d+|\d*\.\d+|\d+)?'
matches = [re.match(expr, ingrd['measure']) for ingrd in p]
amounts = [match.group().strip() for match in matches]

# Convert amounts to float; the parts of a mixed number are summed, and an
# empty amount sums to 0 and becomes 0.0
amounts = [float(sum(Fraction(s) for s in a.split())) for a in amounts]

# Get measurement unit from the RegEx matches: whatever follows the amount
# PS = PorterStemmer()
# units = [str(PS.stem(i['measure'][m.end():].strip(' .'))) for i,m in zip(p,matches)]
units = [i['measure'][m.end():].strip() for i, m in zip(p, matches)]

# Get name from parsed list, dropping any preparation notes after a comma
names = [ingrd['name'].split(',')[0] for ingrd in p]

# What have we parsed so far?
list(zip(amounts, units, names))


[(2.0, 'tablespoon', 'butter'),
 (2.0, 'large', 'onions'),
 (0.25, 'cup', 'beef broth'),
 (7.0, 'dash', 'worcestershire sauce'),
 (0.0, 'splash', 'red or white wine'),
 (0.5, 'cup', 'grated gruyere cheese (can use swiss)'),
 (0.0, '', 'kosher salt'),
 (24.0, 'whole', 'white or crimini mushrooms'),
 (0.0, '', 'minced parsley')]

In [17]:
class Ingredient(object):
    """One parsed recipe ingredient, exposed through read-only properties.

    Args:
        name: what the ingredient is, e.g. butter, sugar
            (this needs a better variable name than "name").
        amount: how many of *units*.
        units: measurement units, e.g. cup, tablespoon, pound.
        description: preparation notes, e.g. softened, blackened.
    """

    def __init__(self, name, amount, units, description):
        # Values are kept in underscore-prefixed attributes; only getters are
        # defined below, so the public names cannot be assigned to.
        self._description = description
        self._units = units
        self._amount = amount
        self._name = name

    @property
    def amount(self):
        """Quantity of `units`, e.g. 1, 2, 1 1/2, 3/4."""
        return self._amount

    @property
    def units(self):
        """Measurement units, e.g. cups, teaspoons, oz."""
        return self._units

    @property
    def name(self):
        """Ingredient name, e.g. butter, chocolate chips, ground beef."""
        return self._name

    @property
    def description(self):
        """Preparation notes, e.g. softened, lightly-packed."""
        return self._description

    def __repr__(self):
        # Shown as the tuple (amount, units, name); description is left out.
        summary = (self.amount, self.units, self.name)
        return repr(summary)