In [2]:
import spacy
from __future__ import print_function, unicode_literals
import re

In [10]:
nlp = spacy.load('en')

In [57]:
texts = [
      "1 cup (2 sticks) butter, softened",
      "3/4 cup granulated sugar",
      "3/4 cup firmly packed brown sugar",
      "2 eggs",
      "1 tsp vanilla",
      "2 1/4 cups flour",
      "1 tsp baking soda",
      "1/4  tsp salt",
      "1  pkg (12 oz) Real Chocolate Chips",
      "1 cup chopped nuts (optional)",
      "1 lb Fettuccini",
      "1 lb Shrimp, peeled and cleaned",
      "4  Zucchini",
      "1 lb Sliced mushrooms",
      "8  Whole, fresh tomatoes",
      "2 cloves Minced garlic",
      "2 tsp Oregano",
      "1 tsp Salt",
      "1 tsp Pepper",
      "1 tsp Virgin Olive Oil",
      "1-1/2 lbs ground beef",
      "1/2  cup A.1. Thick Hearty Steak Sauce",
      "2 Tbs chopped oil-packed sun-dried tomatoes",
      "3 Tbs chopped fresh basil",
      "1 Tbs toasted pine nuts",
      "6  Sourdough rolls, split",
      "3 oz goat cheese"
    ]

In [58]:
MEASURE_WORDS = [
    'teaspoon',
    'teaspoons',
    'tsp',
    'tsps',
    'tbs',
    'tablespoon',
    'tablespoons',
    'tbsp',
    'tbsps',
    'tbl',
    'package',
    'packet',
    'packages',
    'packets',
    'pkg',
    'pkt',
    'ounces'
    'ounce',
    'oz',
    'fl oz'
    'cup',
    'cups',
    'pint',
    'pints',
    'quart',
    'quarts',
    'gallon',
    'gallons',
    'gal',
    'liter',
    'liters',
    'litre',
    'litres',
    'pound',
    'pounds',
    'lb',
    'lbs',
    'milligram',
    'milligrams',
    'mg',
    'g',
    'gram',
    'grams',
    'kilogram',
    'kilograms',
    'kg'
]

CATEGORIES = [
    'cereals/grains',
    'dairy',
    'eggs',
    'fruits',
    'vegetables',
    'sweeteners',
    'meat/poultry',
    'fats/oils',
    'nuts/seeds',
    'spices/herbs',
    'seafood'
]

ALLERGENS = [
    'dairy',
    'eggs',
    'fish',
    'shellfish',
    'tree nuts',
    'peanuts',
    'wheat',
    'soybeans'
]

INGREDIENT_DICT = {
}

In [65]:
def clean_text(text):
    # removing any text in parenthesis
    text = re.sub(r'\(.+\)', '', text)
    # convert to lower case
    text = text.lower()
    text = text.split(' ')
    while '' in text:
        text.remove('')
    while " " in text:
        text.remove(" ")
    while "\n" in text:
        text.remove("\n")
    while "\n\n" in text:
        text.remove("\n\n")
    return u' '.join(text)

def find_and_store_digit(text):
    '''
    :text - text to take digit
    :return: - returns digit
    '''
    digits = re.findall(r'\d*.*\d', text)
    
    if len(digits) > 0:
        return digits[0]
    else:
        return None

def remove_digits(text):
    return re.sub(r'\d*.*\d', '', text)

def parse_recipe(text):
    clean = clean_text(text)
    digit = find_and_store_digit(clean)
    clean = remove_digits(clean)
    
    doc = nlp(clean)
    desc = []
    core = []
    unit = None
    ## digging out the core
    for word in doc:
        if word.pos_ == 'ADJ' or word.pos_ == 'VERB':
            desc.append(word.text)
        elif word.text in MEASURE_WORDS:
            unit = word.text
        elif word.pos_ in ['ADV', 'SPACE', 'PUNCT', 'PART', 'CCONJ']:
            continue
        else:
            core.append(word.text)
    basic_text = u' '.join(core)
    new_doc = nlp(basic_text)
    
    possible_ingredients = list(new_doc.noun_chunks)
    if len(possible_ingredients) == 0:
        ingredient = None
    else:
        ingredient = possible_ingredients[0]
    
    out = dict(ingredient=ingredient, unit=unit, quantity=digit)
    return out

In [66]:
parse_recipe('1 lb sun-dried tomatoes')

[]


{'ingredient': None, 'quantity': u'1', 'unit': u'lb'}

In [50]:
while True:
    recipe = raw_input("Type in an ingredient ")
    parsed_recipe = parse_recipe(recipe)
    print('The original recipe is: %s' % recipe)
    print('This is what we found..')
    print('Quantity: %s' % parsed_recipe['quantity'])
    print('Unit: %s' % parsed_recipe['unit'])
    print('Ingredient %s' % parsed_recipe['ingredient'])
    print('-----')

Type in an ingredient hi
The original recipe is: hi
This is what we found..
Quantity: None
Unit: None
Ingredient None
-----
Type in an ingredient 1 pound garbage
The original recipe is: 1 pound garbage
This is what we found..
Quantity: 1
Unit: pound
Ingredient garbage
-----
Type in an ingredient 1 lb garbage
The original recipe is: 1 lb garbage
This is what we found..
Quantity: 1
Unit: lb
Ingredient garbage
-----
Type in an ingredient 2 lbs garbage
The original recipe is: 2 lbs garbage
This is what we found..
Quantity: 2
Unit: lbs
Ingredient garbage
-----
Type in an ingredient 1 pound of sun-dried tomatoes
The original recipe is: 1 pound of sun-dried tomatoes
This is what we found..
Quantity: 1
Unit: pound
Ingredient sun tomatoes
-----


KeyboardInterrupt: 

In [44]:
for recipe in texts:
    parsed_recipe = parse_recipe(recipe)
    print('The original recipe is: %s' % recipe)
    print('This is what we found..')
    print('Quantity: %s' % parsed_recipe['quantity'])
    print('Unit: %s' % parsed_recipe['unit'])
    print('Ingredient %s' % parsed_recipe['ingredient'])
    print('-----')

The original recipe is: 1 cup (2 sticks) butter, softened
This is what we found..
Quantity: 1
Unit: None
Ingredient cup butter
-----
The original recipe is: 3/4 cup granulated sugar
This is what we found..
Quantity: 3/4
Unit: None
Ingredient cup sugar
-----
The original recipe is: 3/4 cup firmly packed brown sugar
This is what we found..
Quantity: 3/4
Unit: None
Ingredient cup sugar
-----
The original recipe is: 2 eggs
This is what we found..
Quantity: 2
Unit: None
Ingredient eggs
-----
The original recipe is: 1 tsp vanilla
This is what we found..
Quantity: 1
Unit: tsp
Ingredient vanilla
-----
The original recipe is: 2 1/4 cups flour
This is what we found..
Quantity: 2 1/4
Unit: cups
Ingredient flour
-----
The original recipe is: 1 tsp baking soda
This is what we found..
Quantity: 1
Unit: tsp
Ingredient baking soda
-----
The original recipe is: 1/4  tsp salt
This is what we found..
Quantity: 1/4
Unit: tsp
Ingredient salt
-----
The original recipe is: 1  pkg (12 oz) Real Chocolate Chips