In [1]:
import numpy as np
import pandas as pd
import difflib
import re
from fractions import Fraction
import json

# Reference price table; rows are looked up by item name through the 'item' index.
prices = pd.read_csv('prices.csv', index_col='item')

recipes = None
# Base name (no extension) of the recipe dataset.
json_name = 'recipes_with_nutritional_info'
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open(json_name + '.json', encoding='utf-8') as recp:
    recipes = json.load(recp)



In [2]:
def singularize(unit):
    """
    drops one trailing lowercase 's' from [unit]; anything else (including the empty string) comes back unchanged
    """
    return unit[:-1] if unit.endswith('s') else unit

def no_specials(unit):
    """
    removes every character of [unit] that is not an ASCII letter (digits, spaces and punctuation are all dropped)
    """
    non_letters = re.compile('[^A-Za-z]+')
    return non_letters.sub('', unit)

def frac_to_float(frac_str):
    """
    Parse a quantity string such as '3', '0.5', '3/4' or '1 1/2' into a float.

    Each whitespace-separated term is parsed as a Fraction and the terms are
    added together, so the mixed number '1 1/2' gives 1.5. An empty or
    whitespace-only string gives 0.0.

    Returns 0.0 when the string cannot be turned into a finite number:
    a term that is not numeric, a zero denominator such as '1/0', or a value
    too large for a float.
    """
    try:
        return float(sum(Fraction(term) for term in frac_str.split()))
    except (ValueError, ZeroDivisionError, OverflowError):
        # ValueError: a term is not a number.
        # ZeroDivisionError: Fraction('1/0') raises this rather than ValueError.
        # OverflowError: the parsed value does not fit in a float.
        return 0.0

# Maps a cleaned-up unit spelling to the spelling used as a key in the
# conversion tables. simplify_unit consults this after normalising its input.
unit_acronym_map = dict(
    milliliter='ml',
    millilitre='ml',
    deciliter='dl',
    decilitre='dl',
    liter='l',
    litre='l',
    tsp='teaspoon',
    tbsp='tablespoon',
    fluidounce='floz',
    oz='ounce',
    kilogram='kg',
    gram='g',
    milligram='mg',
    lb='pound',
)

def simplify_unit(unit):
    """ 
    removes special characters from [unit], makes it lowercase, singularizes, then tries to map it to a simpler version used

    The clean-up runs before singularizing on purpose: singularize only looks
    for a trailing lowercase 's', so inputs such as 'cups.', 'lbs.' or 'CUPS'
    would otherwise keep their plural ending and never match a known unit.

    Returns the mapped spelling from unit_acronym_map when there is one,
    otherwise the cleaned-up unit itself.
    """ 
    unit = singularize(no_specials(unit).lower())
    return unit_acronym_map.get(unit, unit)

# units of volume
volume_units = 'teaspoon tablespoon floz ml dl l cup quart pint gallon'.split()

# 1 [unit] is [volume_conv_to_cup[unit]] cups
volume_conv_to_cup = dict(
    cup=1,
    teaspoon=1/48,
    tablespoon=1/16,
    floz=1/8,
    ml=0.00422675,
    dl=0.422675,
    l=4.22675,
    quart=4,
    pint=2,
    gallon=16,
)

# units of weight
weight_units = 'pound mg g kg ounce'.split()

# 1 [unit] is [weight_conv_to_pound[unit]] pounds
weight_conv_to_pound = dict(
    pound=1,
    mg=2.20462e-6,
    g=0.00220462,
    kg=2.20462,
    ounce=0.0625,
)

def convert(str_amt, unit):
    """
    Turn a fraction string [str_amt] and its [unit] into an (amount, unit) tuple for indexing in the table.

    Volume units are expressed in cups and weight units in pounds; any other
    unit is returned in its simplified spelling with the amount unscaled.
    """
    canonical = simplify_unit(unit)
    amount = frac_to_float(str_amt)
    # Volume is checked first, then weight
    for table, base_unit in ((volume_conv_to_cup, 'cup'), (weight_conv_to_pound, 'pound')):
        if canonical in table:
            return (table[canonical] * amount, base_unit)
    return (amount, canonical)
    


In [3]:
# Fallback per-unit prices handed out by get_default_price.
# NOTE(review): the currency is not stated anywhere in this notebook — confirm.

# fallback when the unit is 'pound'
DEFAULT_PRICE_PER_POUND = 5
# fallback when the unit is 'cup'
DEFAULT_PRICE_PER_CUP = 1
# fallback for every other unit
DEFAULT_PRICE_PER_NOUNIT = 1

def truncate_item(item):
    """ 
    keep only the part of a food item before its first comma, lowercased and with surrounding whitespace removed
    """
    head, _, _ = item.lower().partition(',')
    return head.strip()

def get_default_price(unit):
    """ 
    fallback per-unit price for [unit]: one value for 'cup', one for 'pound', and a shared one for any other unit
    """
    if unit == 'cup':
        return DEFAULT_PRICE_PER_CUP
    if unit == 'pound':
        return DEFAULT_PRICE_PER_POUND
    return DEFAULT_PRICE_PER_NOUNIT


def per_unit_price(item, unit, prices):
    """ 
    Pre: item is in the index of prices. gets the per unit price

    The listed price is used only when [unit] equals the row's 'unit' value;
    otherwise the default price for [unit] is returned.
    """
    entry = prices.loc[item]
    return entry['price'] if unit == entry['unit'] else get_default_price(unit)

def iqu_to_price(iqu, prices):
    """ 
    tuple of item, quantity, unit used to get a price in the prices

    The item name is fuzzy-matched against the index of [prices] (up to 3
    matches with similarity of at least 0.6). The result is the average, over
    the matches, of per-unit price times quantity. With no match, the default
    per-unit price for the unit is used, also multiplied by the quantity.
    """
    item, quantity, unit = iqu
    item_matches = difflib.get_close_matches(item, prices.index, n=3, cutoff=0.6)
    if not item_matches:
        # Scale by quantity here as well: the matched branch returns a total
        # cost, so a bare per-unit price would price 10 cups the same as 1 cup.
        return get_default_price(unit) * quantity

    return np.average([per_unit_price(match_item, unit, prices) * quantity for match_item in item_matches])



def recipe_to_price(recipe, prices):
    """ 
    sum of the estimated prices of every ingredient in [recipe]

    Reads the 'text' field of each entry under the recipe's 'ingredients',
    'quantity' and 'unit' keys and pairs them up by position.
    """
    names = [entry['text'] for entry in recipe['ingredients']]
    amounts = [entry['text'] for entry in recipe['quantity']]
    unit_names = [entry['text'] for entry in recipe['unit']]

    # (amount, unit) pairs expressed in the units the price table is indexed by
    converted = [convert(amount, unit_name) for amount, unit_name in zip(amounts, unit_names)]

    item_qty_unit = [(truncate_item(name), qty, base_unit) for name, (qty, base_unit) in zip(names, converted)]

    # one estimated price per ingredient
    ingredient_costs = [iqu_to_price(triple, prices) for triple in item_qty_unit]

    return sum(ingredient_costs)

# Annotate every recipe dict in place with its estimated total under the 'cost' key.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so it
# may be slow on the full dataset — confirm before relying on a full re-run.
for rp in recipes:
    rp['cost'] = recipe_to_price(rp, prices)


KeyboardInterrupt: 

In [None]:
# Write the recipes as they currently stand in memory to '<json_name>_prices.json',
# overwriting any existing file of that name.
output_file_str = json_name + '_prices.json'
with open(output_file_str, 'w') as output:
    json.dump(recipes, output)
