In [2]:
import pandas as pd
import numpy as np

### # Upload the dataset (should be on server)

In [483]:
data=pd.read_csv('data/data.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)

In [635]:
ingredients=pd.read_csv('data/ingredients_short.csv')
ingredients.drop('Unnamed: 0', axis=1, inplace=True)

In [636]:
ingredients.head()

Unnamed: 0,stem,substitute 1,substitute 2,substitute 3,food type
0,cream,half-and-half,,,
1,flaxseed,,,,
2,farro,,,,
3,applesauce,,,,
4,cranberry,,,,fruit


In [484]:
# Upload the list of non_key ingredients
spices=list(pd.read_csv('data/spices.csv')['spices'])
spices=[x for x in spices if x in data.columns]
garnish=['parsley', 'dried parsley', 'cilantro', 'cilantro leaves', 'dill', 
         'celery leaves', 'chives', 'chocolate chips', 'sesame', 'black sesame seeds', 'sesame seeds']

In [491]:
# separate ingredients from non-ingredients 
non_ingredients=['meal','title','calories','protein','carbs','fats','sodium','cuisine', 'complexity']
non_ingredients.extend(spices)

In [641]:
group_keys=['pasta', 'mold cheese', 'soft cheese', 'brined cheese', 'medium cheese', 'hard cheese', 'cottage cheese', 'dry wine', 
            'liquer', 'white wine', 'red wine']
group=dict(zip(group_keys,
            [list(ingredients[ingredients['substitute 1']==key]['stem']) for key in group_keys]))

In [642]:
group_values=[y for x in group.values() for y in x]

### Inputs
 
grocery: list <br>

calories_max: int <br>
calories_min: int <br>
protein_min: int <br>
meal_type: breakfast, lunch, dinner, dessert, drink <br>
cuisine: 20 cuisines in the list <br>
complexity: easy, medium, hard <br>
n_additional_ingredients: int <br>


In [643]:
# Should be at home
home=['pepper', 'butter', 'olive oil', 'sugar', 'salt', 'water', 'lemon juice', 'dijon mustard', 'black pepper', 'ketchup']
grocery=['apple', 'milk', 'hazelnut', 'cucumber', 'bread', 'tomato', 
         'cod',  'rice', 'garlic', 'chicken', 'egg', 'feta', 'cream cheese', 'pasta']
grocery.extend(home)

In [644]:
# extend grocery for all available substitutes
ingredients_nongroup=[gr for gr in grocery if gr not in group_values]
ingredients_group=list(set([ingredients[ingredients.stem==gr]['substitute 1'].values[0] for gr in grocery if gr in group_values]))


In [648]:
for x in ingredients_nongroup:
    #extend to own substitutes
    idx=ingredients[ingredients['stem']==x].index
    a=[ing for ing in ingredients.loc[idx][['substitute 1', 'substitute 2', 'substitute 3']].values if type(ing)==str]
    if len(a)>0:
        grocery.extend(a)
    #check whether the ingredient in other products substitutes
    idx=ingredients[(ingredients['substitute 1']==x)|(ingredients['substitute 2']==x)|(ingredients['substitute 3']==x)].index
    if len(idx)>0:
        grocery.extend(list(ingredients.loc[idx]['stem']))
        
for x in ingredients_group:
    grocery.extend(group[x])
    

### Function

In [650]:
# quick calculation of additional products
def products_to_add(options, i):
    s=options.loc[i][needed]
    return ', '.join(list(s[s==1].index))

def return_recipes(calories_max=800, 
                   calories_min=500,
                   protein_min=25,
                   meal_type='lunch',
                   cuisine='non_specified',
                   complexity='easy',
                   n_additional_ingredients=4,
                   grocery=grocery):
    if meal_type in ['lunch', 'dinner']: meal_type='lunch/dinner'
    
    #filter basic parameters 
    options=data[(data.meal==meal_type)&
                       (data.calories<calories_max)&
                       (data.calories>calories_min)&
                       (data.protein>protein_min)&
                       (data.complexity=='easy')
                      ]
    if cuisine!='non_specified':
        options=options[options.cuisine==cuisine]
        
    #filter based on grocery
    
    # drop columns with unused ingredients
    ingredients=options.drop(non_ingredients, axis=1).columns
    options.drop([x for x in ingredients if sum(options[x])==0], axis=1, inplace=True)
    
    #update ingredients
    ingredients=options.drop(non_ingredients, axis=1).columns
    
    ## products outside the groccery list   
        
    needed=[x for x in ingredients if x not in grocery]

    # Keep only the recipes if the number of additional key ingredients doesnt exceed 3
    sums=options[needed].sum(axis=1)
    ind=[x for x in sums.index if sums.loc[x]<=n_additional_ingredients]
    options=options.loc[ind]
    
    recommendation=options.loc[ind][['title', 'calories', 'protein', 'carbs', 'fats']]

    recommendation['products to add']=pd.Series([products_to_add(options, i) for i in ind], index=ind)

    recommendation['# of products to add']=pd.Series([len([y for y in recommendation.loc[i]['products to add'].split(",")  
                                                       if y not in non_ingredients]) for i in ind], index=ind)
    
    return recommendation

    

    

In [533]:
options=data[(data.meal==meal_type)&
                       (data.calories<calories_max)&
                       (data.calories>calories_min)&
                       (data.protein>protein_min)&
                       (data.complexity=='easy')
                      ]

In [534]:
cuisine='italian'

if cuisine!='non_specified':
        options=options[options.cuisine==cuisine]

In [500]:
ingredients=options.drop(non_ingredients, axis=1).columns
options.drop([x for x in ingredients if sum(options[x])==0], axis=1, inplace=True)

In [501]:
ingredients=options.drop(non_ingredients, axis=1).columns
    
    ## products outside the groccery list   
        
needed=[x for x in ingredients if x not in grocery]

In [502]:
needed

['black tea',
 'juniper berries',
 'mushroom',
 'cutlet',
 'butternut squash',
 'button mushrooms',
 'kosher salt',
 'beef chuck roast',
 'red cabbage',
 'crumbled blue cheese',
 'ground beef',
 'parmesan',
 'canola oil',
 'serrano chile',
 'italian sausage',
 'lasagna noodles',
 'walnuts',
 'spaghetti',
 'spinach',
 'king crab',
 'fillet',
 'juice',
 'eel',
 'baby artichokes',
 'kale',
 'fennel seeds',
 'roast',
 'mayonnaise',
 'ice',
 'lamb stew meat',
 'loaf',
 'green cabbage',
 'sea salt',
 'peeled tomatoes',
 'fat',
 'potato',
 'onion',
 'sausage',
 'eggplant',
 'penne',
 'kalamata olives',
 'honey',
 'cherry tomatoes',
 'swiss chard',
 'game hens',
 'baby arugula',
 'apple cider vinegar',
 'ground red pepper',
 'honeycomb tripe',
 'cabbage',
 'cream',
 'avocado',
 'wine vinegar',
 'panko',
 'anchovy',
 'leek',
 'shallot',
 'turkey',
 'flour',
 'hop',
 'pecorino cheese',
 'cream cheese',
 'salmon',
 'lemon',
 'arugula',
 'marinara sauce',
 'pork shoulder',
 'squid',
 'lemon peel',

In [503]:
sums=options[needed].sum(axis=1)

In [506]:
ind=[x for x in sums.index if sums.loc[x]<=n_additional_ingredients]
options=options.loc[ind]

In [507]:
options

Unnamed: 0,title,black tea,juniper berries,mushroom,cutlet,anise,sumac,butternut squash,rosemary,button mushrooms,...,tomato sauce,russet potatoes,calories,protein,fats,carbs,sodium,meal,complexity,cuisine
188,Pork Roast Braised with Milk and Fresh Herbs (...,0,1,0,0,0,0,0,1,0,...,0,0,689.0,85.0,34.0,87.25,723.0,lunch/dinner,easy,italian
331,Spiced Fillet of Beef with Mizuna Salad,0,0,0,0,0,0,0,0,0,...,0,0,727.0,53.0,55.0,128.75,1509.0,lunch/dinner,easy,italian
1038,"Pasta with Lobster, Tomatoes and ""Herbes de Ma...",0,0,0,0,0,0,0,1,0,...,0,0,538.0,43.0,12.0,91.5,960.0,lunch/dinner,easy,italian
1165,Roasted Beef Tenderloin Wrapped in Bacon,0,0,0,0,0,0,0,1,0,...,0,0,659.0,41.0,53.0,123.75,461.0,lunch/dinner,easy,italian
1255,Flattened Cornish Game Hens with Garlic-Citrus...,1,0,0,0,0,0,0,0,0,...,0,0,761.0,54.0,57.0,136.25,482.0,lunch/dinner,easy,italian
1835,Butternut Squash and White Bean Soup,0,0,0,0,0,0,1,0,0,...,0,0,618.0,37.0,14.0,117.5,106.0,lunch/dinner,easy,italian
2629,"Chicken with Prosciutto, Rosemary, and White W...",0,0,0,0,0,0,0,1,0,...,0,0,502.0,44.0,30.0,81.5,891.0,lunch/dinner,easy,italian
3875,Steamed Clams with Pasta,0,0,0,0,0,0,0,0,0,...,0,0,715.0,77.0,10.0,101.75,2734.0,lunch/dinner,easy,italian
5081,Fennel and Sausage Ragù Over Pasta,0,0,0,0,0,0,0,0,0,...,0,0,730.0,36.0,17.0,146.5,1239.0,lunch/dinner,easy,italian
5094,Red-Wine Pot Roast with Porcini,0,0,0,0,0,0,0,0,0,...,0,0,796.0,60.0,56.0,139.0,371.0,lunch/dinner,easy,italian


In [508]:
i=1835

In [509]:
[len([y for y in recommendation.loc[i]['products to add'].split(",")  
                                                       if y not in non_ingredients]

SyntaxError: unexpected EOF while parsing (<ipython-input-509-2b2faa7bcbf5>, line 2)

### Example

In [651]:
return_recipes(calories_max=800, 
                   calories_min=500,
                   protein_min=25,
                   meal_type='lunch',
                   cuisine='italian',
                   complexity='easy',
                   n_additional_ingredients=4,
                   grocery=grocery)


Unnamed: 0,title,calories,protein,carbs,fats,products to add,# of products to add
188,Pork Roast Braised with Milk and Fresh Herbs (...,689.0,85.0,87.25,34.0,"juniper berries, sea salt, pork shoulder, dry ...",5
331,Spiced Fillet of Beef with Mizuna Salad,727.0,53.0,128.75,55.0,"baby arugula, ground red pepper, shallot, beef...",5
1038,"Pasta with Lobster, Tomatoes and ""Herbes de Ma...",538.0,43.0,91.5,12.0,"onion, wine vinegar, lobster, whipping cream, ...",5
1165,Roasted Beef Tenderloin Wrapped in Bacon,659.0,41.0,123.75,53.0,"beef, bacon",2
1255,Flattened Cornish Game Hens with Garlic-Citrus...,761.0,54.0,136.25,57.0,"black tea, honey, game hens, lemon, lemon peel",5
1835,Butternut Squash and White Bean Soup,618.0,37.0,117.5,14.0,"butternut squash, roast, chicken stock, white ...",4
2629,"Chicken with Prosciutto, Rosemary, and White W...",502.0,44.0,81.5,30.0,"chicken drumsticks, dry wine, prosciutto",3
3875,Steamed Clams with Pasta,715.0,77.0,101.75,10.0,"ground red pepper, linguini, clams",3
5081,Fennel and Sausage Ragù Over Pasta,730.0,36.0,146.5,17.0,"onion, marinara sauce, dry wine, pasta, sweet ...",5
6077,Fettuccine With Sausage and Kale,602.0,42.0,108.5,28.0,"kale, sausage, pecorino cheese, chicken stock,...",5


In [496]:
data.loc[1835][data.loc[1835]==1].index

Index(['butternut squash', 'roast', 'olive oil', 'tomato', 'sage',
       'chicken stock', 'water', 'garlic', 'egg', 'white beans'],
      dtype='object')

In [510]:
i=1835

In [511]:
[products_to_add(options, i) for i in ind]

['juniper berries, sea salt, pork shoulder, dry wine, california bay leaves',
 'baby arugula, ground red pepper, shallot, beef, vegetable oil',
 'onion, wine vinegar, lobster, whipping cream, fettuccine',
 'beef, bacon',
 'black tea, honey, game hens, lemon, lemon peel',
 'butternut squash, roast, chicken stock, white beans',
 'chicken drumsticks, dry wine, prosciutto',
 'ground red pepper, linguini, clams',
 'onion, marinara sauce, dry wine, pasta, sweet sausage',
 'beef chuck roast, peeled tomatoes, onion, dry wine, dried porcini',
 'kale, sausage, pecorino cheese, chicken stock, fettuccine',
 'lamb stew meat, dry wine, peas',
 'crumbled blue cheese, wine vinegar, shallot, cream cheese, beef steak',
 'beef',
 'kosher salt, bean',
 'salmon, vegetable oil, fettuccine, capers',
 'parmesan, onion, bacon, pasta',
 'flour, pecorino cheese, wonton wrappers, russet potatoes',
 'kosher salt, baby arugula, panko',
 'lemon peel, beef, cloves',
 'parmesan, beef, tomato paste, roll, dried oregano

### Find pictures for the most common types of foods

In [92]:
titles=list(data_short['title'])

In [93]:
titles=[x.replace(',','') for x in titles]

In [94]:
titles[350]

'Steamed Mussels with Sherry Tomatoes and Garlic '

In [95]:
titles=[x.lower().split() for x in titles]

In [96]:
titles=[[y for y in x if nltk.pos_tag([y])[0][1] in ['NN', 'NNS']] for x in titles]

### Extract top unigrams

In [None]:
from nltk import word_tokenize
from nltk.collocations import BigramCollocationFinder

In [None]:
from nltk import word_tokenize 
from nltk.util import ngrams

In [201]:
init_unigrams=[y for x in titles for y in x]

In [203]:
unigrams_count=[(x, init_unigrams.count(x)) for x in set(init_unigrams)]

In [204]:
unigrams_count.sort(key = lambda t: t[1])

In [386]:
unigrams_count_d=dict(zip([x[0] for x in unigrams_count], [x[1] for x in unigrams_count]))

In [388]:
unigrams_count[::-1][:200]

[('sauce', 1680),
 ('salad', 1540),
 ('chicken', 1221),
 ('cream', 869),
 ('cheese', 817),
 ('soup', 617),
 ('tomato', 610),
 ('lemon', 557),
 ('chocolate', 528),
 ('pork', 512),
 ('potato', 461),
 ('cake', 459),
 ('garlic', 436),
 ('shrimp', 434),
 ('rice', 414),
 ('turkey', 414),
 ('pepper', 402),
 ('lamb', 396),
 ('butter', 394),
 ('potatoes', 384),
 ('corn', 383),
 ('orange', 378),
 ('beef', 376),
 ('roast', 370),
 ('pie', 354),
 ('apple', 349),
 ('ginger', 341),
 ('onion', 333),
 ('vinaigrette', 329),
 ('salsa', 320),
 ('bean', 313),
 ('salmon', 311),
 ('bacon', 310),
 ('tomatoes', 309),
 ('bread', 299),
 ('spicy', 299),
 ('sweet', 296),
 ('spinach', 289),
 ('vegetable', 288),
 ('fennel', 284),
 ('onions', 275),
 ('ice', 274),
 ('goat', 269),
 ('pasta', 259),
 ('mustard', 253),
 ('sausage', 249),
 ('tart', 249),
 ('mint', 245),
 ('mushroom', 244),
 ('squash', 241),
 ('beans', 233),
 ('coconut', 227),
 ('lime', 220),
 ('mushrooms', 212),
 ('arugula', 211),
 ('vegetables', 208),
 ('

In [387]:
unigrams_count_d['pasta']

259

### Extract bigrams

In [73]:
from nltk import word_tokenize
from nltk.collocations import BigramCollocationFinder

In [74]:
from nltk import word_tokenize 
from nltk.util import ngrams

In [97]:
init_bigrams=[list(ngrams(x, 2)) for x in titles]

  """Entry point for launching an IPython kernel.


In [98]:
bigrams=[y for x in init_bigrams for y in x]

In [99]:
bigrams_count=[(x, bigrams.count(x)) for x in set(bigrams)]

In [100]:
bigrams_count.sort(key = lambda t: t[1])

In [101]:
bigrams_count=bigrams_count[::-1]

In [384]:
[(x,' '.join(x[0])) for x in bigrams_count[400:500]]

[((('giblet', 'gravy'), 9), 'giblet gravy'),
 ((('salt', 'pepper'), 9), 'salt pepper'),
 ((('snow', 'peas'), 9), 'snow peas'),
 ((('gold', 'potato'), 9), 'gold potato'),
 ((('monterey', 'jack'), 9), 'monterey jack'),
 ((('cider', 'vinaigrette'), 9), 'cider vinaigrette'),
 ((('pork', 'roast'), 9), 'pork roast'),
 ((('salad', 'blue'), 9), 'salad blue'),
 ((('macaroni', 'cheese'), 9), 'macaroni cheese'),
 ((('stuffed', 'eggs'), 9), 'stuffed eggs'),
 ((('beef', 'stock'), 9), 'beef stock'),
 ((('spinach', 'feta'), 9), 'spinach feta'),
 ((('potato', 'purã©e'), 9), 'potato purã©e'),
 ((('romesco', 'sauce'), 9), 'romesco sauce'),
 ((('carrot', 'soup'), 9), 'carrot soup'),
 ((('chocolate', 'torte'), 9), 'chocolate torte'),
 ((('game', 'hen'), 9), 'game hen'),
 ((('chicken', 'tomato'), 9), 'chicken tomato'),
 ((('cranberry', 'orange'), 9), 'cranberry orange'),
 ((('bundt', 'cake'), 9), 'bundt cake'),
 ((('salmon', 'horseradish'), 9), 'salmon horseradish'),
 ((('baby', 'greens'), 9), 'baby greens

### Extract trigrams

In [209]:
init_trigrams=[list(ngrams(x, 3)) for x in titles]

  """Entry point for launching an IPython kernel.


In [210]:
trigrams=[y for x in init_trigrams for y in x]

In [212]:
trigrams_count=[(x, trigrams.count(x)) for x in set(trigrams)]
trigrams_count.sort(key = lambda t: t[1])
trigrams_count=trigrams_count[::-1]

In [213]:
[(x,' '.join(x[0])) for x in trigrams_count[:200]]

[((('sugar', 'snap', 'peas'), 31), 'sugar snap peas'),
 ((('vanilla', 'ice', 'cream'), 28), 'vanilla ice cream'),
 ((('turkey', 'giblet', 'stock'), 20), 'turkey giblet stock'),
 ((('cake', 'cream', 'cheese'), 17), 'cake cream cheese'),
 ((('goat', 'cheese', 'salad'), 16), 'goat cheese salad'),
 ((('spicy', 'tomato', 'sauce'), 15), 'spicy tomato sauce'),
 ((('ice', 'cream', 'cake'), 13), 'ice cream cake'),
 ((('sugar', 'snap', 'pea'), 12), 'sugar snap pea'),
 ((('baby', 'bok', 'choy'), 12), 'baby bok choy'),
 ((('butternut', 'squash', 'soup'), 12), 'butternut squash soup'),
 ((('roast', 'leg', 'lamb'), 11), 'roast leg lamb'),
 ((('lemon', 'ice', 'cream'), 11), 'lemon ice cream'),
 ((('pico', 'de', 'gallo'), 11), 'pico de gallo'),
 ((('roast', 'pork', 'tenderloin'), 10), 'roast pork tenderloin'),
 ((('bittersweet', 'chocolate', 'sauce'), 10), 'bittersweet chocolate sauce'),
 ((('bell', 'pepper', 'sauce'), 10), 'bell pepper sauce'),
 ((('roast', 'pork', 'loin'), 10), 'roast pork loin'),
 