# EDA on Food.com: Recipes and Interactions

This dataset was obtained from [this link](https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions).

This dataset consists of 180K+ recipes and 700K+ recipe reviews covering 18 years of user interactions and uploads on Food.com (formerly GeniusKitchen). It was used in the following paper:

[Generating Personalized Recipes from Historical User Preferences](https://www.aclweb.org/anthology/D19-1613). Bodhisattwa Prasad Majumder, Shuyang Li, Jianmo Ni, Julian McAuley. EMNLP, 2019.


The dataset ships in two forms: preprocessed files, whose names start with the `PP_` prefix, and raw files, whose names start with the `RAW_` prefix.

In [1]:
import pandas as pd
import os

In [2]:
# Every input file (the RAW_ and PP_ tables and the ingredient map) is read
# from this single folder.
data_path = '../data/raw/'

# Ingredient map.
# NOTE: unpickling can execute arbitrary code; only load this file when it
# comes from a trusted source.
ingredient_map_file = 'ingr_map.pkl'
ingredient_map_path = os.path.join(data_path, ingredient_map_file)
ingredient_map = pd.read_pickle(ingredient_map_path)

# Raw recipes table.
raw_data_file = 'RAW_recipes.csv'
raw_data_path = os.path.join(data_path, raw_data_file)
raw_data = pd.read_csv(raw_data_path)

# Preprocessed recipes table.
processed_data_file = 'PP_recipes.csv'
processed_data_path = os.path.join(data_path, processed_data_file)
processed_data = pd.read_csv(processed_data_path)

In [3]:
# Aux functions:
def techinques(lst):
    """Translate a vector of technique flags into technique names.

    Args:
        lst: Sequence of flags; position ``i`` refers to ``TECHNIQUES_LIST[i]``.

    Returns:
        list: The ``TECHNIQUES_LIST`` entries whose flag equals 1, in the
        order in which they appear in ``lst``.
    """
    return [TECHNIQUES_LIST[position] for position, flag in enumerate(lst) if flag == 1]

In [4]:
# Add functionality to data

# `nutrition` holds a stringified list of numbers. Strip the brackets as
# literal characters (regex=False) and parse every entry as a float.
raw_data['nutrition_list'] = (
    raw_data['nutrition']
    .str.replace('[', '', regex=False)
    .str.replace(']', '', regex=False)
    .str.split(', ')
    .apply(lambda values: [float(value) for value in values])
)

# `ingredients` holds a stringified list of quoted names; turn it into a real
# list of strings.
# NOTE(review): every apostrophe is removed, including ones inside a name —
# confirm this is acceptable.
raw_data['ingredients'] = (
    raw_data['ingredients']
    .str.replace('[', '', regex=False)
    .str.replace(']', '', regex=False)
    .str.replace("'", '', regex=False)
    .str.split(', ')
)

# According to the Kaggle dataset page, the nutrition values come in this order:
nutrition_columns = [
    'calories__nb',
    'total_fat__pvd',
    'sugar__pvd',
    'sodium__pvd',
    'protein__pvd',
    'sat_fat__pvd',
    'carbs__pvd',
]
for position, column in enumerate(nutrition_columns):
    # Bind `position` as a default argument so each column reads its own slot.
    raw_data[column] = raw_data['nutrition_list'].apply(
        lambda values, position=position: values[position]
    )

# DataFrame.drop returns a new frame, so the result must be assigned back;
# the original call discarded it and the helper columns were never removed.
# After this line the cell cannot be re-run without reloading `raw_data`.
raw_data = raw_data.drop(columns=['nutrition', 'nutrition_list'])

# Technique vocabulary, taken from the official repo:
# https://github.com/majumderb/recipe-personalization/blob/master/recipe_gen/language.py
# `techinques` looks names up in this list by position, so the order of the
# entries must not change.
# NOTE(review): 'freez' and 'refrigerat' look like deliberate word stems
# copied from upstream — confirm before "fixing" them.
TECHNIQUES_LIST = [
    # b
    'bake', 'barbecue', 'blanch', 'blend', 'boil', 'braise', 'brine', 'broil',
    # c
    'caramelize', 'combine', 'crock pot', 'crush',
    # d - f
    'deglaze', 'devein', 'dice', 'distill', 'drain', 'emulsify', 'ferment', 'freez', 'fry',
    # g - m
    'grate', 'griddle', 'grill', 'knead', 'leaven', 'marinate', 'mash', 'melt', 'microwave',
    # p - r
    'parboil', 'pickle', 'poach', 'pour', 'pressure cook', 'puree', 'refrigerat', 'roast',
    # s
    'saute', 'scald', 'scramble', 'shred', 'simmer', 'skillet', 'slow cook', 'smoke', 'smooth',
    'soak', 'sous-vide', 'steam', 'stew', 'strain',
    # t - w
    'tenderize', 'thicken', 'toast', 'toss', 'whip', 'whisk',
]
# Ingredient ids: split the stringified list into a list of id strings.
ingredient_ids_text = processed_data['ingredient_ids']
processed_data['ingredient_ids_list'] = (
    ingredient_ids_text
    .str.replace('[', '')
    .str.replace(']', '')
    .str.split(', ')
)

# Techniques: split the stringified flag vector, cast every flag to int and
# regroup the flags per row.
technique_flags = (
    processed_data['techniques']
    .str.replace('[', '')
    .str.replace(']', '')
    .str.split(', ')
    .explode()
    .astype('int')
    .groupby(level=0)
    .apply(lambda flags: flags.to_list())
)
processed_data['techniques_list'] = technique_flags

# Replace each flag vector with the names of the techniques flagged with 1.
processed_data['techniques_list'] = processed_data['techniques_list'].apply(techinques)

In [5]:
# One row per ingredient id. The id is cast to str so that it matches the
# string ids produced by splitting `ingredient_ids`.
reduced_ingredient_map = (
    ingredient_map[['replaced', 'id']]
    .drop_duplicates('id')
    .rename(columns={'id': 'ingredient_ids_list'})
    .assign(ingredient_ids_list=lambda frame: frame['ingredient_ids_list'].astype('str'))
)

# One row per (recipe, ingredient id) pair ...
recipe_ingredient_ids = processed_data[['id', 'ingredient_ids_list']].explode('ingredient_ids_list')

# ... joined with the `replaced` name for that id ...
recipe_ingredient_names = recipe_ingredient_ids.merge(
    reduced_ingredient_map, on='ingredient_ids_list', how='left'
)

# ... and collapsed back to one list of names per recipe.
processed_data_ingredient_list = (
    recipe_ingredient_names
    .groupby('id')
    .agg(ingredient_list=('replaced', lambda names: names.to_list()))
    .reset_index()
)

In [6]:
# Columns kept from the raw recipes table.
raw_data_relevant_features = [
    'name', 'id', 'minutes', 'tags',
    'n_steps', 'steps', 'ingredients', 'n_ingredients',
]
nutritional_features = [
    'calories__nb', 'total_fat__pvd', 'sugar__pvd', 'sodium__pvd',
    'protein__pvd', 'sat_fat__pvd', 'carbs__pvd',
]

selected_columns = raw_data_relevant_features + nutritional_features
technique_columns = processed_data[['id', 'calorie_level', 'techniques_list']]

# Inner joins: only recipes present in every table are kept.
data = raw_data[selected_columns].copy()
data = data.merge(processed_data_ingredient_list, on='id', how='inner')
data = data.merge(technique_columns, on='id', how='inner')

In [7]:
# Preview the first three rows of the merged recipe table.
data.head(3)

Unnamed: 0,name,id,minutes,tags,n_steps,steps,ingredients,n_ingredients,calories__nb,total_fat__pvd,sugar__pvd,sodium__pvd,protein__pvd,sat_fat__pvd,carbs__pvd,ingredient_list,calorie_level,techniques_list
0,arriba baked winter squash mexican style,137739,55,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...","[winter squash, mexican seasoning, mixed spice...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0,"[winter squash, mexican seasoning, mixed spice...",0,"[bake, grate, melt]"
1,a bit different breakfast pizza,31490,30,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...","[prepared pizza crust, sausage patty, eggs, mi...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0,"[pizza crust, sausage, egg, milk, salt and pep...",0,"[bake, pour, whisk]"
2,alouette potatoes,59389,45,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"[spreadable cheese with garlic and herbs, new ...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0,"[cheese, new potato, shallot, parsley, tarrago...",1,"[bake, boil, dice, drain]"


In [21]:
# Show the raw ingredient strings next to the mapped ingredient names for one
# randomly chosen recipe. A fixed random_state keeps the displayed recipe the
# same on every Restart & Run All.
data_sample = data.sample(1, random_state=42)
data_sample.ingredients.iloc[0], data_sample.ingredient_list.iloc[0]

(['water',
  'dried split peas',
  'chicken broth',
  'ham bone',
  'dried marjoram',
  'black pepper',
  'bay leaf',
  'carrots',
  'celery ribs',
  'onion'],
 ['water',
  'dried split pea',
  'chicken broth',
  'ham bone',
  'dried marjoram',
  'black pepper',
  'bay leaf',
  'carrot',
  'celery rib',
  'onion'])

In [37]:
# One entry per (recipe, ingredient); the index still refers to the row in `data`.
exploded_ingredients = data.ingredient_list.explode()
# Fold 'garlic clove' into 'garlic'. The original cell ran this statement
# twice; the second run was a no-op and has been removed.
exploded_ingredients.loc[exploded_ingredients == 'garlic clove'] = 'garlic'

In [48]:
# Count ingredients whose name contains 'vegan'. na=False treats missing
# names as "no match"; without it a NaN in the mask makes the boolean
# indexing raise.
exploded_ingredients[exploded_ingredients.str.contains('vegan', na=False)].value_counts()

ingredient_list
vegan margarine                     73
vegan chocolate chip                22
vegan butter                        21
vegan sugar                         16
vegan parmesan cheese               15
vegan sour cream                    12
vegan chicken seasoning              6
vegan egg replacer powder            6
vegan worcestershire sauce           5
vegan burger                         5
vegan cheese                         5
non-hydrogenated vegan margarine     5
vegan chicken                        5
vegan egg substitute                 3
vegan powdered sugar                 2
vegan chocolate                      2
vegan pie crust                      1
Name: count, dtype: int64

In [49]:
# Recipes whose name contains ' vegan'.
# NOTE(review): the pattern starts with a space, so names that begin with
# 'vegan' are not matched — confirm this is intended.
name_has_vegan = data['name'].str.contains(' vegan')
data[name_has_vegan]

Unnamed: 0,name,id,minutes,tags,n_steps,steps,ingredients,n_ingredients,calories__nb,total_fat__pvd,sugar__pvd,sodium__pvd,protein__pvd,sat_fat__pvd,carbs__pvd,ingredient_list,calorie_level,techniques_list
14,cream of cauliflower soup vegan,23850,110,"['lactose', 'weeknight', 'time-to-make', 'cour...",10,['heat the oil or margarine in a soup pot and ...,"[canola oil, onion, garlic, cauliflower, potat...",16,174.2,4.0,24.0,1.0,15.0,1.0,10.0,"[canola oil, onion, garlic, cauliflower, potat...",0,"[blend, puree, saute, simmer, smooth]"
16,cream of spinach soup vegan,24701,55,"['60-minutes-or-less', 'time-to-make', 'course...",10,"['in a 3 qt saucepan over medium high heat , s...","[onion, scallion, apple juice, olive oil, spin...",12,64.8,3.0,13.0,54.0,4.0,2.0,3.0,"[onion, scallion, apple juice, olive oil, spin...",0,"[blend, boil, puree, saute, simmer, smooth]"
69,sour cream avocado dip vegan,112959,10,"['lactose', '15-minutes-or-less', 'time-to-mak...",6,"['peel and pit avocado , put in a large mixing...","[soft silken tofu, avocado, chunky salsa, fres...",7,12.8,1.0,0.0,0.0,0.0,0.0,0.0,"[soft silken tofu, avocado, chunky salsa, fres...",0,"[blend, combine, smooth]"
135,cream of pumpkin or squash soup vegan,164526,30,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['place all ingredients except for banana , ap...","[pumpkin puree, vegetable broth, small red pot...",14,89.3,0.0,35.0,0.0,4.0,0.0,7.0,"[pumpkin puree, vegetable broth, small red pot...",0,[puree]
138,creamy vegan potato leek soup,343338,40,"['lactose', '60-minutes-or-less', 'time-to-mak...",10,"['heat olive oil in a 4-quart pot', 'sautee th...","[olive oil, leeks, garlic cloves, russet potat...",8,183.0,3.0,10.0,0.0,13.0,1.0,11.0,"[olive oil, leek, garlic clove, russet potato,...",0,"[blend, boil, puree, saute, simmer, smooth]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177118,yummy whole wheat bread vegan,327738,220,"['time-to-make', 'course', 'preparation', 'hea...",9,['in a large bowl dissolve the yeast in the wa...,"[active dry yeast, warm water, grapeseed oil, ...",6,1478.3,35.0,112.0,122.0,102.0,14.0,96.0,"[yeast, warm water, grapeseed oil, brown sugar...",2,"[bake, knead, smooth]"
177136,yummy vegan pumpkin cookies,208051,12,"['15-minutes-or-less', 'time-to-make', 'course...",6,"['pour oil in mixing bowl', 'add sugar and pum...","[olive oil, sugar, pumpkin, vanilla extract, a...",9,115.4,7.0,34.0,3.0,2.0,3.0,5.0,"[olive oil, sugar, pumpkin, vanilla extract, a...",0,"[bake, pour]"
177210,zen habits vegan three bean chili,230854,45,"['60-minutes-or-less', 'time-to-make', 'course...",5,"['heat olive oil on medium-high heat', 'saute ...","[vegetarian ground beef, pinto beans, kidney b...",13,334.2,10.0,46.0,48.0,40.0,4.0,16.0,"[vegetarian ground beef, pinto bean, kidney be...",1,"[saute, simmer, stew]"
177803,zucchini brownies vegan,384645,40,"['lactose', '60-minutes-or-less', 'time-to-mak...",7,['preheat oven to 350 and spray a 9x13 baking ...,"[applesauce, raw sugar, maple syrup, vanilla, ...",11,78.6,0.0,36.0,7.0,3.0,1.0,6.0,"[applesauce, raw sugar, maple syrup, vanilla, ...",0,"[bake, combine]"


In [54]:
# Recipes whose name mentions both 'vegan' and 'meat'.
name_mentions_vegan = data['name'].str.contains('vegan')
name_mentions_meat = data['name'].str.contains('meat')
data[name_mentions_vegan & name_mentions_meat]

Unnamed: 0,name,id,minutes,tags,n_steps,steps,ingredients,n_ingredients,calories__nb,total_fat__pvd,sugar__pvd,sodium__pvd,protein__pvd,sat_fat__pvd,carbs__pvd,ingredient_list,calorie_level,techniques_list
60637,easy tofu no meat balls vegan and gluten free,361658,20,"['30-minutes-or-less', 'time-to-make', 'course...",4,['combine first 6 ingredients in medium mixing...,"[firm tofu, quick oats, braggs liquid aminos, ...",9,212.0,13.0,3.0,0.0,27.0,7.0,7.0,"[firm tofu, quick oat, braggs liquid amino, fr...",0,"[combine, saute]"
63979,favorite meat loaf vegan,414222,60,"['60-minutes-or-less', 'time-to-make', 'prepar...",8,"['preheat oven to 350 degrees f , then oil an ...","[sesame oil, onions, carrot, fresh garlic, dri...",12,64.3,6.0,7.0,6.0,1.0,2.0,2.0,"[sesame oil, onion, carrot, fresh garlic, drie...",0,"[bake, blend, combine, saute, skillet]"
91667,kottbullar swedish meatless balls vegan,426757,94,"['time-to-make', 'course', 'main-ingredient', ...",11,"['place potatoes in medium saucepan , cover wi...","[potatoes, vegetable bouillon cube, ground alm...",7,427.3,43.0,13.0,0.0,19.0,15.0,12.0,"[potato, vegetable bouillon cube, ground almon...",1,"[boil, drain, fry, mash, pour, soak]"
165190,tofu walnut meatballs vegan,467898,50,"['60-minutes-or-less', 'time-to-make', 'course...",10,"['mix first 4 ingredients in a large bowl', 'a...","[bread, walnuts, oats, diced onion, garlic pow...",12,300.2,26.0,10.0,30.0,30.0,10.0,8.0,"[bread, walnut, oat, diced onion, garlic powde...",1,"[bake, combine, drain, freez]"
170131,vegan meatballs,416845,25,"['30-minutes-or-less', 'time-to-make', 'prepar...",4,['combine all ingredients in a bowl or electri...,"[lightlife gimme lean ground sausage, veggie c...",11,117.4,10.0,0.0,6.0,2.0,4.0,4.0,"[sausage, veggie crumble, cracker crumb, onion...",0,[combine]
170178,vegan beefless burgers or meatballs,21566,40,"['60-minutes-or-less', 'time-to-make', 'course...",9,"['combine all ingredients', 'mix well', 'use l...","[textured vegetable protein, water, sunflower ...",12,199.1,16.0,13.0,20.0,15.0,5.0,6.0,"[textured vegetable protein, water, sunflower ...",0,"[bake, combine, fry, pickle]"
170360,vegan meatless loaf with seitan,290989,65,"['time-to-make', 'course', 'main-ingredient', ...",9,"['preheat oven to 350', 'soften oatmeal in 1 /...","[oats, seitan, bulgar wheat, rice, tofu, olive...",19,364.9,14.0,9.0,17.0,29.0,6.0,19.0,"[oat, seitan, bulgar wheat, rice, tofu, olive ...",1,"[bake, combine, drain, freez]"
170361,vegan meatloaf 1,254279,120,"['lactose', 'time-to-make', 'course', 'main-in...",13,['pulse the pine nuts in a food processor unti...,"[black beans, carrots, onion, celery ribs, zuc...",18,278.2,20.0,18.0,17.0,19.0,6.0,11.0,"[black bean, carrot, onion, celery rib, zucchi...",1,"[bake, mash, steam]"
170878,vegetarian vegan meatloaf,363490,70,"['time-to-make', 'preparation', 'easy', 'vegan...",10,"['preheat oven to 350', 'grind walnuts in food...","[walnuts, celery ribs, yellow onion, garlic cl...",12,173.5,13.0,10.0,51.0,20.0,4.0,5.0,"[walnut, celery rib, yellow onion, garlic clov...",0,"[bake, barbecue, drain, mash, saute, skillet]"
171273,veggie lunch meat vegan dad,376410,145,"['time-to-make', 'course', 'main-ingredient', ...",17,"['method', 'get water steaming in your steamer...","[white beans, water, oil, salt, paprika, onion...",13,834.4,86.0,6.0,209.0,43.0,37.0,22.0,"[white bean, water, oil, salt, paprika, onion ...",2,"[bake, blend, pour, smooth, steam]"
