In [38]:
import pandas as pd
import numpy as np
from collections import Counter
import random
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix
import pickle
from scipy.sparse import hstack
from collections import defaultdict
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse.linalg import svds

my_seed = 0
random.seed(my_seed)

In [6]:
file = open('output/recipes.pkl',"rb")
recipes = pickle.load(file)

In [7]:
recipes.head()

Unnamed: 0,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,new_recipe_id
0,arriba baked winter squash mexican style,55,47892,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"[make a choice and proceed with recipe, depend...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7,79681
1,a bit different breakfast pizza,30,26278,2002-06-17,"[30-minutes-or-less, time-to-make, course, mai...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"[preheat oven to 425 degrees f, press dough in...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6,17031
2,all in the kitchen chili,130,196586,2005-02-25,"[time-to-make, course, preparation, main-dish,...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"[brown ground beef in large pot, add chopped o...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...",13,65277
3,alouette potatoes,45,68585,2003-04-14,"[60-minutes-or-less, time-to-make, course, mai...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,[place potatoes in a large pot of lightly salt...,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ...",11,33775
4,amish tomato ketchup for canning,190,41706,2002-10-25,"[weeknight, time-to-make, course, main-ingredi...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,"[mix all ingredients& boil for 2 1 / 2 hours ,...",my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal...",8,24632


In [65]:
recipes.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 231637 entries, 0 to 231636
Data columns (total 13 columns):
name              231636 non-null object
minutes           231637 non-null int64
contributor_id    231637 non-null int64
submitted         231637 non-null object
tags              231637 non-null object
nutrition         231637 non-null object
n_steps           231637 non-null int64
steps             231637 non-null object
description       226658 non-null object
ingredients       231637 non-null object
n_ingredients     231637 non-null int64
new_recipe_id     231637 non-null int64
calorie           231637 non-null float64
dtypes: float64(1), int64(5), object(7)
memory usage: 24.7+ MB


Our next step is to construct a table that is similar to a UI table. Only that rows are recipe names and columns are recipe attributes

We will apply SVD on it and try to get a recommendation from it

In [9]:
recipes['calorie'] = [item[0] for item in recipes.nutrition]

In [10]:
def treat_ingredients(input):
    output = []
    for ingredient in input:
        ingredient_list = ingredient.split(' ')
        output.append("_".join(ingredient_list))
    return " ".join(output)

In [66]:
recipes_2 = recipes.set_index('new_recipe_id').sort_index()

In [68]:
recipes_2.iloc[23]

name                                            butter madeira cake
minutes                                                         175
contributor_id                                                 1543
submitted                                                1999-09-12
tags              [weeknight, time-to-make, course, preparation,...
nutrition               [373.8, 30.0, 88.0, 10.0, 11.0, 59.0, 14.0]
n_steps                                                          15
steps             [grease lined tin with melted butter, sift flo...
description       this is a good stand by and easy mixed in a fo...
ingredients       [flour, baking powder, caster sugar, butter, e...
n_ingredients                                                     7
calorie                                                       373.8
Name: 23, dtype: object

In [69]:
ingredients_all = recipes_2.ingredients.apply(lambda x: treat_ingredients(x))

In [70]:
tfidf = TfidfVectorizer(max_df = 0.7,min_df = 2)

In [71]:
ing_tfidf = coo_matrix(tfidf.fit_transform(ingredients_all))

In [72]:
columns_keep = ['minutes','contributor_id','calorie']

In [73]:
recipes_att = coo_matrix(recipes_2[columns_keep].to_numpy())

In [74]:
recipes_attr = hstack((recipes_att,ing_tfidf))

In [75]:
U, S, VT = svds(recipes_attr)

In [92]:
def get_recommends_similar_users(iids, U, n_reco = 10):
    users_to_rec = [iid for iid in range(U.shape[0]) if iid not in iids]
    
    user_sim_score = []
    
    for user in users_to_rec:
        user_sim_score.append(np.mean([np.dot(U[userid],U[user]) for userid in iids]))
        
    userrecs = zip(users_to_rec,user_sim_score)
    
    final_rec = [i[0] for i in sorted(userrecs,key=lambda x: x[1],reverse=True)]

    return final_rec[:n_reco]

def save_file_to_pickle(item, file_name, file_type = 'obj'):
    file = open(f'output/{file_name}.{file_type}', 'wb') 
    pickle.dump(item, file)
    file.close()

In [50]:
# we need to convert back to the recipes DT index (because we have not index the column as per our new id.. should have done that to avoid confusion)

iids =[23,56,34,111]


In [86]:
save_file_to_pickle(U,"rep_mtx","pkl")

In [93]:
get_recommends_similar_users(iids,U)

[11898, 165557, 46982, 63849, 210100, 88302, 200512, 10159, 26425, 4647]

In [94]:
recipes.iloc[11898]

name                            baby bananas in orange sauce  benin
minutes                                                          25
contributor_id                                               593927
submitted                                                2011-05-19
tags              [30-minutes-or-less, time-to-make, course, mai...
nutrition                  [343.6, 0.0, 292.0, 0.0, 3.0, 0.0, 29.0]
n_steps                                                           6
steps             [heat orange juice , brown sugar , and lemon j...
description       dd (toddler) had us try this one and we both d...
ingredients       [orange juice, brown sugar, lemon juice, bananas]
n_ingredients                                                     4
new_recipe_id                                                216330
calorie                                                       343.6
Name: 11898, dtype: object

In [95]:
recipes.iloc[165557]

name                           prairie potatoes  easy microwave fix
minutes                                                          31
contributor_id                                               296809
submitted                                                2007-11-12
tags              [60-minutes-or-less, time-to-make, course, mai...
nutrition               [526.5, 41.0, 16.0, 37.0, 47.0, 68.0, 16.0]
n_steps                                                           9
steps             [prick potato skins, wrap ea potato in kitchen...
description       this is recipe #6 from *microwave cooking* by ...
ingredients       [potatoes, butter, milk, corned beef, corn, ch...
n_ingredients                                                     8
new_recipe_id                                                143417
calorie                                                       526.5
Name: 165557, dtype: object

In [96]:
recipes.iloc[46982]

name                                         chicken potato chowder
minutes                                                         315
contributor_id                                               123871
submitted                                                2005-09-19
tags              [time-to-make, course, preparation, healthy, s...
nutrition                 [236.2, 5.0, 14.0, 19.0, 25.0, 4.0, 13.0]
n_steps                                                           5
steps             [cut chicken into bite-sized pieces and saute ...
description       this is a yummy slow cooker soup. it is from t...
ingredients       [chicken breast halves, red potatoes, frozen m...
n_ingredients                                                     6
new_recipe_id                                                 79733
calorie                                                       236.2
Name: 46982, dtype: object