In [1]:
import pandas as pd
import pickle
from surprise import Reader, Dataset, SVD

In [2]:
df = pd.read_csv('../dataset/preprocessed_data.csv')

In [3]:
df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review,minutes,contributor_id,submitted,tags,n_steps,...,sugar,sodium,protein,saturated_fat,carbohydrates,food_types,negative,neutral,positive,compound
0,492,20636,2002-12-01,4,this worked very well and is easy. i used not ...,20,56824,2002-10-27,"30-minutes-or-less, time-to-make, course, main...",5,...,39.0,5.0,4.0,11.0,5.0,Healthy,0.0,0.598,0.402,0.8553
1,8204,64566,2005-09-02,4,very good,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",10,...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.238,0.762,0.4927
2,28657,64566,2005-12-22,5,better than the real,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",10,...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.508,0.492,0.4404
3,36365,64566,2006-09-26,5,absolutely awesome i was speechless when i tri...,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",10,...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.883,0.117,0.659
4,20197,64566,2007-03-09,5,these taste absolutely wonderful my son in law...,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",10,...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.675,0.325,0.8908


In [4]:
reader = Reader()
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)

In [5]:
train_set = data.build_full_trainset()

In [6]:
algo = SVD()

In [7]:
algo.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2b2c3ba30>

In [9]:
# save the model to disk
filename = 'svd_model.sav'
pickle.dump(algo, open(filename, 'wb'))

In [10]:
# load the model from disk
algo = pickle.load(open(filename, 'rb'))

In [12]:
def get_recommendations(user_id, n=10):
    user_ratings = df[df.user_id == user_id]
    rated_recipes = user_ratings.recipe_id.unique().tolist()
    unrated_recipes = df[~df.recipe_id.isin(rated_recipes)].recipe_id.unique().tolist()

    test_set = [[user_id, recipe_id, 0] for recipe_id in unrated_recipes]
    predictions = algo.test(test_set)

    # sort predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    top_n = predictions[:n]

    top_n_recipes = [recipe_id for user_id, recipe_id, _, _, _ in top_n]

    # return the recipe_id from df that match the top_n_recipes
    return df[df.recipe_id.isin(top_n_recipes)].drop_duplicates(subset=['recipe_id'])

In [13]:
get_recommendations(8937)

Unnamed: 0,user_id,recipe_id,date,rating,review,minutes,contributor_id,submitted,tags,n_steps,...,sugar,sodium,protein,saturated_fat,carbohydrates,food_types,negative,neutral,positive,compound
2510,3672,22912,2003-07-12,5,excellent my family comments on these chimicha...,25,39547,2002-12-22,"30-minutes-or-less, time-to-make, course, main...",9,...,15.0,24.0,23.0,26.0,9.0,Healthy,0.145,0.699,0.155,0.5994
5739,42469,99060,2007-03-06,5,this was really tasty a definite winner. i did...,150,104929,2007-02-15,"time-to-make, course, main-ingredient, prepara...",12,...,74.0,3.0,6.0,43.0,7.0,Non-Veg dessert,0.056,0.762,0.182,0.6643
14472,160943,197668,2015-12-05,5,looks delicious and i think my kids would eat ...,40,1856536,2015-11-30,"60-minutes-or-less, time-to-make, course, prep...",15,...,7.0,28.0,32.0,30.0,9.0,Non-veg,0.0,0.684,0.316,0.5719
20028,2323,7217,2002-02-10,0,ennyrat....thanks for your comments about the ...,55,29212,2002-01-27,"60-minutes-or-less, time-to-make, course, prep...",13,...,180.0,21.0,17.0,92.0,26.0,Non-veg,0.0,0.955,0.045,0.2006
31126,3900,10686,2002-07-14,5,this was really quite quick to make and i used...,85,2586,2002-04-08,"weeknight, time-to-make, course, preparation, ...",19,...,98.0,14.0,11.0,40.0,15.0,Veg,0.0,0.876,0.124,0.659
35699,12784,146696,2008-10-18,5,i broiled mine for quite a while turning once ...,35,110433,2008-10-14,"60-minutes-or-less, time-to-make, main-ingredi...",5,...,65.0,28.0,83.0,50.0,7.0,Non-veg,0.036,0.783,0.181,0.836
45810,20943,23680,2006-03-09,5,i love these pancakes made them as waffles too...,20,54023,2003-01-13,"30-minutes-or-less, time-to-make, course, main...",5,...,13.0,41.0,19.0,40.0,14.0,Veg,0.054,0.588,0.357,0.9245
56947,125895,157936,2012-10-10,5,this was the first time we tried frying fish i...,30,129177,2009-03-30,"30-minutes-or-less, time-to-make, preparation,...",17,...,0.0,34.0,88.0,1.0,14.0,Veg,0.022,0.851,0.127,0.7781
66432,8624,32171,2007-09-24,5,wow. i used real bacon and the grease and chic...,15,56130,2003-07-28,"15-minutes-or-less, time-to-make, course, main...",7,...,7.0,8.0,6.0,9.0,2.0,Healthy,0.0,0.644,0.356,0.802
68609,28702,67908,2005-12-18,5,made this yesterday and we all loved it..very ...,210,49304,2005-10-14,"time-to-make, course, main-ingredient, prepara...",5,...,16.0,23.0,17.0,17.0,6.0,Healthy,0.0,0.541,0.459,0.7783


In [15]:
get_recommendations(353911).recipe_id.tolist()

[197004, 197668, 193741, 192237, 88611, 192507, 118841, 30763, 15492, 59378]