In [1]:
import pandas as pd
import pickle
from surprise import Reader, Dataset, SVD

In [12]:
df = pd.read_csv('../dataset/preprocessed_data.csv')

In [13]:
df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review,name,minutes,contributor_id,submitted,tags,...,sugar,sodium,protein,saturated_fat,carbohydrates,food_types,negative,neutral,positive,compound
0,7708,60599,2005-09-02,4,very good,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.238,0.762,0.4927
1,27707,60599,2005-12-22,5,better than the real,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.508,0.492,0.4404
2,35308,60599,2006-09-26,5,absolutely awesome i was speechless when i tri...,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.883,0.117,0.659
3,19399,60599,2007-03-09,5,these taste absolutely wonderful my son in law...,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.675,0.325,0.8908
4,43887,60599,2008-02-20,0,made my own buttermilk w vinegar and milk. use...,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.0,0.929,0.071,0.4588


In [14]:
reader = Reader()
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)

In [15]:
train_set = data.build_full_trainset()

In [16]:
algo = SVD()

In [17]:
algo.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2f9ca2190>

In [18]:
# save the model to disk
filename = 'svd_model.sav'
pickle.dump(algo, open(filename, 'wb'))

In [19]:
# load the model from disk
algo = pickle.load(open(filename, 'rb'))

In [26]:
def get_recommendations(user_id, n=10):
    user_ratings = df[df.user_id == user_id]
    rated_recipes = user_ratings.recipe_id.unique().tolist()
    unrated_recipes = df[~df.recipe_id.isin(rated_recipes)].recipe_id.unique().tolist()

    test_set = [[user_id, recipe_id, 0] for recipe_id in unrated_recipes]
    predictions = algo.test(test_set)

    # sort predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    top_n = predictions[:n]
    top_n_recipes = [recipe_id for user_id, recipe_id, _, _, _ in top_n]
    # return the recipe_id from df that match the top_n_recipes
    return df[df.recipe_id.isin(top_n_recipes)].drop_duplicates(subset=['recipe_id'])

In [28]:
get_recommendations(7708).recipe_id.tolist()

[Prediction(uid=7708, iid=15023, r_ui=0, est=5, details={'was_impossible': False}), Prediction(uid=7708, iid=13030, r_ui=0, est=4.899395498443079, details={'was_impossible': False}), Prediction(uid=7708, iid=34852, r_ui=0, est=4.892222152872712, details={'was_impossible': False}), Prediction(uid=7708, iid=34894, r_ui=0, est=4.8635280731904835, details={'was_impossible': False}), Prediction(uid=7708, iid=6747, r_ui=0, est=4.837831932717456, details={'was_impossible': False}), Prediction(uid=7708, iid=28731, r_ui=0, est=4.831488943937534, details={'was_impossible': False}), Prediction(uid=7708, iid=16707, r_ui=0, est=4.815866335871606, details={'was_impossible': False}), Prediction(uid=7708, iid=37575, r_ui=0, est=4.806183659223117, details={'was_impossible': False}), Prediction(uid=7708, iid=1268, r_ui=0, est=4.798007922154066, details={'was_impossible': False}), Prediction(uid=7708, iid=192839, r_ui=0, est=4.790228706915278, details={'was_impossible': False})]
[15023, 13030, 34852, 348

[192839, 28731, 34894, 16707, 37575, 13030, 15023, 1268, 34852, 6747]

In [29]:
get_recommendations(353911).recipe_id.tolist()

[Prediction(uid=353911, iid=188078, r_ui=0, est=4.9538680475658055, details={'was_impossible': False}), Prediction(uid=353911, iid=188348, r_ui=0, est=4.903862190980938, details={'was_impossible': False}), Prediction(uid=353911, iid=192839, r_ui=0, est=4.902673067705568, details={'was_impossible': False}), Prediction(uid=353911, iid=189581, r_ui=0, est=4.898042108468786, details={'was_impossible': False}), Prediction(uid=353911, iid=27195, r_ui=0, est=4.891106262802855, details={'was_impossible': False}), Prediction(uid=353911, iid=13074, r_ui=0, est=4.886201494805394, details={'was_impossible': False}), Prediction(uid=353911, iid=193503, r_ui=0, est=4.879933560381735, details={'was_impossible': False}), Prediction(uid=353911, iid=188079, r_ui=0, est=4.871373950294299, details={'was_impossible': False}), Prediction(uid=353911, iid=31419, r_ui=0, est=4.858661785924539, details={'was_impossible': False}), Prediction(uid=353911, iid=73728, r_ui=0, est=4.8570946526885725, details={'was_imp

[192839, 193503, 73728, 189581, 31419, 188078, 188348, 27195, 13074, 188079]