# 0. Imports

In [1]:
import pandas as pd
import numpy as np

---

# 1. Load datasets

In [13]:
df_reviews = pd.read_pickle('datasets/raw/reviews.pkl')

In [14]:
df_recipes = pd.read_csv("datasets/sample/recipes_sample_main.csv", sep=';')
len(df_recipes)

24742

In [3]:
df_recipes = pd.read_pickle('datasets/refined/recipes_refined_merged_char.pkl')
len(df_recipes)

494963

### Make sample df

df_sample = df_recipes.sample(frac = .1)
len(df_sample)

### Save sample (CSV+PICKLE) 

## 1.1 Load from pickle

### Override if necessary

---

# 2. Refine 

### Review simplify

In [15]:
df_reviews.drop(["ReviewId", "AuthorName", "Review", "DateSubmitted", "DateModified"], axis=1, inplace = True)
df_reviews.columns = ["Item", "User", "Rating"]
df_reviews = df_reviews[["User", "Item", "Rating"]]

### Filtering reviews

In [16]:
df_recipe_review_counts = df_reviews.groupby(['Item']).size()

df_filtered_recipes = df_recipe_review_counts.loc[(df_recipe_review_counts > 10)]

filtered_recipes_list = df_filtered_recipes.index.tolist()

len(filtered_recipes_list)

23429

### Is the reviewed recipe in the main dataset (narrowing)

In [17]:
all_recipes_list = df_recipes.id.tolist()

filtered_all_recipes_list = [value for value in filtered_recipes_list if value in all_recipes_list]

len(filtered_all_recipes_list)

1175

In [18]:
df_recipe_ratings = df_reviews[df_reviews['Item'].isin(filtered_all_recipes_list)]
df_recipe_ratings.count()

User      38058
Item      38058
Rating    38058
dtype: int64

In [19]:
df_recipe_ratings

Unnamed: 0,User,Item,Rating
47,3166,4165,0
118,3912,8278,5
180,4240,4165,0
250,5049,1559,4
276,5643,1209,5
...,...,...,...
1401865,2002898723,11345,1
1401910,2002900056,406867,5
1401937,2002900712,11345,5
1401949,2002901010,2886,0


### Save recipe ratings

In [9]:
df_recipe_ratings.to_pickle('serialized-model/recipe_ratings.pkl')

---

# SAMPLE

# Add new user

---

# 3. Model training

### Imports

In [20]:
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import SVDpp
from surprise import KNNBasic
from surprise.model_selection import cross_validate

import pickle

### Load and read df

In [21]:
reader = Reader(rating_scale=(0, 5))

#data = Dataset.load_from_df(df_concat_v3, reader)
data = Dataset.load_from_df(df_recipe_ratings, reader)
#data = Dataset.load_from_df(df_concat, reader)

### Training set

In [22]:
trainSet = data.build_full_trainset()

In [12]:
trainSet.to_raw_uid(0)

2008

In [13]:
def trainset_contains(id):
    try:
        trainSet.to_inner_uid(id)
        return True
    except:
        print("Item is not part of the trainset.")
        return False

In [15]:
trainset_contains(90221)

True

### Save and load training set

In [68]:
pickle.dump(trainSet, open('serialized-model/trainset.pkl', 'wb'))

### Anti training (test) set for ONE specific user

In [14]:
def make_anti_testset(raw_user_id):
    anti_testset_user = []
    
    #targetUser = 0 #inner_id of the target user
    targetUser = trainSet.to_inner_uid(raw_user_id)
    
    fillValue = trainSet.global_mean
    
    user_item_ratings = trainSet.ur[targetUser]
    user_items = [item for (item,_) in (user_item_ratings)]
    user_items
    
    ratings = trainSet.all_ratings()
    
    for iid in trainSet.all_items():
        if(iid not in user_items):
            anti_testset_user.append((trainSet.to_raw_uid(targetUser),trainSet.to_raw_iid(iid),fillValue))
            
    return anti_testset_user

## 3.1 Validating

### Normal Predictor

In [14]:
algo = NormalPredictor()
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.6192  1.6217  1.6171  1.6239  1.6216  1.6207  0.0023  
MAE (testset)     1.0898  1.0930  1.0896  1.0950  1.0909  1.0917  0.0020  
Fit time          1.26    1.35    1.35    1.36    1.42    1.35    0.05    
Test time         1.84    2.09    1.75    1.32    1.79    1.76    0.25    


{'test_rmse': array([1.61920459, 1.62172679, 1.61711712, 1.6238843 , 1.62160761]),
 'test_mae': array([1.08980806, 1.09295632, 1.08963012, 1.09497301, 1.09093494]),
 'fit_time': (1.2554898262023926,
  1.3547992706298828,
  1.35099196434021,
  1.3627970218658447,
  1.418653964996338),
 'test_time': (1.835482120513916,
  2.091679811477661,
  1.747131109237671,
  1.317690372467041,
  1.789693832397461)}

### KNNBasic

In [None]:
sim_options = {'name': 'cosine',
               'user_based': False  # compute  similarities between items
               }
algo = KNNBasic(sim_options=sim_options)
# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

### SVD

In [23]:
#n_factors=100, n_epochs=25, lr_all=0.005, reg_all=0.1
algo = SVD(n_factors=100, n_epochs=25, lr_all=0.005, reg_all=0.1)
# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.3140  1.3119  1.3324  1.3194  0.0092  
MAE (testset)     0.8499  0.8536  0.8620  0.8552  0.0051  
Fit time          2.85    2.57    2.58    2.67    0.13    
Test time         0.18    0.11    0.10    0.13    0.04    


{'test_rmse': array([1.31404924, 1.31189568, 1.33236792]),
 'test_mae': array([0.84987175, 0.85362411, 0.86201313]),
 'fit_time': (2.848823308944702, 2.5730812549591064, 2.5816802978515625),
 'test_time': (0.18066859245300293, 0.1072394847869873, 0.10192370414733887)}

### GridSearchCV

In [12]:
from surprise.model_selection import GridSearchCV

In [None]:
from surprise.model_selection import GridSearchCV

param_grid = {'n_factors': [100,150],
              'n_epochs': [20,25,30],
              'lr_all':[0.005,0.01,0.1],
              'reg_all':[0.02,0.05,0.1]}
grid_search = GridSearchCV(SVD, param_grid, measures=['rmse','mae'], cv=3)
grid_search.fit(data)

In [None]:
print(grid_search.best_score['rmse'])
print(grid_search.best_score['mae'])

In [None]:
print(grid_search.best_params['rmse'])

In [None]:
algo = grid_search.best_estimator['rmse']

cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

## 3.2 Fit and test on trainset

### Fitting algorithm

In [71]:
algo = KNNBasic(sim_options = {"name":"cosine", "user_based":False})#SVD()
algo.fit(trainSet)

Computing the cosine similarity matrix...
Done computing similarity matrix.


  sim = construction_func[name](*args)


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fb6f6233a90>

In [72]:
pickle.dump(algo, open('serialized-model/recrecsys.pkl', 'wb'))

### Checking for one user

In [73]:
user_id = 8

raw_user_id = trainSet.to_raw_uid(user_id)

raw_user_id
#2178

2312

### List user reviewed recipes

In [74]:
df_user_recipes = df_recipe_ratings.loc[df_recipe_ratings["User"] == raw_user_id]
#df_user_recipes = df_concat.loc[df_concat["User"] == raw_user_id]

df_recipes.loc[df_recipes["id"].isin(df_user_recipes.Item.tolist())]

Unnamed: 0,id,name,author_name,submitted,image_url,minutes,description,category,tags,search_terms,steps,ingredients,ingredients_raw_str,serving_size,servings,calories,rating,review_count
1870,61718,Cross Rib Roast,agileangus,2003-05-08,"""https://img.sndimg.com/food/image/upload/w_55...",85,"I found this delicious, simple-to-prepare reci...",Roast Beef,"{""weeknight"",""time-to-make"",""main-ingredient"",...","{""roast"",""dinner""}","{""Brush roast with balsamic vinegar."",""Make a ...","{""cross-rib roasts"",""balsamic vinegar"",""garlic...","{""2 lbs cross-rib roasts"",""2 tablespoon...",1 (14 g),4,47.4,5.0,90
19197,2886,Best Banana Bread,lkadlec,1999-09-26,"""https://img.sndimg.com/food/image/upload/w_55...",70,You'll never need another banana bread recipe ...,Quick Breads,"{""time-to-make"",""course"",""main-ingredient"",""cu...","{""bread""}","{""Remove odd pots and pans from oven."",""Prehea...","{""butter"",""granulated sugar"",""eggs"",""bananas"",...","{""1/2 cup butter; softened "",""1 cup g...",1 (96 g),10,272.8,5.0,2273
22838,26479,Chocolate Chip Oatmeal Cookies,Kim D.,2002-04-27,"""https://img.sndimg.com/food/image/upload/w_55...",50,These cookies are so good! I found the recipe ...,Drop Cookies,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""cookie"",""dessert""}","{""Preheat oven to 375°F."",""Sift together flour...","{""flour"",""baking soda"",""salt"",""shortening"",""br...","{""1 1/2 cups flour"",""1 1/4 teaspoons b...",1 (1496 g),1,723.7,5.0,171


In [75]:
df_recipe_ratings.loc[(df_recipe_ratings["Item"].isin(df_user_recipes.Item.tolist())) & (df_recipe_ratings["User"] == raw_user_id)]

Unnamed: 0,User,Item,Rating
485,2312,2886,5
23642,2312,26479,5
56612,2312,61718,4


### Make anti testset

In [76]:
anti_testset = make_anti_testset(raw_user_id)

###  Make prediction with locally made algorithm

In [77]:
predictions = algo.test(anti_testset)

### OR with pickled

In [78]:
pred = pd.DataFrame(predictions)
pred = pred.loc[pred["est"] == 5]
pred = pd.concat([pred, pred['details'].apply(pd.Series)], axis = 1).drop('details', axis = 1)
pred.sort_values(by=['actual_k'], inplace=True, ascending=False)

In [79]:
pred

Unnamed: 0,uid,iid,r_ui,est,actual_k,was_impossible
1,2312,8278,4.383993,5.0,2,False
607,2312,116266,4.383993,5.0,2,False
495,2312,96582,4.383993,5.0,2,False
493,2312,87629,4.383993,5.0,2,False
490,2312,94355,4.383993,5.0,2,False
...,...,...,...,...,...,...
559,2312,107864,4.383993,5.0,1,False
557,2312,89493,4.383993,5.0,1,False
551,2312,55452,4.383993,5.0,1,False
550,2312,73886,4.383993,5.0,1,False


### List Top-n results

In [80]:
def list_result(predictions):
    pred = pd.DataFrame(predictions)
    pred.sort_values(by=['est'], inplace=True, ascending=False)
    
    recipe_list = pred.head(10)['iid'].to_list()
    
    print(recipe_list)
    
    # return df_recipes.loc[recipe_list]
    return df_recipes.loc[df_recipes["id"].isin(recipe_list)]

In [81]:
def list_result_knn(predictions):
    pred = pd.DataFrame(predictions)
    pred = pred.loc[pred["est"] == 5]
    pred = pd.concat([pred, pred['details'].apply(pd.Series)], axis = 1).drop('details', axis = 1)
    pred.sort_values(by=['actual_k'], inplace=True, ascending=False)
    recipe_list = pred.head(10)['iid'].to_list()
    
    for x, y in zip(recipe_list, pred.head(10)['actual_k']):
        print(x, y)
    # return df_recipes.loc[recipe_list]
    return df_recipes.loc[df_recipes["id"].isin(recipe_list)]

## KNN

In [82]:
df_results = list_result_knn(predictions)
df_results

8278 2
116266 2
96582 2
87629 2
94355 2
88453 2
33420 2
78922 2
84592 2
87689 2


Unnamed: 0,id,name,author_name,submitted,image_url,minutes,description,category,tags,search_terms,steps,ingredients,ingredients_raw_str,serving_size,servings,calories,rating,review_count
2066,96582,Diana's Awesome Oatmeal Muffins,Roosie,2004-07-29,"""https://img.sndimg.com/food/image/upload/w_55...",45,Full of the good stuff- and you get to choose ...,Quick Breads,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""vegetarian"",""healthy"",""low-fat"",""bread"",""veg...","{""Preheat oven to 400°F."",""Soak oatmeal in liq...","{""rolled oats"",""coffee"",""applesauce"",""salt"",""b...","{""2 cups rolled oats"",""2 cups juice ...",1 (64 g),18,96.0,4.0,46
2814,8278,Mexican Wedding Cookies,truebrit,2000-03-13,"""https://img.sndimg.com/food/image/upload/w_55...",25,Traditional cookie.,Dessert,"{""30-minutes-or-less"",""time-to-make"",""course"",...","{""cookie"",""dessert"",""mexican""}","{""Combine all ingredients."",""Form into 1 1/2"" ...","{""butter"",""powdered sugar"",""flour"",""nuts"",""van...","{""1 cup butter; softened "",""1 cup po...",1 (20 g),36,106.4,5.0,42
3050,78922,Kittencal's Best Deep Dark Chocolate Layer Cake,Kittencalrecipezazz,2003-12-14,"""https://img.sndimg.com/food/image/upload/w_55...",40,This will probably be the only layer cake you ...,Dessert,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""cake"",""dessert""}","{""Set oven to 350 degrees F."",""Line bottoms of...","{""butter"",""brown sugar"",""eggs"",""buttermilk"",""v...","{""1 cup butter; softened "",""1 1/2 cups ...",1 (191 g),12,706.3,5.0,27
3367,87689,Cake Flour Substitute,Lennie,2004-03-29,"""https://img.sndimg.com/food/image/upload/w_55...",2,"I don't always have cake flour on hand, and I'...",Vegan,"{""15-minutes-or-less"",""time-to-make"",""course"",...","{""low-sodium"",""vegetarian"",""healthy"",""low-fat""...","{""To make two cups of cake-and-pastry flour (c...","{""all-purpose flour"",""cornstarch""}","{""1 3/4 cups all-purpose flour"",""1/4 cup ...",1 (251 g),1,459.1,5.0,90
7967,33420,Great Buffalo Chicken Pizza,Mini Ravindran,2002-07-07,"""https://img.sndimg.com/food/image/upload/w_55...",25,Great as the recipe says !,One Dish Meal,"{""30-minutes-or-less"",""time-to-make"",""course"",...","{""pizza"",""dinner"",""chicken""}","{""Preheat the oven to 400."",""Dice the cooked c...","{""pizza dough"",""chicken tenders"",""butter"",""hot...","{""1 pizza dough (12 inch)"",""1/2 lb c...",1 (161 g),4,432.8,5.0,86
9449,94355,Cheesy Spaghetti Squash,PaulaG,2004-06-26,"""https://img.sndimg.com/food/image/upload/w_55...",25,Not only did this recipe give me an additional...,Cheese,"{""30-minutes-or-less"",""time-to-make"",""course"",...","{""side"",""diabetic""}","{""Pierce the spaghetti squash several times wi...","{""spaghetti squash"",""parmesan cheese"",""reduced...","{""1 spaghetti squash; approximately 3 lb...",1 (58 g),6,24.7,4.5,15
10922,116266,Buffalo Chicken Wing Dip,Parsley,2005-04-11,"""https://img.sndimg.com/food/image/upload/w_55...",1,I first tried this buffalo chicken wing dip at...,Chicken,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""appetizer"",""low-carb"",""chicken""}","{""Preaheat oven top 350."",""In a large bowl; mi...","{""chicken breasts"",""hot sauce"",""cream cheese"",...","{""1 1/2 lbs chicken breasts; cooked and sh...",1 (73 g),32,196.8,5.0,75
11464,87629,Sausage Breakfast Muffins (OAMC),Lightly Toasted,2004-03-28,"""https://img.sndimg.com/food/image/upload/w_55...",45,This is a recipe I found online and adapted to...,Quick Breads,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""breakfast"",""bread""}","{""Line muffin tins with paper liners; and set ...","{""chorizo sausage"",""bisquick"",""cornmeal"",""eggs...","{""2 lbs chorizo sausage or 2 lbs bulk...",1 (65 g),48,193.9,4.5,63
14211,88453,Best Ever BBQ Chicken,DDW7976,2004-04-06,"""https://img.sndimg.com/food/image/upload/w_55...",1,The BBQ sauce on this juicy grilled chicken is...,Chicken Breast,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""dinner"",""barbecue"",""chicken""}","{""In a saucepan; saute the garlic in butter un...","{""garlic cloves"",""butter"",""heinz ketchup"",""bro...","{""3 garlic cloves; minced "",""2 teaspoo...",1 (127 g),8,205.1,5.0,102
24590,84592,The Best Blueberry Banana Muffins,Bliss,2004-02-21,"""https://img.sndimg.com/food/image/upload/w_55...",40,I got these off the internet. These were one o...,Quick Breads,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""breakfast"",""bread""}","{""Heat oven to 350°."",""In large mix bowl; comb...","{""sugar"",""butter"",""buttermilk"",""eggs"",""vanilla...","{""1 1/4 cups sugar"",""2/3 cup butter"",""...",1 (220 g),6,590.4,5.0,24


### Examination

---

# 4. Save model