# 0. Imports

In [1]:
import pandas as pd
import numpy as np

---

# 1. Load datasets

In [44]:
df_reviews = pd.read_pickle('datasets/raw/reviews.pkl')

In [3]:
df_recipes = pd.read_csv("datasets/sample/recipes_sample_main.csv", sep=';')
len(df_recipes)

24742

### Make sample df

df_sample = df_recipes.sample(frac = .1)
len(df_sample)

### Save sample (CSV+PICKLE) 

In [24]:
df_sample.to_csv('sample/refined_recipes_2_sample_main.csv', index=False, sep=";")
df_sample.to_pickle('sample/refined_recipes_2_sample_main.pkl')

## 1.1 Load from pickle

In [41]:
df_sample = pd.read_pickle('sample/refined_recipes_2_sample_main.pkl')

### Override if necessary

In [42]:
df_recipes = df_sample

---

# 2. Refine 

### Review simplify

In [4]:
df_reviews.drop(["ReviewId", "AuthorName", "Review", "DateSubmitted", "DateModified"], axis=1, inplace = True)
df_reviews.columns = ["Item", "User", "Rating"]
df_reviews = df_reviews[["User", "Item", "Rating"]]

### Filtering reviews

In [5]:
df_recipe_review_counts = df_reviews.groupby(['Item']).size()

df_filtered_recipes = df_recipe_review_counts.loc[(df_recipe_review_counts > 4)]

filtered_recipes_list = df_filtered_recipes.index.tolist()

len(filtered_recipes_list)

64828

### Is the reviewed recipe in the main dataset (narrowing)

In [6]:
all_recipes_list = df_recipes.id.tolist()

filtered_all_recipes_list = [value for value in filtered_recipes_list if value in all_recipes_list]

len(filtered_all_recipes_list)

3174

In [7]:
df_recipe_ratings = df_reviews[df_reviews['Item'].isin(filtered_all_recipes_list)]
df_recipe_ratings.count()

User      51511
Item      51511
Rating    51511
dtype: int64

### Save recipe ratings

In [8]:
df_recipe_ratings.to_pickle('serialized-model/recipe_ratings.pkl')

In [9]:
df_recipe_ratings = pd.read_pickle('serialized-model/recipe_ratings.pkl')

### Adding new user reviews

In [33]:
new_user_liked_recipes = [9872, 809]

In [34]:
df_new_user = pd.DataFrame({"User": 1853000001, "Item": new_user_liked_recipes, "Rating": 5})
df_new_user

Unnamed: 0,User,Item,Rating
0,1853000001,9872,5
1,1853000001,809,5


In [35]:
df_recipes.loc[df_recipes["id"].isin(df_new_user.Item.tolist())]

Unnamed: 0,id,name,description,ingredients,ingredients_raw_str,serving_size,servings,steps,tags,search_terms,...,image_url,minutes,category,calories,n_steps,n_ingredients,nutrition,rating,review_count,listed


### DROP IF NECESSARY

In [11]:
idxs = df_recipe_ratings.loc[df_reviews["User"] == 4177].index
idxs

Int64Index([145, 887], dtype='int64')

In [12]:
df_recipe_ratings_clarified = df_recipe_ratings.drop(idxs)

### Concatenating base and new df

In [13]:
df_concat = pd.concat([df_recipe_ratings_clarified, df_new_user], sort=False, ignore_index=True)
df_concat2 = pd.concat([df_recipe_ratings, df_new_user], sort=False, ignore_index=True)

In [14]:
df_concat.loc[df_concat["User"].isin([1853000001, 4177])]

Unnamed: 0,User,Item,Rating
572501,1853000001,9872,5
572502,1853000001,809,5


---

# 3. Model training

### Imports

In [19]:
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import SVDpp
from surprise import KNNBasic
from surprise.model_selection import cross_validate

import pickle

### Load and read df

In [20]:
reader = Reader(rating_scale=(0, 5))

data = Dataset.load_from_df(df_recipe_ratings, reader)
#data = Dataset.load_from_df(df_concat, reader)

### Training set

In [21]:
trainSet = data.build_full_trainset()

In [22]:
trainSet

<surprise.trainset.Trainset at 0x7fc9a4266460>

In [23]:
trainSet.to_raw_uid(0)

3166

In [24]:
def trainset_contains(id):
    try:
        trainSet.to_inner_uid(id)
        return True
    except:
        print("Item is not part of the trainset.")
        return False

In [25]:
trainset_contains(2046)

Item is not part of the trainset.


False

### Save and load training set

In [26]:
pickle.dump(trainSet, open('serialized-model/trainset.pkl', 'wb'))

In [126]:
trainSet = pickle.load(open('serialized-model/trainset.pkl', 'rb'))

### Anti training (test) set for ONE specific user

In [27]:
def make_anti_testset(raw_user_id):
    anti_testset_user = []
    
    #targetUser = 0 #inner_id of the target user
    targetUser = trainSet.to_inner_uid(raw_user_id)
    
    fillValue = trainSet.global_mean
    
    user_item_ratings = trainSet.ur[targetUser]
    user_items = [item for (item,_) in (user_item_ratings)]
    user_items
    
    ratings = trainSet.all_ratings()
    
    for iid in trainSet.all_items():
        if(iid not in user_items):
            anti_testset_user.append((trainSet.to_raw_uid(targetUser),trainSet.to_raw_iid(iid),fillValue))
            
    return anti_testset_user

## 3.1 Validating

### Normal Predictor

In [28]:
#algo = NormalPredictor()
#cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

### KNNBasic

### SVD

### GridSearchCV

## 3.2 Fit and test on trainset

### Fitting algorithm

In [29]:
algo = SVD()
algo.fit(trainSet)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fc96a271370>

In [30]:
pickle.dump(algo, open('serialized-model/recrecsys.pkl', 'wb'))

### Checking for one user

In [31]:
user_id = 19

raw_user_id = trainSet.to_raw_uid(user_id)

raw_user_id
#2178

7890

In [32]:
#raw_user_id = 1853000001

### List user reviewed recipes

In [33]:
df_user_recipes = df_recipe_ratings.loc[df_recipe_ratings["User"] == raw_user_id]
#df_user_recipes = df_concat.loc[df_concat["User"] == raw_user_id]

df_recipes.loc[df_recipes["id"].isin(df_user_recipes.Item.tolist())]

Unnamed: 0,id,name,author_name,submitted,image_url,minutes,description,category,tags,search_terms,steps,ingredients,ingredients_raw_str,serving_size,servings,calories,rating,review_count
19197,2886,Best Banana Bread,lkadlec,1999-09-26,"""https://img.sndimg.com/food/image/upload/w_55...",70,You'll never need another banana bread recipe ...,Quick Breads,"{""time-to-make"",""course"",""main-ingredient"",""cu...","{""bread""}","{""Remove odd pots and pans from oven."",""Prehea...","{""butter"",""granulated sugar"",""eggs"",""bananas"",...","{""1/2 cup butter; softened "",""1 cup g...",1 (96 g),10,272.8,5.0,2273


In [34]:
df_recipe_ratings.loc[(df_recipe_ratings["Item"].isin(df_user_recipes.Item.tolist())) & (df_recipe_ratings["User"] == raw_user_id)]

Unnamed: 0,User,Item,Rating
668,7890,2886,5


### Make anti testset

In [35]:
anti_testset = make_anti_testset(raw_user_id)

###  Make prediction with locally made algorithm

In [36]:
predictions = algo.test(anti_testset)

### OR with pickled

### List Top-n results

In [37]:
def list_result(predictions):
    pred = pd.DataFrame(predictions)
    pred.sort_values(by=['est'], inplace=True, ascending=False)
    
    recipe_list = pred.head(10)['iid'].to_list()
    
    print(recipe_list)
    
    # return df_recipes.loc[recipe_list]
    return df_recipes.loc[df_recipes["id"].isin(recipe_list)]

In [38]:
df_results = list_result(predictions)
df_results

[4218, 97213, 118450, 1209, 7483, 47769, 118258, 182040, 46645, 38711]


Unnamed: 0,id,name,author_name,submitted,image_url,minutes,description,category,tags,search_terms,steps,ingredients,ingredients_raw_str,serving_size,servings,calories,rating,review_count
584,118258,Kittencal's Best Chicken Stock/Broth (Crock Po...,Kittencalrecipezazz,2005-04-20,"""https://img.sndimg.com/food/image/upload/w_55...",28,Plan ahead this method takes 2 days to create ...,Stocks,"{""time-to-make"",""course"",""main-ingredient"",""pr...","{""low-carb""}","{""Place the chicken pieces and/or carcass (or ...","{""chicken pieces"",""onions"",""carrots"",""celery r...","{""4 lbs uncooked chicken pieces (bone and ...",1 (118 g),12,222.1,5.0,68
3978,46645,Nutter Butter Frozen Peanut Butter Pie,Leta8076,2002-11-18,"""https://img.sndimg.com/food/image/upload/w_55...",270,Delicious frozen peanut butter pie. Cook time ...,Pie,"{""weeknight"",""time-to-make"",""course"",""preparat...","{""dessert"",""pie""}","{""To make crust*; crush the cookies in a zippe...","{""nutter butter sandwich cookies"",""butter"",""cr...","{""24 Nutter Butter sandwich cookies"",""5 ...",1 (213 g),6,958.1,5.0,24
11146,97213,Jeni's Chocolate Ganache,Amanda Beth,2004-08-07,"""https://img.sndimg.com/food/image/upload/w_55...",75,"I named this for my sister-in-law, Jeni, after...",Dessert,"{""time-to-make"",""course"",""main-ingredient"",""pr...","{""dessert"",""low-sodium""}","{""In a small saucepan; over low heat; combine ...","{""semisweet chocolate"",""heavy whipping cream"",...","{""6 ounces semisweet chocolate; chopped ...",1 (307 g),1,1384.1,5.0,35
12270,118450,Heavenly Light Dinner Rolls (abm),Marie,2005-04-22,"""https://img.sndimg.com/food/image/upload/w_55...",32,One of my favorite dinner rolls. Extremely li...,Yeast Breads,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""bread"",""dinner"",""light""}","{""Add all ingredients to bread machine accordi...","{""milk"",""eggs"",""butter"",""salt"",""sugar"",""bread ...","{""1 cup milk"",""2 eggs"",""1/2 cup ...",1 (1024 g),1,175.5,5.0,14
12934,1209,Spinach Artichoke Dip,Juli9251,1999-09-22,"""https://img.sndimg.com/food/image/upload/w_55...",30,Here is a delicious spinach artichoke dip!,Cheese,"{""30-minutes-or-less"",""time-to-make"",""course"",...","{""appetizer"",""dinner""}","{""Preheat oven to 375°F."",""Mix together Parmes...","{""parmesan cheese"",""frozen chopped spinach"",""a...","{""2 cups parmesan cheese"",""1 (10 ounce)...",1 (84 g),15,150.2,5.0,609
16842,182040,Spicy Meat and Cheese Stromboli,KelBel,2006-08-16,"""https://img.sndimg.com/food/image/upload/w_55...",20,This is from 30 Minute Meals on Food Network. ...,Lunch/Snacks,"{""30-minutes-or-less"",""time-to-make"",""course"",...","{""dinner"",""lunch""}","{""Pre-heat oven to 400 degrees."",""Break open t...","{""pizza dough"",""pepperoni"",""provolone cheese"",...","{""1 pizza dough; refrigerated (recommen...",1 (86 g),4,376.0,5.0,51
19410,38711,"Make Your Own Baking Powder - Clone, Substitute",Dancer,2002-08-28,,5,This is handy when you have forgotten to buy b...,Low Protein,"{""15-minutes-or-less"",""time-to-make"",""course"",...","{""diabetic"",""vegetarian"",""healthy"",""gluten-fre...","{""Mix together; store in refrigerator."",""Can b...","{""cream of tartar"",""baking soda"",""cornstarch""}","{""1/2 teaspoon cream of tartar"",""1/4 teas...",1 (4 g),1,6.4,5.0,23
19608,4218,Honey Oat Bread (Bread Machine),Tonkcats,1999-11-10,"""https://img.sndimg.com/food/image/upload/w_55...",190,A nice simple hearth-like bread...Great for an...,Yeast Breads,"{""time-to-make"",""course"",""preparation"",""occasi...","{""healthy"",""bread""}","{""Combine ingredients in breadmaker per your m...","{""water"",""honey"",""vegetable oil"",""white bread ...","{""1 cup water"",""1/4 cup honey"",""2 t...",1 (97 g),8,255.3,5.0,53
21427,7483,Cajun Pork Roast,Julesong,2000-03-13,,0,"Garlicky, spicy pork roast!",Pork,"{""weeknight"",""15-minutes-or-less"",""time-to-mak...","{""roast"",""cajun"",""pork"",""dinner"",""southern""}","{""MARINADE: Combine chopped onion; chopped ga...","{""boneless boston pork roast"",""steak sauce"",""o...","{""10 lbs boneless boston pork roast"",""2 ...",1 (383 g),12,1074.7,5.0,9
23265,47769,Cranberry Chutney,Jellyqueen,2002-12-07,"""https://img.sndimg.com/food/image/upload/w_55...",40,"Once again, I have to give Taste of Home credi...",Chutneys,"{""60-minutes-or-less"",""time-to-make"",""course"",...","{""healthy"",""low-fat"",""low-sodium""}","{""In a saucepan; combine the cranberries; suga...","{""fresh cranberries"",""sugar"",""water"",""salt"",""g...","{""1 lb fresh cranberries"",""2 1/2 cups ...",1 (1955 g),1,456.6,5.0,16


### Examination

---

# 4. Save model