In [68]:
# technical code 
# some middle steps are outputed, thus the code slightly differs from the code used in the website
# the syntax of giving input and getting output also differs

In [42]:
import numpy as np
import pandas as pd
import random

In [48]:
# reading data

recipes_df = pd.read_csv('allrecipes.csv', usecols = ["TITLES", "LINKS"])
recipes_df["recipeId"] = recipes_df.index+1
recipes_df = recipes_df[["recipeId", "TITLES", "LINKS"]]
recipes_df.head()

Unnamed: 0,recipeId,TITLES,LINKS
0,1,How to Make the Best Oatmeal,https://www.simplyrecipes.com/recipes/how_to_m...
1,2,Spinach Frittata,https://www.simplyrecipes.com/recipes/spinach_...
2,3,Orange-Spiced Whole Wheat Muffins,https://www.simplyrecipes.com/recipes/orange_s...
3,4,Tuscan Scrambled Eggs,https://www.simplyrecipes.com/recipes/tuscan_s...
4,5,Crispy Hash Browns,https://www.simplyrecipes.com/recipes/crispy_h...


In [49]:
# reading data

ratings_df = pd.read_csv('ratings.csv')
ratings_df.head()

Unnamed: 0,userId,recipeId,rating
0,1,527,3
1,1,238,3
2,1,400,2
3,1,88,3
4,1,648,5


In [50]:
# getting the input from the user

userInput = [
            {'TITLES':'Zucchini Ricotta Frittata', 'rating':5},
            {'TITLES':'Vegan Chocolate Pudding', 'rating':5},
            {'TITLES':'Mediterranean Mezze Platte', 'rating':2},
            {'TITLES':"Pecan Pralines", 'rating':0},
            {'TITLES':'Sous Vide Sesame Chicken', 'rating':4}
         ] 

inputRecipes = pd.DataFrame(userInput)
inputRecipes

Unnamed: 0,TITLES,rating
0,Zucchini Ricotta Frittata,5
1,Vegan Chocolate Pudding,5
2,Mediterranean Mezze Platte,2
3,Pecan Pralines,0
4,Sous Vide Sesame Chicken,4


In [51]:
# filtering the recipes by title

inputId = recipes_df[recipes_df['TITLES'].isin(inputRecipes['TITLES'].tolist())]
inputId

Unnamed: 0,recipeId,TITLES,LINKS
13,14,Zucchini Ricotta Frittata,https://www.simplyrecipes.com/recipes/zucchini...
248,249,Vegan Chocolate Pudding,https://www.simplyrecipes.com/recipes/vegan_ch...
506,507,Pecan Pralines,https://www.simplyrecipes.com/recipes/pralines/
776,777,Sous Vide Sesame Chicken,https://www.simplyrecipes.com/recipes/sous_vid...


In [52]:
# merging so we can get the recipeId

inputRecipes = pd.merge(inputId, inputRecipes)
inputRecipes

Unnamed: 0,recipeId,TITLES,LINKS,rating
0,14,Zucchini Ricotta Frittata,https://www.simplyrecipes.com/recipes/zucchini...,5
1,249,Vegan Chocolate Pudding,https://www.simplyrecipes.com/recipes/vegan_ch...,5
2,507,Pecan Pralines,https://www.simplyrecipes.com/recipes/pralines/,0
3,777,Sous Vide Sesame Chicken,https://www.simplyrecipes.com/recipes/sous_vid...,4


In [53]:
# filtering out users that gave review on for the same recipes 

userSubset = ratings_df[ratings_df['recipeId'].isin(inputRecipes['recipeId'].tolist())]
userSubset.head()

Unnamed: 0,userId,recipeId,rating
28,1,14,2
124,1,249,4
172,1,777,1
218,1,507,2
481,2,507,2


In [27]:
userSubsetGroup = userSubset.groupby(['userId'])

In [54]:
# giving priority to the users with more recipes in common

userSubsetGroup = sorted(userSubsetGroup,  key=lambda x: len(x[1]), reverse=True)

In [55]:
userSubsetGroup = userSubsetGroup[0:100]

In [56]:
# calculating Pearson Correlation

# Pearson Correlation formula
# https://www.socscistatistics.com/tests/pearson/#:~:text=The%20Pearson%20correlation%20coefficient%20is,means%20a%20perfect%20negataive%20correlation.

# the key is the userId and the value is the coefficient
pearsonCorrelationDict = {}

for name, group in userSubsetGroup:
    
    # sorting the input and current user group
    group = group.sort_values(by='recipeId')
    inputRecipes = inputRecipes.sort_values(by='recipeId')
    
    # total similar recipes
    nRatings = len(group)
    
    # review scores for the recipes
    temp_df = inputRecipes[inputRecipes['recipeId'].isin(group['recipeId'].tolist())]
    
    tempRatingList = temp_df['rating'].tolist()
    tempGroupList = group['rating'].tolist()
    
    # the calculation
    Sxx = sum([i**2 for i in tempRatingList]) - pow(sum(tempRatingList),2)/float(nRatings)
    Syy = sum([i**2 for i in tempGroupList]) - pow(sum(tempGroupList),2)/float(nRatings)
    Sxy = sum( i*j for i, j in zip(tempRatingList, tempGroupList)) - sum(tempRatingList)*sum(tempGroupList)/float(nRatings)
    
    # if the denominator is not zero, divide, otherwise the correlation is 0
    if Sxx != 0 and Syy != 0:
        pearsonCorrelationDict[name] = Sxy/np.sqrt(Sxx*Syy)
    else:
        pearsonCorrelationDict[name] = 0

In [57]:
pearsonCorrelationDict

{21: -0.8346223261119858,
 109: 0.8677218312746247,
 236: -0.4338609156373123,
 299: -0.36563621206356534,
 323: 0.8574929257125441,
 328: -0.5547950410914818,
 377: 0.2666739829256819,
 28: 0.6933752452815364,
 40: 0.3273268353539886,
 43: 0.6546536707079774,
 53: 0.3273268353539887,
 73: 0.32732683535398854,
 94: 0.944911182523068,
 143: -0.826033187630902,
 155: -0.8170571691028833,
 162: -0.998625428903524,
 166: 0.944911182523068,
 169: -0.3711537444790452,
 172: -0.6546536707079772,
 229: -0.6185895741317419,
 235: -0.3273268353539887,
 238: -0.9449111825230633,
 239: -0.7559289460184544,
 241: -0.6933752452815368,
 258: 0.5000000000000002,
 263: -0.654653670707977,
 287: -0.7559289460184546,
 292: 0.6185895741317419,
 313: 0.9449111825230679,
 345: -0.5,
 352: 0.944911182523068,
 365: 0.3273268353539886,
 370: 0.49999999999999994,
 380: 0.6546536707079772,
 382: -0.8170571691028833,
 386: -0.998625428903524,
 390: -0.6546536707079772,
 393: -0.944911182523068,
 408: -0.499999999

In [58]:
pearsonDF = pd.DataFrame.from_dict(pearsonCorrelationDict, orient='index')
pearsonDF.head()

Unnamed: 0,0
21,-0.834622
109,0.867722
236,-0.433861
299,-0.365636
323,0.857493


In [59]:
pearsonDF.columns = ['similarityIndex']
pearsonDF['userId'] = pearsonDF.index
pearsonDF.index = range(len(pearsonDF))
pearsonDF.head()

Unnamed: 0,similarityIndex,userId
0,-0.834622,21
1,0.867722,109
2,-0.433861,236
3,-0.365636,299
4,0.857493,323


In [60]:
topUsers=pearsonDF.sort_values(by='similarityIndex', ascending=False)[0:50]
topUsers.head()

Unnamed: 0,similarityIndex,userId
50,1.0,10
64,1.0,66
85,1.0,126
84,1.0,123
76,1.0,102


In [61]:
topUsersRating = topUsers.merge(ratings_df, left_on='userId', right_on='userId', how='inner')
topUsersRating.head()

Unnamed: 0,similarityIndex,userId,recipeId,rating
0,1.0,10,284,2
1,1.0,10,598,5
2,1.0,10,61,3
3,1.0,10,419,3
4,1.0,10,816,1


In [62]:
# Multiplying the similarity by the rating

topUsersRating['weightedRating'] = topUsersRating['similarityIndex']*topUsersRating['rating']
topUsersRating.head()

Unnamed: 0,similarityIndex,userId,recipeId,rating,weightedRating
0,1.0,10,284,2,2.0
1,1.0,10,598,5,5.0
2,1.0,10,61,3,3.0
3,1.0,10,419,3,3.0
4,1.0,10,816,1,1.0


In [63]:
tempTopUsersRating = topUsersRating.groupby('recipeId').sum()[['similarityIndex','weightedRating']]
tempTopUsersRating.columns = ['sum_similarityIndex','sum_weightedRating']
tempTopUsersRating.head()

Unnamed: 0_level_0,sum_similarityIndex,sum_weightedRating
recipeId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,11.295561,44.840026
2,8.766365,26.465896
3,6.926892,21.743606
4,15.754663,36.705773
5,12.440044,35.981033


In [64]:
recommendation_df = pd.DataFrame()

In [65]:
# taking the weighted average

recommendation_df['weighted average recommendation score'] = tempTopUsersRating['sum_weightedRating']/tempTopUsersRating['sum_similarityIndex']
recommendation_df['recipeId'] = tempTopUsersRating.index
recommendation_df.head()

Unnamed: 0_level_0,weighted average recommendation score,recipeId
recipeId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.969703,1
2,3.019027,2
3,3.139013,3
4,2.329836,4
5,2.892356,5


In [66]:
# reccomended recipes

recommendation_df = recommendation_df.sort_values(by='weighted average recommendation score', ascending=False)
recommendation_df.head()

Unnamed: 0_level_0,weighted average recommendation score,recipeId
recipeId,Unnamed: 1_level_1,Unnamed: 2_level_1
777,4.366387,777
418,4.143185,418
824,4.099754,824
380,4.062545,380
86,4.058152,86


In [67]:
recipes_df.loc[recipes_df['recipeId'].isin(recommendation_df.head(20)['recipeId'].tolist())]

Unnamed: 0,recipeId,TITLES,LINKS
0,1,How to Make the Best Oatmeal,https://www.simplyrecipes.com/recipes/how_to_m...
85,86,Thousand Island Dressing,https://www.simplyrecipes.com/recipes/thousand...
204,205,Carrot Top Pesto,https://www.simplyrecipes.com/recipes/carrot_t...
268,269,Strawberry Rhubarb Terrine,https://www.simplyrecipes.com/recipes/strawber...
313,314,Aviation Cocktail,https://www.simplyrecipes.com/recipes/aviation...
362,363,Fresh Pea Hummus,https://www.simplyrecipes.com/recipes/spring_p...
379,380,Cranberry Salsa,https://www.simplyrecipes.com/recipes/cranberr...
397,398,Classic Cheese Ball,https://www.simplyrecipes.com/recipes/classic_...
408,409,"Corn, Crab, and Old Bay Deviled Eggs",https://www.simplyrecipes.com/recipes/corn_cra...
417,418,Potato Latkes,https://www.simplyrecipes.com/recipes/potato_l...
