In [2]:
pip install scikit-surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[K     |████████████████████████████████| 771 kB 4.1 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp38-cp38-linux_x86_64.whl size=2626455 sha256=36726accbe8974d33ab5ab6ed7650f31e8b018be6fa78a881ccf3a01644e0fce
  Stored in directory: /root/.cache/pip/wheels/af/db/86/2c18183a80ba05da35bf0fb7417aac5cddbd93bcb1b92fd3ea
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [3]:
import pandas as pd
import seaborn as sns
import numpy as np
import surprise
import re
import os
from surprise import Reader, SVD, accuracy, Dataset
from surprise.model_selection import cross_validate, train_test_split
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction import DictVectorizer

In [4]:
# Directory holding the two raw CSV files. Defaults to the Colab location the
# notebook originally used; set RECIPES_DATA_DIR to run it anywhere else.
DATA_DIR = os.environ.get("RECIPES_DATA_DIR", "/content")

# User/recipe interactions (ratings + review text) and the recipe metadata.
df_review = pd.read_csv(os.path.join(DATA_DIR, "RAW_interactions.csv"))
df_recipes = pd.read_csv(os.path.join(DATA_DIR, "RAW_recipes.csv"))

In [5]:
# Preview the first five interaction rows.
df_review.head(n=5)

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


In [6]:
# Preview the first five recipe rows.
df_recipes.head(n=5)

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [7]:
# Build the surprise dataset from the (user, recipe, rating) triples.
# NOTE(review): the reader declares a 1-5 scale -- confirm the raw ratings
# contain no values outside that range (e.g. 0).
reader = Reader(rating_scale=(1, 5))
rating = df_review.loc[:, ['user_id', 'recipe_id', 'rating']]
data = Dataset.load_from_df(rating, reader)

In [10]:
# Number of reviews per recipe_id (despite the "book" in the name, these are recipes).
ratings_per_book = df_review.loc[:, 'recipe_id'].value_counts()
# Show the first 20 entries among recipes with more than 10 reviews.
ratings_per_book.loc[ratings_per_book.gt(10)].iloc[:20]

2886      1613
27208     1601
89204     1579
39087     1448
67256     1322
54257     1305
22782     1234
32204     1220
69173      997
68955      904
33919      877
82102      855
25885      847
28148      802
135350     786
26110      770
99476      762
10744      731
129926     730
33671      727
Name: recipe_id, dtype: int64

In [11]:
# Keep only recipes that received more than 10 reviews.
popular_recipe_ids = ratings_per_book.loc[ratings_per_book > 10].index
df_recipes_10 = df_recipes.loc[df_recipes['id'].isin(popular_recipe_ids)]

In [12]:
# Content-based filtering, approach 1: the 'ingredients' text is the only feature.
# Each recipe's ingredients string is treated as one document; the resulting
# matrix has one row per recipe and one column per vocabulary term.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df_recipes_10.loc[:, 'ingredients'])
tfidf_matrix.shape

(18765, 2214)

In [13]:
# Pairwise similarity between every pair of recipes. TfidfVectorizer L2-normalises
# rows by default, so the plain dot product computed here is the cosine similarity.
# The result is a dense (n_recipes x n_recipes) array, which is memory hungry.
cosine_similarity = linear_kernel(tfidf_matrix)
# Spot-check one row of the matrix.
cosine_similarity[12000, :]

array([0.        , 0.        , 0.        , ..., 0.        , 0.02106207,
       0.        ])

In [14]:
# Re-number the rows 0..n-1 so that frame labels coincide with the row positions
# of the similarity matrix.
df_recipes_10 = df_recipes_10.reset_index(drop=True)
# Lookup Series used by get_recommendations. After the reset above it simply
# holds 0..n-1, i.e. it maps each row position to itself.
ind = pd.Series(df_recipes_10.index)

In [15]:
def get_recommendations(index, method, df_recipes_10, top_n=5):
  """Return the ingredient lists of the recipes most similar to a given recipe.

  Args:
    index: Zero-based row position of the query recipe in ``df_recipes_10``;
      the same row of ``method`` is read for its similarity scores.
    method: Square pairwise-similarity matrix where ``method[i][j]`` is the
      similarity between rows ``i`` and ``j`` of ``df_recipes_10``.
    df_recipes_10: Recipe frame with ``name`` and ``ingredients`` columns,
      row-aligned with ``method``.
    top_n: Number of recommendations to return (default 5).

  Returns:
    The ``ingredients`` Series of the ``top_n`` most similar recipes, in
    decreasing order of similarity. The query recipe is never included.

  Raises:
    IndexError: ``index`` is outside ``0 .. len(df_recipes_10) - 1``.
    ValueError: ``top_n`` is negative, or ``method`` and ``df_recipes_10``
      do not have the same number of rows (for example rows were dropped
      after the similarity matrix was computed).
  """
  n_recipes = len(df_recipes_10)
  if not 0 <= index < n_recipes:
    raise IndexError(f"index {index} is out of range for {n_recipes} recipes")
  if top_n < 0:
    raise ValueError("top_n must be non-negative")

  # The notebook-level `ind` Series only maps each position to itself, so the
  # position is used directly; this keeps the function independent of global
  # state that can go stale between cells.
  scores = np.asarray(method[index], dtype=float)
  if scores.shape[0] != n_recipes:
    raise ValueError(
      f"similarity row has {scores.shape[0]} entries but the frame has "
      f"{n_recipes} rows; recompute the similarity matrix"
    )

  print("Given recipe: ", df_recipes_10['name'].iloc[index])

  # Stable descending sort: ties keep ascending row order, matching what
  # sorted(..., reverse=True) on the enumerated scores would give.
  ranked = np.argsort(-scores, kind="stable")
  # Drop the query by position rather than assuming it is ranked first: an
  # exact duplicate (or an all-zero vector) can outrank or tie with it.
  ranked = ranked[ranked != index][:top_n]
  return df_recipes_10['ingredients'].iloc[ranked]

In [16]:
# Recommendations for the recipe at row 56, ranked with the similarity matrix
# built from the "ingredients" feature.
get_recommendations(index=56, method=cosine_similarity, df_recipes_10=df_recipes_10)

Given recipe:  starbucks  oat fudge bars


4162    ['butter', 'brown sugar', 'eggs', 'vanilla', '...
5787    ['flour', 'sugar', 'salt', 'baking powder', 'b...
7417    ['quick oats', 'flour', 'brown sugar', 'salt',...
1448    ['butter', 'sugar', 'brown sugar', 'banana', '...
6307    ['margarine', 'brown sugar', 'white sugar', 'v...
Name: ingredients, dtype: object

In [17]:
# Treat the literal string 'None' as missing, then drop every row that has a
# missing value in any column. The index is not renumbered here.
df_recipes_10 = (
    df_recipes_10
    .replace('None', np.nan)
    .dropna(axis=0, how='any')
)

In [18]:
# Content-based filtering, approach 1 (variant): the recipe 'description' text is
# the only feature. This overwrites the vectorizer and matrix built from
# 'ingredients' earlier in the notebook.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df_recipes_10.loc[:, 'description'])
tfidf_matrix.shape

(18359, 18720)

In [19]:
# Pairwise similarity between every pair of recipe descriptions. TfidfVectorizer
# L2-normalises rows by default, so the dot product here is the cosine similarity.
# This replaces the ingredient-based matrix stored under the same name.
cosine_similarity = linear_kernel(tfidf_matrix)
# Spot-check one row of the matrix.
cosine_similarity[12000, :]

array([0.03183325, 0.        , 0.0432712 , ..., 0.03752052, 0.02269259,
       0.09960815])

In [20]:
# Rows were dropped above, so re-number the frame 0..n-1 again to keep its labels
# aligned with the row positions of the description-based similarity matrix.
df_recipes_10 = df_recipes_10.reset_index(drop=True)
# Lookup Series used by get_recommendations; after the reset it holds 0..n-1,
# i.e. it maps each row position to itself.
ind = pd.Series(df_recipes_10.index)

In [21]:
def get_recommendations(index, method, df_recipes_10, top_n=5):
  """Return the descriptions of the recipes most similar to a given recipe.

  This definition replaces (shadows) the ingredient-based function of the
  same name defined earlier in the notebook.

  Args:
    index: Zero-based row position of the query recipe in ``df_recipes_10``;
      the same row of ``method`` is read for its similarity scores.
    method: Square pairwise-similarity matrix where ``method[i][j]`` is the
      similarity between rows ``i`` and ``j`` of ``df_recipes_10``.
    df_recipes_10: Recipe frame with a ``description`` column, row-aligned
      with ``method``.
    top_n: Number of recommendations to return (default 5).

  Returns:
    The ``description`` Series of the ``top_n`` most similar recipes, in
    decreasing order of similarity. The query recipe is never included.

  Raises:
    IndexError: ``index`` is outside ``0 .. len(df_recipes_10) - 1``.
    ValueError: ``top_n`` is negative, or ``method`` and ``df_recipes_10``
      do not have the same number of rows (for example rows were dropped
      after the similarity matrix was computed).
  """
  n_recipes = len(df_recipes_10)
  if not 0 <= index < n_recipes:
    raise IndexError(f"index {index} is out of range for {n_recipes} recipes")
  if top_n < 0:
    raise ValueError("top_n must be non-negative")

  # The notebook-level `ind` Series only maps each position to itself, so the
  # position is used directly; this keeps the function independent of global
  # state that can go stale between cells.
  scores = np.asarray(method[index], dtype=float)
  if scores.shape[0] != n_recipes:
    raise ValueError(
      f"similarity row has {scores.shape[0]} entries but the frame has "
      f"{n_recipes} rows; recompute the similarity matrix"
    )

  print("Given recipe description: ", df_recipes_10['description'].iloc[index])

  # Stable descending sort: ties keep ascending row order, matching what
  # sorted(..., reverse=True) on the enumerated scores would give.
  ranked = np.argsort(-scores, kind="stable")
  # Drop the query by position rather than assuming it is ranked first: an
  # identical description (or an all-zero vector) can outrank or tie with it.
  ranked = ranked[ranked != index][:top_n]
  return df_recipes_10['description'].iloc[ranked]

In [22]:
# Recommendations for the recipe at row 56, ranked with the similarity matrix
# built from the "description" feature.
get_recommendations(index=56, method=cosine_similarity, df_recipes_10=df_recipes_10)

Given recipe description:  i am always tempted to buy these at starbucks so when a friend brought these to a cookie exchange i was quite excited!


11936    i received these as part of a christmas bake e...
5852     i received this as part of a christmas bake ex...
15826    this is starbucks recipe for their banana waln...
17326    after receiving these as part of a christmas b...
7124     a friend of mine brought this for dinner and i...
Name: description, dtype: object