In [2]:
pip install implicit

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting implicit
  Downloading implicit-0.6.2-cp39-cp39-manylinux2014_x86_64.whl (18.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.6/18.6 MB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: implicit
Successfully installed implicit-0.6.2


In [3]:
import numpy as np
import pandas as pd
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, ranking_metrics_at_k



In [4]:
metadata = pd.read_csv('/content/metadata.csv')
metadata = metadata.rename(columns={'id': 'recipe_id'})
metadata['nutrition'] = metadata['nutrition'].map(eval)
metadata[['calories', 'fat', 'sugar', 'sodium', 'protien', 'saturated_fat', 'carbs']] = pd.DataFrame(metadata['nutrition'].tolist(), index=metadata.index)
metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 19 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   name            231636 non-null  object 
 1   recipe_id       231637 non-null  object 
 2   minutes         231637 non-null  int64  
 3   contributor_id  231637 non-null  object 
 4   submitted       231637 non-null  object 
 5   tags            231637 non-null  object 
 6   nutrition       231637 non-null  object 
 7   n_steps         231637 non-null  int64  
 8   steps           231637 non-null  object 
 9   description     226658 non-null  object 
 10  ingredients     231637 non-null  object 
 11  n_ingredients   231637 non-null  int64  
 12  calories        231637 non-null  float64
 13  fat             231637 non-null  float64
 14  sugar           231637 non-null  float64
 15  sodium          231637 non-null  float64
 16  protien         231637 non-null  float64
 17  saturated_

In [5]:
train = pd.read_csv('/content/train.csv')
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 165226 entries, 0 to 165225
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   user_id    165226 non-null  object
 1   recipe_id  165226 non-null  object
 2   date       165226 non-null  object
 3   rating     165226 non-null  int64 
 4   review     165226 non-null  object
dtypes: int64(1), object(4)
memory usage: 6.3+ MB


In [6]:
merged_df = metadata[['recipe_id', 'ingredients', 'calories', 'fat', 'sugar', 'sodium', 'protien', 'saturated_fat', 'carbs']].merge(train[['user_id', 'recipe_id', 'rating']], on = 'recipe_id')
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 165226 entries, 0 to 165225
Data columns (total 11 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   recipe_id      165226 non-null  object 
 1   ingredients    165226 non-null  object 
 2   calories       165226 non-null  float64
 3   fat            165226 non-null  float64
 4   sugar          165226 non-null  float64
 5   sodium         165226 non-null  float64
 6   protien        165226 non-null  float64
 7   saturated_fat  165226 non-null  float64
 8   carbs          165226 non-null  float64
 9   user_id        165226 non-null  object 
 10  rating         165226 non-null  int64  
dtypes: float64(7), int64(1), object(3)
memory usage: 15.1+ MB


In [7]:
df = merged_df[['user_id', 'ingredients']]
df['ingredients'] = df['ingredients'].map(eval)
df = df.explode('ingredients')
pivot_df = pd.pivot_table(df, index='user_id', columns='ingredients', values='ingredients', aggfunc=lambda x: len(x), fill_value=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ingredients'] = df['ingredients'].map(eval)


In [8]:
pivot_df.shape

(11346, 9958)

In [9]:
recipe_df = merged_df.groupby('recipe_id').agg(
mean_rating = pd.NamedAgg(column='rating', aggfunc='mean'),
num_rating = pd.NamedAgg(column='rating', aggfunc='count')
)

recipe_df = recipe_df.merge(merged_df.loc[~merged_df['recipe_id'].duplicated(), ['recipe_id', 'ingredients', 'calories', 'fat', 'sugar', 'sodium', 'protien', 'saturated_fat', 'carbs']], left_index=True, right_on='recipe_id')
recipe_df['ingredients'] = recipe_df['ingredients'].map(lambda x: set(eval(x)))
recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62517 entries, 163227 to 68572
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   mean_rating    62517 non-null  float64
 1   num_rating     62517 non-null  int64  
 2   recipe_id      62517 non-null  object 
 3   ingredients    62517 non-null  object 
 4   calories       62517 non-null  float64
 5   fat            62517 non-null  float64
 6   sugar          62517 non-null  float64
 7   sodium         62517 non-null  float64
 8   protien        62517 non-null  float64
 9   saturated_fat  62517 non-null  float64
 10  carbs          62517 non-null  float64
dtypes: float64(8), int64(1), object(2)
memory usage: 5.7+ MB


In [10]:
user_map = pd.DataFrame({'userid': pivot_df.index, 'id': range(len(pivot_df.index))})
ingredient_map = pd.DataFrame({'ingredient': pivot_df.columns, 'id': range(len(pivot_df.columns))})

In [85]:
weighted_df = bm25_weight(pivot_df.T, K1=100, B=0.8)
X = weighted_df.T.tocsr()
X_train, X_test = train_test_split(X, train_percentage=0.8, random_state= 42)

In [84]:
# from implicit.cpu.als import AlternatingLeastSquares
# from sklearn.base import BaseEstimator

# class ImplicitALS(BaseEstimator):
#     def __init__(self, factors=300, regularization=0.05, iterations=15, use_native=True, 
#                  num_threads=0, random_state=None):
#         self.factors = factors
#         self.regularization = regularization
#         self.iterations = iterations
#         self.use_native = use_native
#         self.num_threads = num_threads
#         self.random_state = random_state
#         self.model = None

#     def fit(self, X, y=None):
#         self.model = AlternatingLeastSquares(factors=self.factors, regularization=self.regularization, 
#                                              iterations=self.iterations, use_native=self.use_native, 
#                                              num_threads=self.num_threads, random_state=self.random_state)
#         self.model.fit(X)
#         return self

#     def transform(self, X):
#         return self.model.transform(X)

#     def predict(self, X):
#         return self.model.predict(X)

#     def get_params(self, deep=True):
#         return {
#             'factors': self.factors,
#             'regularization': self.regularization,
#             'iterations': self.iterations,
#             'use_native': self.use_native,
#             'num_threads': self.num_threads,
#             'random_state': self.random_state
#         }

#     def set_params(self, **params):
#         for key, value in params.items():
#             setattr(self, key, value)
#         return self


In [86]:
model = AlternatingLeastSquares(factors=300, regularization=0.05, alpha=2.0)
model.fit(X_train)

  0%|          | 0/15 [00:00<?, ?it/s]

In [95]:
ranking_metrics_at_k(model, X_train, X_test, K=2)

  0%|          | 0/11339 [00:00<?, ?it/s]

{'precision': 0.38945743273048083,
 'map': 0.3568215892053973,
 'ndcg': 0.40318187082742013,
 'auc': 0.5219266506484861}

In [96]:
test_user = user_map.sample(1).squeeze()
test_user

userid    U5528241
id            5623
Name: 5623, dtype: object

In [97]:
ids, scores = model.recommend(test_user['id'], X_train[test_user['id']], N=2, filter_already_liked_items=False)
pd.DataFrame({"ingredient": ingredient_map.loc[ids, 'ingredient'].values, "score": scores, "already_liked_train": np.in1d(ids, X_train[test_user['id']].indices), "already_liked_whole": np.in1d(ids, X[test_user['id']].indices)})

Unnamed: 0,ingredient,score,already_liked_train,already_liked_whole
0,milk,1.163285,True,True
1,whipped cream,0.902739,True,True


In [98]:
sim_item_ids, sim_score = model.similar_items(ids, 3)
pd.DataFrame({"recommended_ingredient": ingredient_map.loc[sim_item_ids[:,0], 'ingredient'].values
            , "replacement_ingredient_1": ingredient_map.loc[sim_item_ids[:,1], 'ingredient'].values
            , "recommended_ingredient_2": ingredient_map.loc[sim_item_ids[:,2], 'ingredient'].values})

Unnamed: 0,recommended_ingredient,replacement_ingredient_1,recommended_ingredient_2
0,milk,egg,eggs
1,whipped cream,strawberry jell-o gelatin dessert,firm bananas


In [99]:
top2_ingredients = set(ingredient_map.loc[ids, 'ingredient'])

In [100]:
recommended_recipes = recipe_df[recipe_df['ingredients'].map(lambda x: len(top2_ingredients.difference(x))==0)]
recommended_recipes

Unnamed: 0,mean_rating,num_rating,recipe_id,ingredients,calories,fat,sugar,sodium,protien,saturated_fat,carbs
146664,6.000000,1,R1140104,"{ice, vanilla-flavored syrup, milk, whipped cr...",280.4,15.0,79.0,11.0,13.0,25.0,13.0
114488,5.166667,6,R1181236,"{espresso coffee, milk, whipped cream, chocola...",252.2,11.0,114.0,7.0,10.0,20.0,14.0
16895,5.000000,1,R1197870,"{half-and-half, chocolate curls, vanilla extra...",795.8,76.0,189.0,28.0,51.0,73.0,23.0
117096,6.000000,3,R1205492,"{ice, milk, whipped cream, chocolate syrup, co...",210.4,7.0,114.0,5.0,6.0,12.0,13.0
13480,6.000000,1,R1412058,{chocolate flavor instant pudding and pie fill...,303.5,15.0,111.0,13.0,8.0,23.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...
162492,6.000000,1,R9657470,"{kahlua, chocolate mousse mix, eggs, milk, whi...",202.6,27.0,18.0,1.0,4.0,24.0,2.0
152073,6.000000,1,R9802281,"{whipped cream, instant pudding mix, vanilla i...",306.1,22.0,93.0,15.0,19.0,44.0,11.0
128648,5.000000,1,R9842695,"{egg, thickened cream, marsala, milk, whipped ...",591.4,41.0,212.0,15.0,15.0,81.0,27.0
60206,6.000000,1,R9888829,"{bananas, flour, milk, whipped cream, instant ...",340.0,25.0,94.0,11.0,9.0,51.0,15.0


In [101]:
#
recommended_recipes.sort_values(['calories', 'protien', 'sugar', 'sodium', 'fat', 'carbs'], ascending=[True, False, True, True, True, True ])

Unnamed: 0,mean_rating,num_rating,recipe_id,ingredients,calories,fat,sugar,sodium,protien,saturated_fat,carbs
147601,5.800000,5,R7138587,"{cinnamon sticks, vanilla extract, milk, groun...",74.2,4.0,25.0,1.0,5.0,8.0,3.0
144057,6.000000,1,R9607575,"{ice, vanilla, maple syrup, cinnamon, nutmeg, ...",91.0,3.0,32.0,7.0,5.0,7.0,5.0
52765,4.750000,4,R608470,"{vanilla ice cream, vanilla, milk, whipped cre...",101.3,7.0,45.0,1.0,3.0,14.0,4.0
129359,3.000000,2,R7115041,"{eggs, milk, salt, vanilla sugar, whipped crea...",116.2,9.0,0.0,15.0,8.0,16.0,3.0
148541,6.000000,1,R7638912,"{cinnamon syrup, caramel sauce, brewed espress...",134.2,8.0,0.0,5.0,10.0,17.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...
107451,5.833333,6,R8055977,"{club soda, vanilla ice cream, milk, whipped c...",507.9,33.0,189.0,13.0,18.0,62.0,23.0
128648,5.000000,1,R9842695,"{egg, thickened cream, marsala, milk, whipped ...",591.4,41.0,212.0,15.0,15.0,81.0,27.0
16895,5.000000,1,R1197870,"{half-and-half, chocolate curls, vanilla extra...",795.8,76.0,189.0,28.0,51.0,73.0,23.0
127187,6.000000,1,R2385081,"{semi-sweet chocolate chips, cinnamon, vanilla...",1265.2,165.0,150.0,11.0,35.0,334.0,21.0
