In [8]:
import pandas as pd
import numpy as np

In [None]:
!pip install tensorflow-recommenders
!pip install pprintpp

In [10]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [11]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
interaction_data = pd.read_csv("/content/drive/MyDrive/Food_Web_Dataset/RAW_interactions.csv")
recipe_data = pd.read_csv("/content/drive/MyDrive/Food_Web_Dataset/RAW_recipes.csv")
interaction_train = pd.read_csv("/content/drive/MyDrive/Food_Web_Dataset/interactions_train.csv")
interaction_test = pd.read_csv("/content/drive/MyDrive/Food_Web_Dataset/interactions_test.csv")

In [13]:
interaction_data

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."
...,...,...,...,...,...
1132362,116593,72730,2003-12-09,0,Another approach is to start making sauce with...
1132363,583662,386618,2009-09-29,5,These were so delicious! My husband and I tru...
1132364,157126,78003,2008-06-23,5,WOW! Sometimes I don't take the time to rate ...
1132365,53932,78003,2009-01-11,4,Very good! I used regular port as well. The ...


In [14]:
interaction_train

Unnamed: 0,user_id,recipe_id,date,rating,u,i
0,2046,4684,2000-02-25,5.0,22095,44367
1,2046,517,2000-02-25,5.0,22095,87844
2,1773,7435,2000-03-13,5.0,24732,138181
3,1773,278,2000-03-13,4.0,24732,93054
4,2046,3431,2000-04-07,5.0,22095,101723
...,...,...,...,...,...,...
698896,926904,457971,2018-12-18,5.0,13681,141067
698897,2002312797,27208,2018-12-18,5.0,14897,99787
698898,1290903,131607,2018-12-18,5.0,11605,76163
698899,226867,363072,2018-12-18,5.0,3604,29101


In [15]:
interaction_test

Unnamed: 0,user_id,recipe_id,date,rating,u,i
0,8937,44551,2005-12-23,4.0,2,173538
1,56680,126118,2006-10-07,4.0,16,177847
2,349752,219596,2008-04-12,0.0,26,89896
3,628951,82783,2007-11-13,2.0,45,172637
4,92816,435013,2013-07-31,3.0,52,177935
...,...,...,...,...,...,...
12450,101053,179011,2009-01-03,5.0,25054,130258
12451,252205,81398,2005-12-26,2.0,25055,152255
12452,624305,142984,2011-01-15,1.0,25057,139864
12453,173575,104842,2004-12-18,3.0,25059,140646


In [16]:
interaction_data = interaction_data.astype({'user_id': 'string', 'recipe_id':'string'})
interaction_train = interaction_train.astype({'user_id': 'string', 'recipe_id':'string'})
interaction_test = interaction_test.astype({'user_id': 'string', 'recipe_id':'string'})

In [17]:
class RankingModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        self.user_embeddings = tf.keras.Sequential([
                                    tf.keras.layers.experimental.preprocessing.StringLookup(
                                        vocabulary=uniqueUserIds, mask_token=None),
                                        # add addional embedding to account for unknow tokens
                                    tf.keras.layers.Embedding(len(uniqueUserIds)+1, embedding_dimension)
                                    ])

        self.product_embeddings = tf.keras.Sequential([
                                    tf.keras.layers.experimental.preprocessing.StringLookup(
                                        vocabulary=uniqueFoodIds, mask_token=None),
                                    # add addional embedding to account for unknow tokens
                                    tf.keras.layers.Embedding(len(uniqueFoodIds)+1, embedding_dimension)
                                    ])
        # Set up a retrieval task and evaluation metrics over the
        # entire dataset of candidates.
        self.ratings = tf.keras.Sequential([
                            tf.keras.layers.Dense(256, activation="relu"),
                            tf.keras.layers.Dense(64,  activation="relu"),
                            tf.keras.layers.Dense(1)
                              ])

    def call(self, userId, foodId):
        user_embeddings  = self.user_embeddings (userId)
        food_embeddings = self.product_embeddings(foodId)
        return self.ratings(tf.concat([user_embeddings, food_embeddings], axis=1))

# Build a model.
class FoodModel(tfrs.models.Model):

    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.Layer   = tfrs.tasks.Ranking(
                                                    loss    =  tf.keras.losses.MeanSquaredError(),
                                                    metrics = [tf.keras.metrics.RootMeanSquaredError()])


    def compute_loss(self, features, training=False):
        rating_predictions = self.ranking_model(features["userID"], features["foodID"]  )

        return self.task( labels=features["rating"], predictions=rating_predictions)

In [18]:
uniqueUserIds = interaction_data.user_id.unique()
uniqueFoodIds = interaction_data.recipe_id.unique()

In [19]:
train_data = tf.data.Dataset.from_tensor_slices(
{
    "userID":tf.cast(interaction_train.user_id.values, tf.string),
    "foodID":tf.cast(interaction_train.recipe_id.values, tf.string),
    "rating":tf.cast(interaction_train.rating.values, tf.float32)
})

test_data = tf.data.Dataset.from_tensor_slices(
{
    "userID":tf.cast(interaction_test.user_id.values, tf.string),
    "foodID":tf.cast(interaction_test.recipe_id.values, tf.string),
    "rating":tf.cast(interaction_test.rating.values, tf.float32)
})

In [20]:
tf.random.set_seed(42)
train_data = train_data.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

In [21]:
model = FoodModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.001))
cached_train = train_data.shuffle(100_000).batch(8192).cache()
cached_test = test_data.batch(4096).cache()
model.fit(cached_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7c25080c90f0>

In [22]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.3498554229736328,
 'loss': 2.004978895187378,
 'regularization_loss': 0,
 'total_loss': 2.004978895187378}

In [23]:
user_rand = uniqueUserIds[200]
test_rating = {}
for m in test_data.take(10):
    test_rating[m["foodID"].numpy()]=RankingModel()(tf.convert_to_tensor([user_rand]),tf.convert_to_tensor([m["foodID"]]))

In [25]:
print("Top 10 recommended products for User {}: ".format(user_rand))
for m in sorted(test_rating, key=test_rating.get, reverse=True):
    print(recipe_data.loc[recipe_data['id'] == int(m.decode())]['name'].item())

Top 10 recommended products for User 450004: 
peanut butter spirals
azumaya pot stickers
breakfast biscuit sandwiches
strawberry fudge
blueberry pancake syrup   low carb
lemon herb chicken w mushrooms low carb
ww 3 points   grilled beef fajitas
ranch egg salad croissant sandwiches
sweet and sour pork ribs
skinny style chocolatey hot cocoa
