In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import layers
from tensorflow import keras

In [5]:
df = pd.read_csv("../dataset/preprocessed_data.csv")
df

Unnamed: 0,user_id,recipe_id,date,rating,review,name,minutes,contributor_id,submitted,tags,...,sugar,sodium,protein,saturated_fat,carbohydrates,food_types,negative,neutral,positive,compound
0,7708,60599,2005-09-02,4,very good,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.000,0.238,0.762,0.4927
1,27707,60599,2005-12-22,5,better than the real,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.000,0.508,0.492,0.4404
2,35308,60599,2006-09-26,5,absolutely awesome i was speechless when i tri...,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.000,0.883,0.117,0.6590
3,19399,60599,2007-03-09,5,these taste absolutely wonderful my son in law...,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.000,0.675,0.325,0.8908
4,43887,60599,2008-02-20,0,made my own buttermilk w vinegar and milk. use...,kfc honey bbq strips,40,166019,2005-08-24,"60-minutes-or-less, time-to-make, main-ingredi...",...,40.0,37.0,78.0,4.0,10.0,Non-veg,0.000,0.929,0.071,0.4588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
942363,10188,22092,2007-04-26,0,i am not going to rate this because i did have...,garlic clove chicken,65,49304,2003-02-21,"weeknight, time-to-make, course, main-ingredie...",...,1.0,7.0,53.0,29.0,2.0,Non-veg,0.071,0.929,0.000,-0.5075
942364,51769,161871,2009-09-29,5,these were so delicious my husband and i truly...,stacey e s yummy veggie burgers,21,742029,2009-08-24,"30-minutes-or-less, time-to-make, course, main...",...,11.0,24.0,10.0,0.0,8.0,Healthy,0.000,0.752,0.248,0.9609
942365,15786,33054,2008-06-23,5,wow sometimes i do not take the time to rate r...,pot roast with port stove top,115,108291,2003-12-05,"weeknight, time-to-make, course, preparation, ...",...,22.0,20.0,93.0,112.0,4.0,Veg,0.063,0.749,0.188,0.9520
942366,5051,33054,2009-01-11,4,very good i used regular port as well. the gra...,pot roast with port stove top,115,108291,2003-12-05,"weeknight, time-to-make, course, preparation, ...",...,22.0,20.0,93.0,112.0,4.0,Veg,0.000,0.584,0.416,0.8856


In [7]:
df = df.sample(frac=1, random_state=42)

user_ids = df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
recipe_ids = df["recipe_id"].unique().tolist()
recipe2recipe_encoded = {x: i for i, x in enumerate(recipe_ids)}
recipe_encoded2recipe = {i: x for i, x in enumerate(recipe_ids)}
df["user"] = df["user_id"].map(user2user_encoded)
df["recipe"] = df["recipe_id"].map(recipe2recipe_encoded)

num_users = len(user2user_encoded)
num_recipes = len(recipe_encoded2recipe)
df["rating"] = df["rating"].values.astype(np.float32)

# min and max ratings will be used to normalize the ratings later
min_rating = min(df["rating"])
max_rating = max(df["rating"])

print(
    "Number of users: {}, Number of Recipes: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_recipes, min_rating, max_rating
    )
)

Number of users: 192751, Number of Recipes: 194084, Min rating: 0.0, Max rating: 5.0


In [8]:
x = df[["user", "recipe"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = df["rating"] / 5
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.8 * df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

In [10]:
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

Metal device set to: Apple M1 Pro
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1


In [11]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

EMBEDDING_SIZE = 32


class RecommenderNet(keras.Model):
    def __init__(self, num_users, num_recipes, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_users = num_users
        self.num_recipes = num_recipes
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-1),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.recipe_embedding = layers.Embedding(
            num_recipes,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-1),
        )
        self.recipe_bias = layers.Embedding(num_recipes, 1)

    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        recipe_vector = self.recipe_embedding(inputs[:, 1])
        recipe_bias = self.recipe_bias(inputs[:, 1])
        dot_user_recipe = tf.tensordot(user_vector, recipe_vector, 2)
        # Add all the components (including bias)
        x = dot_user_recipe + user_bias + recipe_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)


# with strategy.scope():
model = RecommenderNet(num_users, num_recipes, EMBEDDING_SIZE)

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-8, verbose=1)

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_squared_error', metrics=['mse']
)



In [None]:
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=50,
    verbose=1,
    validation_data=(x_val, y_val),
    callbacks=[reduce_lr_on_plateau, early_stopping]
)

Epoch 1/50


2023-04-29 23:05:20.838241: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50

In [None]:
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "test"], loc="upper left")
plt.show()

In [12]:
y_pred = model.predict(x_val)
print(y_pred)

[[0.9274967 ]
 [0.9894901 ]
 [0.9401887 ]
 ...
 [0.99128026]
 [0.91530746]
 [0.8837133 ]]


In [13]:
y_pred = y_pred.flatten()
y_pred

array([0.9274967 , 0.9894901 , 0.9401887 , ..., 0.99128026, 0.91530746,
       0.8837133 ], dtype=float32)

In [14]:
y_pred10 = y_pred * 5
y_pred10

array([4.6374836, 4.9474506, 4.7009435, ..., 4.9564013, 4.576537 ,
       4.4185667], dtype=float32)

In [15]:
y_val10 = y_val * 5
y_val10 = y_val10.values
y_val10

array([3., 5., 5., ..., 5., 5., 5.], dtype=float32)

In [20]:
from sklearn.metrics import mean_squared_error
from math import sqrt

rms = sqrt(mean_squared_error(y_val10, y_pred10))
print(rms)

for i in range(10):
    print("Predicted rating: {:.2f}".format(y_pred10[i]), "Actual rating: {:.2f}".format(y_val10[i]))

0.9262021552533416
Predicted rating: 4.64 Actual rating: 3.00
Predicted rating: 4.95 Actual rating: 5.00
Predicted rating: 4.70 Actual rating: 5.00
Predicted rating: 3.89 Actual rating: 3.00
Predicted rating: 4.55 Actual rating: 5.00
Predicted rating: 4.52 Actual rating: 5.00
Predicted rating: 4.93 Actual rating: 5.00
Predicted rating: 4.94 Actual rating: 5.00
Predicted rating: 4.92 Actual rating: 5.00
Predicted rating: 3.93 Actual rating: 5.00


In [16]:
model.save("recommendation")

In [17]:
!zip -r model.zip recommendation

  adding: recommendation/ (stored 0%)
  adding: recommendation/saved_model.pb (deflated 89%)
  adding: recommendation/variables/ (stored 0%)
  adding: recommendation/variables/variables.index (deflated 59%)
  adding: recommendation/variables/variables.data-00000-of-00001 (deflated 39%)
  adding: recommendation/keras_metadata.pb (deflated 81%)
  adding: recommendation/assets/ (stored 0%)
  adding: recommendation/fingerprint.pb (stored 0%)


In [18]:
from tensorflow import keras

In [19]:
model = keras.models.load_model('recommendation')