### Import TFRS

First, install and import TFRS:

In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

### Read the data

In [None]:
# # Ratings data.
# ratings = tfds.load('movielens/100k-ratings', split="train")
# # Features of all the available movies.
# movies = tfds.load('movielens/100k-movies', split="train")

# # Select the basic features.
# ratings = ratings.map(lambda x: {
#     "movie_title": x["movie_title"],
#     "user_id": x["user_id"]
# })
# movies = movies.map(lambda x: x["movie_title"])

In [5]:
# Ratings data.
ratings = pd.read_csv('../Downloads/user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))
# Features of all the available movies.
movies = pd.read_csv('../Downloads/recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings = ratings.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

Build vocabularies to convert user ids and movie titles into integer indices for embedding layers:

In [6]:
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
# user_ids_vocabulary.adapt(ratings.map(lambda x: tf.strings.as_string(x["user_id"])))
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

### Define a model

We can define a TFRS model by inheriting from `tfrs.Model` and implementing the `compute_loss` method:

In [7]:
class MovieLensModel(tfrs.Model):
  # We derive from a custom base class to help reduce boilerplate. Under the hood,
  # these are still plain Keras Models.

  def __init__(
      self,
      user_model: tf.keras.Model,
      movie_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    super().__init__()

    # Set up user and movie representations.
    self.user_model = user_model
    self.movie_model = movie_model

    # Set up a retrieval task.
    self.task = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    # Define how the loss is computed.

    user_embeddings = self.user_model(features["user_id"])
    movie_embeddings = self.movie_model(features["movie_title"])

    return self.task(user_embeddings, movie_embeddings)

Define the two models and the retrieval task.

In [8]:
# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)
  )
)




### Fit and evaluate it.

Create the model, train it, and generate predictions:



In [9]:
# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Train for 3 epochs.
model.fit(ratings.batch(4096), epochs=3)

# Use brute-force search to set up retrieval using the trained representations.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(
    movies.batch(100).map(lambda title: (title, model.movie_model(title))))

# Get some recommendations.
_, titles = index(np.array(["42"]))
print(f"Top 3 recommendations for user 42: {titles[0, :3]}")

Epoch 1/3
Epoch 2/3
Epoch 3/3
Top 3 recommendations for user 42: [b'Tahu & Mie Crispy' b'Sop kikil (kaki kambing)'
 b'Ikan Sarden Sambel Merah No MSG']


In [10]:
# Get some recommendations.
_, titles = index(np.array(["0"]))
print(f"Top 3 recommendations for user 1551: {titles[0, :3]}")

Top 3 recommendations for user 1551: [b'Tahu & Mie Crispy' b'Sop kikil (kaki kambing)'
 b'Ikan Sarden Sambel Merah No MSG']


In [11]:
# Get some recommendations.
_, titles = index(np.array(["15051"]))
print(f"Top 3 recommendations for user 15051: {titles[0, :3]}")

Top 3 recommendations for user 15051: [b'Tahu & Mie Crispy' b'Sop kikil (kaki kambing)'
 b'Ikan Sarden Sambel Merah No MSG']


In [12]:
# Get some recommendations.
_, titles = index(np.array(["15052"]))
print(f"Top 3 recommendations for user 15051: {titles[0, :3]}")

Top 3 recommendations for user 15051: [b'Tahu & Mie Crispy' b'Sop kikil (kaki kambing)'
 b'Ikan Sarden Sambel Merah No MSG']


In [13]:
# Get some recommendations.
_, titles = index(np.array(["200"]))
print(f"Top 3 recommendations for user 1551: {titles[0, :3]}")

Top 3 recommendations for user 1551: [b'Tumis Cumi asin pedas campur tahu..' b'Thai Beef Salad'
 b'Bubur Ayam Bumbu Kuning']


In [14]:
# Get some recommendations.
_, titles = index(np.array(["100"]))
print(f"Top 3 recommendations for user 15051: {titles[0, :3]}")

Top 3 recommendations for user 15051: [b'Sate Kambing Empuk Sambel Kecap' b'Bola Bola isi tahu'
 b'Tempe Kemangi Bu-Ris Enaak']


In [15]:
# Get some recommendations.
_, titles = index(np.array(["1000"]))
print(f"Top 3 recommendations for user 15051: {titles[0, :3]}")

Top 3 recommendations for user 15051: [b'Kari telur tempe tahu' b'Chicken katsu' b'Chicken katsu']


In [16]:
# Get some recommendations.
_, titles = index(np.array(["150"]))
print(f"Top 3 recommendations for user 15051: {titles[0, :3]}")

Top 3 recommendations for user 15051: [b'Asam pedas ceker tulang ayam kuah mercon' b'Bandeng Bakar tulang lunak'
 b'Ca brokoli ayam']


## **Calculate validation dataset**

In [17]:
# Ratings data.
ratings = pd.read_csv('user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))

# Split into train and test set in rating dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)
ratings_train = shuffled.take(int(0.8 * len(ratings)))
ratings_val = shuffled.skip(int(0.8 * len(ratings))).take(int(0.2 * len(ratings)))

# Movies dataset
movies = pd.read_csv('recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings_train = ratings_train.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
ratings_val = ratings_val.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

# Rest of the code remains the same...

user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x:  tf.strings.as_string(x["User_id"])))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
    # We derive from a custom base class to help reduce boilerplate. Under the hood,
    # these are still plain Keras Models.

    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval
    ):
        super().__init__()

        # Set up user and movie representations.
        self.user_model = user_model
        self.movie_model = movie_model

        # Set up a retrieval task.
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        # Define how the loss is computed.

        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates = movies.batch(128).map(movie_model)
    )
)

# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# fitting model to ratings_train dataset
model.fit(ratings_train.batch(5000), epochs=20)

# evaluate model to ratings_val dataset
model.evaluate(ratings_val.batch(5000), return_dict=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.0,
 'factorized_top_k/top_10_categorical_accuracy': 0.0006798096583224833,
 'factorized_top_k/top_50_categorical_accuracy': 0.0038522547110915184,
 'factorized_top_k/top_100_categorical_accuracy': 0.006571493111550808,
 'loss': 60111.12890625,
 'regularization_loss': 0,
 'total_loss': 60111.12890625}

### **Second Optimization**

In [21]:
# Ratings data.
ratings = pd.read_csv('user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))

# Split into train and test set in rating dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)
ratings_train = shuffled.take(int(0.7 * len(ratings)))
ratings_val = shuffled.skip(int(0.7 * len(ratings))).take(int(0.3 * len(ratings)))

# Movies dataset
movies = pd.read_csv('recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings_train = ratings_train.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
ratings_val = ratings_val.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

# Rest of the code remains the same...

user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: tf.strings.as_string(x["User_id"])))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval
    ):
        super().__init__()

        self.user_model = user_model
        self.movie_model = movie_model
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 128, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),  # Increased embedding size
     # add l2 regulazation
    tf.keras.layers.Dropout(0.2) # Added dropout regularization
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 128, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),  # Increased embedding size
    # add l2 regulazation
    tf.keras.layers.Dropout(0.2) # Added dropout regularization
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates=movies.batch(128).map(movie_model)
    )
)

# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# # Increase epochs for training
# model.fit(ratings_train.batch(5000), epochs=500)  # Increased number of epochs

# # Evaluate model on validation set
# model.evaluate(ratings_val.batch(5000), return_dict=True)

# Add early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_factorized_top_k/top_5_categorical_accuracy',
    patience=3,  # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the weights from the epoch with the lowest validation loss
)

# fitting model
model.fit(ratings_train.batch(100), validation_data=ratings_val.batch(100), epochs=500, callbacks=[early_stopping])

# Additional suggestions for experimentation:
# - Hyperparameter tuning
# - Data preprocessing techniques
# - Regularization techniques
# - Trying different loss functions
# - Ensemble methods


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500


<keras.callbacks.History at 0x169a92a5d90>

## **User regulazation L2**

In [None]:
# Ratings data.
ratings = pd.read_csv('user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))

# Split into train and test set in rating dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)
ratings_train = shuffled.take(int(0.8 * len(ratings)))
ratings_val = shuffled.skip(int(0.8 * len(ratings))).take(int(0.2 * len(ratings)))

# Movies dataset
movies = pd.read_csv('recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings_train = ratings_train.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
ratings_val = ratings_val.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

# Rest of the code remains the same...

user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x:  tf.strings.as_string(x["User_id"])))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
    # We derive from a custom base class to help reduce boilerplate. Under the hood,
    # these are still plain Keras Models.

    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval
    ):
        super().__init__()

        # Set up user and movie representations.
        self.user_model = user_model
        self.movie_model = movie_model

        # Set up a retrieval task.
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        # Define how the loss is computed.

        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64, embeddings_regularizer=tf.keras.regularizers.l2(0.01))
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64, embeddings_regularizer=tf.keras.regularizers.l2(0.01))
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates = movies.batch(128).map(movie_model)
    )
)

# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# fitting model to ratings_train dataset
model.fit(ratings_train.batch(5000), epochs=10)

# evaluate model to ratings_val dataset
model.evaluate(ratings_val.batch(5000), return_dict=True)




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.0,
 'factorized_top_k/top_10_categorical_accuracy': 0.0006798096583224833,
 'factorized_top_k/top_50_categorical_accuracy': 0.0033990482334047556,
 'factorized_top_k/top_100_categorical_accuracy': 0.007477906066924334,
 'loss': 50115.67578125,
 'regularization_loss': 2242.15966796875,
 'total_loss': 52357.8359375}

## **Dropout Layers & early stopping**

In [None]:
# Ratings data.
ratings = pd.read_csv('user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))

# Split into train and test set in rating dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)
ratings_train = shuffled.take(int(0.8 * len(ratings)))
ratings_val = shuffled.skip(int(0.8 * len(ratings))).take(int(0.2 * len(ratings)))

# Movies dataset
movies = pd.read_csv('recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings_train = ratings_train.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
ratings_val = ratings_val.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

# Rest of the code remains the same...

user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x:  tf.strings.as_string(x["User_id"])))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
    # We derive from a custom base class to help reduce boilerplate. Under the hood,
    # these are still plain Keras Models.

    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval
    ):
        super().__init__()

        # Set up user and movie representations.
        self.user_model = user_model
        self.movie_model = movie_model

        # Set up a retrieval task.
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        # Define how the loss is computed.

        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.2)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.2)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates = movies.batch(128).map(movie_model)
    )
)

# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Add early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,  # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the weights from the epoch with the lowest validation loss
)

# fitting model
model.fit(ratings_train.batch(10_000), validation_data=ratings_val.batch(10_000), epochs=10, callbacks=[early_stopping])





Epoch 1/10



Epoch 2/10
Epoch 3/10
Epoch 4/10


<keras.callbacks.History at 0x7f26fa0de830>

## **Model complexity**
add dense layer and increase embedding layer

In [None]:
# Ratings data.
ratings = pd.read_csv('user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))

# Split into train and test set in rating dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)
ratings_train = shuffled.take(int(0.8 * len(ratings)))
ratings_val = shuffled.skip(int(0.8 * len(ratings))).take(int(0.2 * len(ratings)))

# Movies dataset
movies = pd.read_csv('recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings_train = ratings_train.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
ratings_val = ratings_val.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

# Rest of the code remains the same...

user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x:  tf.strings.as_string(x["User_id"])))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
    # We derive from a custom base class to help reduce boilerplate. Under the hood,
    # these are still plain Keras Models.

    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval
    ):
        super().__init__()

        # Set up user and movie representations.
        self.user_model = user_model
        self.movie_model = movie_model

        # Set up a retrieval task.
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        # Define how the loss is computed.

        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 128, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.2)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 128, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.2)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates = movies.batch(128).map(movie_model)
    )
)

# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Add early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,  # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the weights from the epoch with the lowest validation loss
)

# fitting model
model.fit(ratings_train.batch(10_000), validation_data=ratings_val.batch(10_000), epochs=10, callbacks=[early_stopping])





Epoch 1/10



Epoch 2/10
Epoch 3/10
Epoch 4/10


<keras.callbacks.History at 0x7f26f99b1150>

## **Change learning rate**
Change dropout

In [None]:
# Ratings data.
ratings = pd.read_csv('user_rating.csv')
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings))

# Split into train and test set in rating dataset
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)
ratings_train = shuffled.take(int(0.8 * len(ratings)))
ratings_val = shuffled.skip(int(0.8 * len(ratings))).take(int(0.2 * len(ratings)))

# Movies dataset
movies = pd.read_csv('recipe_dataset.csv')
movies = tf.data.Dataset.from_tensor_slices(dict(movies))

# Select the basic features.
ratings_train = ratings_train.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
ratings_val = ratings_val.map(lambda x: {
    "movie_title": x["Title"],
    "user_id": tf.strings.as_string(x["User_id"])
})
movies = movies.map(lambda x: x["Title"])

# Rest of the code remains the same...

user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x:  tf.strings.as_string(x["User_id"])))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
    # We derive from a custom base class to help reduce boilerplate. Under the hood,
    # these are still plain Keras Models.

    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval
    ):
        super().__init__()

        # Set up user and movie representations.
        self.user_model = user_model
        self.movie_model = movie_model

        # Set up a retrieval task.
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        # Define how the loss is computed.

        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 132, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.3)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 132, embeddings_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.3)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates = movies.batch(128).map(movie_model)
    )
)

# Create a retrieval model.
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.01))

# Add early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_factorized_top_k/top_5_categorical_accuracy',
    patience=3,  # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the weights from the epoch with the lowest validation loss
)

# fitting model
model.fit(ratings_train.batch(10_000), validation_data=ratings_val.batch(10_000), epochs=10, callbacks=[early_stopping])



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f26f8d4ea10>