**MODEL 1 -- COLLABORATIVE MODEL**

In [1]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

# Read the data
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Select the basic features
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"]
})
movies = movies.map(lambda x: x["movie_title"])

# Building vocabularies
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

# Split the data into training and test sets
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

# Define the model
class MovieLensModel(tfrs.Model):
    def __init__(self, user_model: tf.keras.Model, movie_model: tf.keras.Model, task: tfrs.tasks.Retrieval):
        super().__init__()
        self.user_model = user_model
        self.movie_model = movie_model
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])
        return self.task(user_embeddings, movie_embeddings)

# Define user and movie models
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64)
])

# Define the retrieval task
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)
))

# Create a retrieval model
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Train the model for 3 epochs
model.fit(train.batch(4096), epochs=3)

# Set up the brute-force search index
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(
    movies.batch(100).map(lambda title: (title, model.movie_model(title))))

# Get some recommendations
_, titles = index(np.array(["42"]))
print(f"Top 3 recommendations for user 42: {titles[0, :3]}")

# Define a function to compute evaluation metrics
def evaluate_model(model, test_ratings):
    test_ratings = test_ratings.batch(4096)

    mrr_metric = tf.keras.metrics.Mean(name="MRR")
    ndcg_metric = tf.keras.metrics.Mean(name="NDCG")

    for batch in test_ratings:
        user_embeddings = model.user_model(batch["user_id"])
        movie_embeddings = model.movie_model(batch["movie_title"])
        scores = tf.linalg.matmul(user_embeddings, movie_embeddings, transpose_b=True)

        # Convert scores to ranks
        ranks = tf.argsort(tf.argsort(scores, direction='DESCENDING'), direction='ASCENDING') + 1

        # Compute MRR
        relevant = tf.equal(ranks, 1)
        mrr = tf.reduce_sum(tf.cast(relevant, tf.float32) / tf.cast(ranks, tf.float32))
        mrr_metric.update_state(mrr)

        # Compute NDCG
        gains = tf.cast(relevant, tf.float32) / tf.math.log1p(tf.cast(ranks, tf.float32))
        ndcg = tf.reduce_sum(gains)
        ndcg_metric.update_state(ndcg)

    print(f"MRR: {mrr_metric.result().numpy()}")
    print(f"NDCG: {ndcg_metric.result().numpy()}")

# Evaluate the retrieval model
evaluate_model(model, test)


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/96.2 kB[0m [31m971.6 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m92.2/96.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading and preparing dataset 4.70 MiB (download: 4.70 MiB, generated: 32.41 MiB, total: 37.10 MiB) to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/100000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/movielens/100k-ratings/incomplete.DAU2H7_0.1.1/movielens-train.tfrecord*..…

Dataset movielens downloaded and prepared to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1. Subsequent calls will reuse this data.
Downloading and preparing dataset 4.70 MiB (download: 4.70 MiB, generated: 150.35 KiB, total: 4.84 MiB) to /root/tensorflow_datasets/movielens/100k-movies/0.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/1682 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/movielens/100k-movies/incomplete.FORFAE_0.1.1/movielens-train.tfrecord*...…

Dataset movielens downloaded and prepared to /root/tensorflow_datasets/movielens/100k-movies/0.1.1. Subsequent calls will reuse this data.




Epoch 1/3
Epoch 2/3
Epoch 3/3
Top 3 recommendations for user 42: [b'Only You (1994)' b'Rent-a-Kid (1995)' b'Cowboy Way, The (1994)']
MRR: 4000.0
NDCG: 5770.7802734375


**MODEL 2 -- MATRIX FACTORIZATION BASED MODEL**

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from typing import Dict

# Load the MovieLens 100k dataset.
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Select the features.
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"]
})
movies = movies.map(lambda x: x["movie_title"])

# Define the vocabularies.
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

# Define the model.
class MovieLensRankingModel(tfrs.Model):
    def __init__(self, user_model: tf.keras.Model, movie_model: tf.keras.Model, task: tfrs.tasks.Ranking):
        super().__init__()
        self.user_model = user_model
        self.movie_model = movie_model
        self.task = task

    def compute_loss(self, features: Dict[str, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])
        ratings = features["user_rating"]

        return self.task(user_embeddings, movie_embeddings, ratings)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 64)
])

# Define the ranking task.
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

# Instantiate the ranking model.
model = MovieLensRankingModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Train the model.
model.fit(ratings.batch(4096), epochs=3)

# Evaluate the model.
test_ratings = tfds.load('movielens/100k-ratings', split="train")
test_ratings = test_ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"]
})

# Evaluation function to compute NDCG and MRR
def evaluate_model(model, test_ratings):
    test_ratings = test_ratings.batch(4096)

    ndcg_metric = tf.keras.metrics.Mean(name="NDCG")
    mrr_metric = tf.keras.metrics.Mean(name="MRR")

    embedding_layer = model.movie_model.layers[1]

    for batch in test_ratings:
        user_embeddings = model.user_model(batch["user_id"])
        movie_embeddings = model.movie_model(batch["movie_title"])
        ratings = batch["user_rating"]

        # Compute the scores for all movies
        all_movie_embeddings = embedding_layer(tf.range(movie_titles_vocabulary.vocab_size()))
        all_scores = tf.linalg.matmul(user_embeddings, all_movie_embeddings, transpose_b=True)

        # Compute ranks and sort them
        ranks = tf.argsort(tf.argsort(all_scores, axis=-1, direction='DESCENDING'), axis=-1)
        ranks = tf.cast(ranks, tf.float32) + 1.0

        # Compute NDCG
        relevant = tf.cast(tf.equal(ranks, 1.0), tf.float32)
        ndcg = tf.reduce_sum(relevant / tf.math.log1p(ranks))
        ndcg_metric.update_state(ndcg)

        # Compute MRR
        reciprocal_ranks = tf.reduce_sum(relevant / ranks)
        mrr_metric.update_state(reciprocal_ranks)

    print(f"NDCG: {ndcg_metric.result().numpy()}")
    print(f"MRR: {mrr_metric.result().numpy()}")

# Evaluate the model
evaluate_model(model, test_ratings)

# Define a function to get movie recommendations for a user.
def get_movie_recommendations(model, user_id):
    user_embedding = model.user_model(tf.constant([user_id]))
    embedding_layer = model.movie_model.layers[1]
    movie_embeddings = embedding_layer(tf.range(movie_titles_vocabulary.vocab_size()))
    scores = tf.reduce_sum(user_embedding * movie_embeddings, axis=1)
    movie_indices = tf.argsort(scores, direction='DESCENDING')[:3]
    recommended_movie_titles = tf.gather(movie_titles_vocabulary.get_vocabulary(), movie_indices)
    return recommended_movie_titles.numpy()

# Get recommendations for a user (e.g., user_id="42").
recommended_movies = get_movie_recommendations(model, "42")
print(f"Top 3 recommendations for user 42: {recommended_movies}")




Epoch 1/3
Epoch 2/3
Epoch 3/3




NDCG: 5770.78076171875
MRR: 4000.0
Top 3 recommendations for user 42: [b'Apostle, The (1997)' b'Tigrero: A Film That Was Never Made (1994)'
 b'3 Ninjas: High Noon At Mega Mountain (1998)']


**MODEL 3 -- NEURAL COLLABORATIVE FILTERING**

In [3]:
!pip install tensorflow-recommenders
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from typing import Dict

# Load the MovieLens 100k dataset.
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Select the features.
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"]
})
movies = movies.map(lambda x: x["movie_title"])

# Define the vocabularies.
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

class NCFModel(tfrs.Model):
    def __init__(self, user_model: tf.keras.Model, movie_model: tf.keras.Model, rating_model: tf.keras.Model, task: tfrs.tasks.Ranking):
        super().__init__()
        self.user_model = user_model
        self.movie_model = movie_model
        self.rating_model = rating_model
        self.task = task

    def compute_loss(self, features: Dict[str, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])
        ratings = features["user_rating"]

        x = tf.concat([user_embeddings, movie_embeddings], axis=1)
        rating_predictions = self.rating_model(x)

        return self.task(ratings, rating_predictions)

# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 32)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocab_size(), 32)
])

# Define the rating prediction model.
rating_model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1)
])

# Define the ranking task.
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

# Instantiate the NCF model.
model = NCFModel(user_model, movie_model, rating_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Train the model.
model.fit(ratings.batch(4096), epochs=3)

# Evaluate the model.
test_ratings = tfds.load('movielens/100k-ratings', split="train")
test_ratings = test_ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"]
})

eval_result = model.evaluate(test_ratings.batch(4096))
print(f"Evaluation result: {eval_result}")

# Define evaluation metrics: NDCG and MRR
def compute_metrics(model, dataset):
    ndcg_metric = tf.keras.metrics.Mean(name="NDCG")
    mrr_metric = tf.keras.metrics.Mean(name="MRR")

    embedding_layer = model.movie_model.layers[1]

    for batch in dataset:
        user_embeddings = model.user_model(batch["user_id"])
        movie_embeddings = model.movie_model(batch["movie_title"])
        ratings = batch["user_rating"]

        all_movie_embeddings = embedding_layer(tf.range(movie_titles_vocabulary.vocab_size()))
        all_scores = tf.linalg.matmul(user_embeddings, all_movie_embeddings, transpose_b=True)

        ranks = tf.argsort(tf.argsort(all_scores, axis=-1, direction='DESCENDING'), axis=-1)
        ranks = tf.cast(ranks, tf.float32) + 1.0

        relevant = tf.cast(tf.equal(ranks, 1.0), tf.float32)
        ndcg = tf.reduce_sum(relevant / tf.math.log1p(ranks))
        ndcg_metric.update_state(ndcg)

        reciprocal_ranks = tf.reduce_sum(relevant / ranks)
        mrr_metric.update_state(reciprocal_ranks)

    print(f"NDCG: {ndcg_metric.result().numpy()}")
    print(f"MRR: {mrr_metric.result().numpy()}")

# Evaluate using custom metrics
compute_metrics(model, test_ratings.batch(4096))

# Define a function to get movie recommendations for a user.
def get_movie_recommendations(model, user_id):
    user_embedding = model.user_model(tf.constant([user_id]))
    movie_embeddings = model.movie_model.layers[1](tf.range(movie_titles_vocabulary.vocab_size()))
    scores = tf.reduce_sum(user_embedding * movie_embeddings, axis=1)
    movie_indices = tf.argsort(scores, direction='DESCENDING')[:3]
    recommended_movie_titles = tf.gather(movie_titles_vocabulary.get_vocabulary(), movie_indices)
    return recommended_movie_titles.numpy()

# Get recommendations for a user (e.g., user_id="42").
recommended_movies = get_movie_recommendations(model, "42")
print(f"Top 3 recommendations for user 42: {recommended_movies}")






Epoch 1/3
Epoch 2/3
Epoch 3/3
Evaluation result: [0.9973999261856079, 0.9555082321166992, 0, 0.9555082321166992]




NDCG: 5770.78076171875
MRR: 4000.0
Top 3 recommendations for user 42: [b'Replacement Killers, The (1998)' b'Catwalk (1995)' b'Panther (1995)']
