<a href="https://colab.research.google.com/github/edwin-19/recommend-sys/blob/main/Ranking_Task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libs
!pip install -q tensorflow-recommenders

[?25l[K     |███▉                            | 10 kB 19.5 MB/s eta 0:00:01[K     |███████▋                        | 20 kB 25.7 MB/s eta 0:00:01[K     |███████████▌                    | 30 kB 28.8 MB/s eta 0:00:01[K     |███████████████▎                | 40 kB 20.6 MB/s eta 0:00:01[K     |███████████████████             | 51 kB 14.9 MB/s eta 0:00:01[K     |███████████████████████         | 61 kB 14.6 MB/s eta 0:00:01[K     |██████████████████████████▊     | 71 kB 11.2 MB/s eta 0:00:01[K     |██████████████████████████████▌ | 81 kB 12.2 MB/s eta 0:00:01[K     |████████████████████████████████| 85 kB 3.6 MB/s 
[?25h

In [24]:
from matplotlib import pyplot as plt
import pandas as pd
from typing import Dict, Text

In [2]:
import numpy as np

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

# Prepare data

In [3]:
ratings = tfds.load("movielens/100k-ratings", split="train")

[1mDownloading and preparing dataset movielens/100k-ratings/0.1.0 (download: 4.70 MiB, generated: 32.41 MiB, total: 37.10 MiB) to /root/tensorflow_datasets/movielens/100k-ratings/0.1.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]






0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/movielens/100k-ratings/0.1.0.incompleteIES00M/movielens-train.tfrecord


  0%|          | 0/100000 [00:00<?, ? examples/s]

[1mDataset movielens downloaded and prepared to /root/tensorflow_datasets/movielens/100k-ratings/0.1.0. Subsequent calls will reuse this data.[0m


In [5]:
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"]
})

In [6]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

In [7]:
movie_titles = ratings.batch(1_000_000).map(lambda x: x['movie_title'])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])

unique_movie_titles = np.unique(np.concatenate(list(movie_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [11]:
rating_df = tfds.as_dataframe(ratings)

# Build model

In [8]:
class RankingModel(tf.keras.Model):
  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
        tf.keras.layers.StringLookup(vocabulary=unique_user_ids, mask_token=None),
        tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # Compute embeddings for movies.
    self.movie_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(vocabulary=unique_movie_titles, mask_token=None),
      tf.keras.layers.Embedding(len(unique_movie_titles) + 1, embedding_dimension)
    ])

    # Compute predictions
    self.ratings = tf.keras.Sequential([
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])

  def call(self, inputs):
    user_id, movie_title = inputs

    user_embedding = self.user_embeddings(user_id)
    movie_embedding = self.movie_embeddings(movie_title)

    return self.ratings(tf.concat([
      user_embedding, movie_embedding
    ], axis=1))

In [21]:
sample = rating_df.sample(1)
movie_title = [sample['movie_title'].iloc[0].decode('utf-8')]
user_id = [sample['user_id'].iloc[0].decode('utf-8')]

In [22]:
# Test
RankingModel()((user_id, movie_title))

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-0.01140478]], dtype=float32)>

In [23]:
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [26]:
class MovielensModel(tfrs.models.Model):
  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = task

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features['user_id'], features['movie_title'])
    )

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop('user_rating')

    rating_predictions = self(features)

    return self.task(labels=labels, predictions=rating_predictions)

In [27]:
# Train the model
model = MovielensModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [28]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [30]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fc812393fd0>

In [31]:
model.evaluate(cached_test, return_dict=True)



{'loss': 1.1097450256347656,
 'regularization_loss': 0,
 'root_mean_squared_error': 1.062164068222046,
 'total_loss': 1.1097450256347656}

# Inference

In [41]:
test_ratings = {}
test_movie_titles = [item.decode('utf-8') for item in rating_df.sample(3)['movie_title'].tolist()]
for movie_title in test_movie_titles:
  test_ratings[movie_title] = model({
      'user_id': np.array(['42']),
      "movie_title": np.array([movie_title])
  })

In [45]:
for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print('Title: {}, Score: {}'.format(title, score))

Title: Star Wars (1977), Score: [[3.98456]]
Title: Addams Family Values (1993), Score: [[3.6088617]]
Title: To Have, or Not (1995), Score: [[3.4643583]]


# Serving

In [46]:
tf.saved_model.save(model, 'model')



INFO:tensorflow:Assets written to: model/assets


INFO:tensorflow:Assets written to: model/assets


In [47]:
loaded = tf.saved_model.load('model')

In [51]:
score = loaded({
    'user_id': np.array(['42']), 'movie_title': [movie_title]
})

print('Title: {}, Score: {}'.format(movie_title, score.numpy()))

Title: To Have, or Not (1995), Score: [[3.4643583]]
