<a href="https://colab.research.google.com/github/efcor/tf-sandbox/blob/main/model-attempt-1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow_recommenders

Collecting tensorflow_recommenders
  Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl.metadata (4.6 kB)
Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl (96 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow_recommenders
Successfully installed tensorflow_recommenders-0.7.3


In [3]:
import tensorflow_datasets as tfds

def _keep_rating_features(example):
  """Reduce one MovieLens example to the three features used in this notebook."""
  return {
    "movie_title": example["movie_title"],
    "user_id": example["user_id"],
    "user_rating": example["user_rating"],
  }


# Load the "train" split of MovieLens 100k and drop every feature except the
# movie title, the user id and the rating.
ratings = tfds.load("movielens/100k-ratings", split="train").map(_keep_rating_features)

Downloading and preparing dataset 4.70 MiB (download: 4.70 MiB, generated: 32.41 MiB, total: 37.10 MiB) to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/100000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/movielens/100k-ratings/incomplete.7HAS88_0.1.1/movielens-train.tfrecord*..…

Dataset movielens downloaded and prepared to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1. Subsequent calls will reuse this data.


In [4]:
import tensorflow as tf
import numpy as np

# Seed TensorFlow globally and shuffle once with a fixed seed;
# reshuffle_each_iteration=False keeps the order stable so the take/skip
# split below yields the same partitions on every pass.
tf.random.set_seed(524)
shuffled = ratings.shuffle(100_000, seed=524, reshuffle_each_iteration=False)

# First 80,000 shuffled examples for training, the following 20,000 for testing.
train = shuffled.take(80_000)
held_out = shuffled.skip(80_000)
test = held_out.take(20_000)

# Pull each string feature out of the dataset in batches of up to 100,000 rows.
one_big_batch = ratings.batch(100_000)
movie_titles = one_big_batch.map(lambda batch: batch["movie_title"])
user_ids = one_big_batch.map(lambda batch: batch["user_id"])

# Materialise the batches and reduce them to sorted arrays of distinct values;
# these serve as the lookup vocabularies of the embedding layers.
title_batches = list(movie_titles)
user_id_batches = list(user_ids)
unique_movie_titles = np.unique(np.concatenate(title_batches))
unique_user_ids = np.unique(np.concatenate(user_id_batches))

In [5]:
class RankingModel(tf.keras.Model):
  """Predicts a rating from a user id and a movie title.

  Each string input is mapped to an integer index by a StringLookup layer and
  then to a 32-dimensional embedding. The two embeddings are concatenated and
  fed through a small stack of dense layers ending in a single output unit.
  """

  def __init__(self):
    """Create the user tower, the movie tower and the dense rating head."""
    super().__init__()
    layers = tf.keras.layers
    embed_dim = 32

    # User tower: string id -> vocabulary index -> embedding vector.
    # The embedding table has one row more than the vocabulary has entries.
    user_lookup = layers.StringLookup(vocabulary=unique_user_ids, mask_token=None)
    user_table = layers.Embedding(len(unique_user_ids) + 1, embed_dim)
    self.user_embeddings = tf.keras.Sequential([user_lookup, user_table])

    # Movie tower: same construction, keyed by movie title.
    title_lookup = layers.StringLookup(vocabulary=unique_movie_titles, mask_token=None)
    title_table = layers.Embedding(len(unique_movie_titles) + 1, embed_dim)
    self.movie_embeddings = tf.keras.Sequential([title_lookup, title_table])

    # Rating head: two ReLU hidden layers, then a single linear unit that
    # emits the predicted rating.
    hidden_layers = [layers.Dense(units, activation="relu") for units in (256, 64)]
    self.ratings = tf.keras.Sequential(hidden_layers + [layers.Dense(1)])

  def call(self, inputs):
    """Score a batch of (user, movie) pairs.

    Args:
      inputs: a 2-tuple `(user_id, movie_title)` of string tensors.

    Returns:
      The output of the rating head applied to the user and movie embeddings
      concatenated along axis 1.
    """
    user_id, movie_title = inputs
    towers = [self.user_embeddings(user_id), self.movie_embeddings(movie_title)]
    return self.ratings(tf.concat(towers, axis=1))

In [8]:
# Smoke test: run a freshly constructed (untrained) RankingModel on a single
# user/movie pair and display the raw prediction.
sample_user = tf.constant(["42"])
sample_movie = tf.constant(["One Flew Over the Cuckoo's Nest (1975)"])
RankingModel()((sample_user, sample_movie))

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.01674954]], dtype=float32)>

In [23]:
import tensorflow_recommenders as tfrs
from typing import Dict, Text


In [24]:
class MovielensModel(tfrs.models.Model):
  """TFRS model that pairs a RankingModel with a Ranking task.

  The task uses mean squared error as the loss and reports root mean squared
  error as a metric.
  """

  def __init__(self):
    """Create the ranking network and the task that scores its predictions."""
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    mse_loss = tf.keras.losses.MeanSquaredError()
    rmse_metric = tf.keras.metrics.RootMeanSquaredError()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss=mse_loss,
      metrics=[rmse_metric],
    )

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Compute the task loss for one batch of features.

    Args:
      features: mapping that must contain "user_id", "movie_title" and
        "user_rating".
      training: part of the signature but not used by this implementation.

    Returns:
      The value returned by the Ranking task for the given labels and
      predictions.
    """
    model_inputs = (features["user_id"], features["movie_title"])
    predicted = self.ranking_model(model_inputs)

    # The task takes care of both the loss and the metric updates.
    return self.task(labels=features["user_rating"], predictions=predicted)



In [25]:
# Build the model and attach an Adagrad optimizer.
model = MovielensModel()
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.1)
model.compile(optimizer=adagrad)

# Shuffle, batch and cache the training data; batch and cache the test data.
# cached_test is prepared here but not consumed in this cell.
train_batches = train.shuffle(100_000).batch(8192)
cached_train = train_batches.cache()
test_batches = test.batch(4096)
cached_test = test_batches.cache()

# Train for three epochs; the returned History object is the cell's output.
model.fit(cached_train, epochs=3)

Epoch 1/3
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 58ms/step - loss: 4.2455 - regularization_loss: 0.0000e+00 - root_mean_squared_error: 2.6098 - total_loss: 4.2455
Epoch 2/3
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 1.2466 - regularization_loss: 0.0000e+00 - root_mean_squared_error: 1.1205 - total_loss: 1.2466
Epoch 3/3
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 1.2273 - regularization_loss: 0.0000e+00 - root_mean_squared_error: 1.1126 - total_loss: 1.2273


<keras.src.callbacks.history.History at 0x7db3b79d42e0>

In [26]:
# Container intended to map movie titles to predicted ratings. It stays empty
# for now because the draft loop below is disabled.
# NOTE(review): the draft calls RankingModel() inside the loop, which builds a
# new, untrained network on every iteration; the trained network is
# model.ranking_model. Fix that before re-enabling the draft.
test_ratings = dict()
# for m in test.take(5):
#   test_ratings[m["movie_title"].numpy()] = RankingModel()((tf.constant(["42"]), tf.constant([m["movie_title"]])))

# for m in sorted(test_ratings, key=test_ratings.get, reverse=True):
#   print(m)