<a href="https://colab.research.google.com/github/efcor/tf-sandbox/blob/main/model-attempt-3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# The tutorial I'm following seems to have some breaking changes with recent
# versions, so force the runtime to downgrade tensorflow via uninstall and
# reinstall. Note: requires a restart of the runtime.
!pip uninstall tensorflow_recommenders
!pip uninstall tensorflow
!pip install tensorflow==2.12
!pip install -q tensorflow_recommenders

In [1]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [2]:
print("TensorFlow version:", tf.__version__)
print("Recommenders version:", tfrs.__version__)
# Good to go if this outputs:
# TensorFlow version: 2.12.0
# Recommenders version: v0.7.3

TensorFlow version: 2.12.0
Recommenders version: v0.7.3


In [3]:
ratings = tfds.load("movielens/100k-ratings", split="train")

ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"]
})

Downloading and preparing dataset 4.70 MiB (download: 4.70 MiB, generated: 32.41 MiB, total: 37.10 MiB) to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/100000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/movielens/100k-ratings/incomplete.YCOC4N_0.1.1/movielens-train.tfrecord*..…

Dataset movielens downloaded and prepared to /root/tensorflow_datasets/movielens/100k-ratings/0.1.1. Subsequent calls will reuse this data.


In [4]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

In [5]:
movie_titles = ratings.batch(1_000_000).map(lambda x: x["movie_title"])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])

unique_movie_titles = np.unique(np.concatenate(list(movie_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [6]:
class RankingModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # Compute embeddings for movies.
    self.movie_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_movie_titles, mask_token=None),
      tf.keras.layers.Embedding(len(unique_movie_titles) + 1, embedding_dimension)
    ])

    # Compute predictions.
    self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
  ])

  def call(self, inputs):

    user_id, movie_title = inputs

    user_embedding = self.user_embeddings(user_id)
    movie_embedding = self.movie_embeddings(movie_title)

    return self.ratings(tf.concat([user_embedding, movie_embedding], axis=1))

In [7]:
RankingModel()((["42"], ["One Flew Over the Cuckoo's Nest (1975)"]))



<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.01659754]], dtype=float32)>

In [8]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [9]:
class MovielensModel(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["user_id"], features["movie_title"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("user_rating")

    rating_predictions = self(features)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

In [10]:
model = MovielensModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [11]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [12]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x79f82b8d25f0>

In [13]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.106919765472412,
 'loss': 1.2038668394088745,
 'regularization_loss': 0,
 'total_loss': 1.2038668394088745}

In [14]:
test_ratings = {}
test_movie_titles = ["M*A*S*H (1970)", "Dances with Wolves (1990)", "Speed (1994)"]
for movie_title in test_movie_titles:
  test_ratings[movie_title] = model({
      "user_id": np.array(["42"]),
      "movie_title": np.array([movie_title])
  })

print("Ratings:")
for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

Ratings:
Dances with Wolves (1990): [[3.618568]]
M*A*S*H (1970): [[3.6054282]]
Speed (1994): [[3.5944457]]


In [None]:
# Export the query model. In Colab, this saves
# to /content/movie_recommender_model
tf.saved_model.save(model, 'movie_recommender_model')

# Zip the model folder so I can download it to move it to my server
!zip -r movie_recommender_model.zip movie_recommender_model

# Load it back; can also be done in TensorFlow Serving.
loaded = tf.saved_model.load('movie_recommender_model')

loaded({"user_id": np.array(["42"]), "movie_title": ["Speed (1994)"]}).numpy()

In [18]:
# Now I'm gonna try running a hundred movies through to see if it can handle it

loaded = tf.saved_model.load('movie_recommender_model')

my_ratings = {}
my_movie_titles = ['Toy Story (1995)','Jumanji (1995)','Grumpier Old Men (1995)','Waiting to Exhale (1995)','Father of the Bride Part II (1995)','Heat (1995)','Sabrina (1995)','Tom and Huck (1995)','Sudden Death (1995)','GoldenEye (1995)','American President, The (1995)','Dracula: Dead and Loving It (1995)','Balto (1995)','Nixon (1995)','Cutthroat Island (1995)','Casino (1995)','Sense and Sensibility (1995)','Four Rooms (1995)','Ace Ventura: When Nature Calls (1995)','Money Train (1995)','Get Shorty (1995)','Copycat (1995)','Assassins (1995)','Powder (1995)','Leaving Las Vegas (1995)','Othello (1995)','Now and Then (1995)','Persuasion (1995)','City of Lost Children, The (Cité des enfants perdus, La) (1995)','Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)','Dangerous Minds (1995)','Twelve Monkeys (a.k.a. 12 Monkeys) (1995)','Babe (1995)','Dead Man Walking (1995)','It Takes Two (1995)','Clueless (1995)','Cry, the Beloved Country (1995)','Richard III (1995)','Dead Presidents (1995)','Restoration (1995)','Mortal Kombat (1995)','To Die For (1995)','How to Make an American Quilt (1995)','Seven (a.k.a. Se7en) (1995)','Pocahontas (1995)','When Night Is Falling (1995)','Usual Suspects, The (1995)','Mighty Aphrodite (1995)','Lamerica (1994)','Big Green, The (1995)','Georgia (1995)','Home for the Holidays (1995)','Postman, The (Postino, Il) (1994)','Indian in the Cupboard, The (1995)','Eye for an Eye (1996)','Two if by Sea (1996)','Bio-Dome (1996)','Lawnmower Man 2: Beyond Cyberspace (1996)','French Twist (Gazon maudit) (1995)','Friday (1995)','From Dusk Till Dawn (1996)','Fair Game (1995)','Kicking and Screaming (1995)','Misérables, Les (1995)','Bed of Roses (1996)','Big Bully (1996)','Screamers (1995)','Nico Icon (1995)','Crossing Guard, The (1995)','Juror, The (1996)','White Balloon, The (Badkonake sefid) (1995)','Once Upon a Time... When We Were Colored (1995)','Angels and Insects (1995)','White Squall (1996)','Dunston Checks In (1996)','Black Sheep (1996)','Nick of Time (1995)','Mary Reilly (1996)','Vampire in Brooklyn (1995)','Beautiful Girls (1996)','Broken Arrow (1996)','In the Bleak Midwinter (1995)','Hate (Haine, La) (1995)','Heidi Fleiss: Hollywood Madam (1995)','City Hall (1996)','Bottle Rocket (1996)','Mr. Wrong (1996)','Unforgettable (1996)','Happy Gilmore (1996)','Bridges of Madison County, The (1995)','Nobody Loves Me (Keiner liebt mich) (1994)','Muppet Treasure Island (1996)','Catwalk (1996)','Braveheart (1995)','Taxi Driver (1976)','Rumble in the Bronx (Hont faan kui) (1995)']
for movie_title in my_movie_titles:
  my_ratings[movie_title] = loaded({
      "user_id": np.array(["42"]),
      "movie_title": np.array([movie_title])
  })

print("Ratings:")
for title, score in sorted(my_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

Ratings:
Babe (1995): [[3.671849]]
Dead Man Walking (1995): [[3.658779]]
Friday (1995): [[3.634867]]
Assassins (1995): [[3.6308823]]
Money Train (1995): [[3.6289783]]
Copycat (1995): [[3.6258764]]
Hate (Haine, La) (1995): [[3.623355]]
Cutthroat Island (1995): [[3.6175802]]
Taxi Driver (1976): [[3.6169782]]
Sabrina (1995): [[3.616635]]
From Dusk Till Dawn (1996): [[3.6093884]]
Sense and Sensibility (1995): [[3.6056042]]
Now and Then (1995): [[3.6053486]]
Black Sheep (1996): [[3.6043665]]
Lamerica (1994): [[3.6033177]]
Lawnmower Man 2: Beyond Cyberspace (1996): [[3.6022308]]
Richard III (1995): [[3.5987382]]
Usual Suspects, The (1995): [[3.5981903]]
Braveheart (1995): [[3.5918171]]
City Hall (1996): [[3.5900671]]
Leaving Las Vegas (1995): [[3.5898123]]
In the Bleak Midwinter (1995): [[3.5884795]]
When Night Is Falling (1995): [[3.5867128]]
Clueless (1995): [[3.5860054]]
Dangerous Minds (1995): [[3.5812607]]
To Die For (1995): [[3.5806246]]
Toy Story (1995): [[3.579571]]
Tom and Huck (199