# Imports

In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

import numpy as np
from typing import Dict, Text

  from .autonotebook import tqdm as notebook_tqdm


# Get features, train/test split, get titles and ids, and get uniques 

In [4]:
ratings = tfds.load('movielens/100k-ratings', split='train')

ratings = ratings.map(lambda x: {
    'movie_title': x['movie_title'],
    'user_id': x['user_id'],
    'user_rating': x['user_rating']
})

tf.random.set_seed(42)
shuffled = ratings.shuffle(100000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80000)
test = shuffled.skip(80000).take(20000)

movie_titles = ratings.batch(1000000).map(lambda x: x['movie_title'])
user_ids = ratings.batch(1000000).map(lambda x: x['user_id'])

uniq_movie_titles = np.unique(np.concatenate(list(movie_titles)))
uniq_user_ids = np.unique(np.concatenate(list(user_ids)))

In [5]:
list(test)

[{'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b'M*A*S*H (1970)'>,
  'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'346'>,
  'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=4.0>},
 {'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b'Volcano (1997)'>,
  'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'602'>,
  'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=4.0>},
 {'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b'2001: A Space Odyssey (1968)'>,
  'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'393'>,
  'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>},
 {'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b'Dances with Wolves (1990)'>,
  'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'152'>,
  'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=5.0>},
 {'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b'Speed (1994)'>,
  'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'7

In [52]:
class RankingModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=uniq_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(uniq_user_ids) + 1, embedding_dimension)
        ])

        self.movie_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=uniq_movie_titles, mask_token=None),
            tf.keras.layers.Embedding(len(uniq_movie_titles) + 1, embedding_dimension)
        ])

        self.rating = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])

    def call(self, inputs):
        user_id, movie_title = inputs

        user_embedding = self.user_embedding(user_id)
        movie_embedding = self.movie_embedding(movie_title)

        return self.rating(tf.concat([user_embedding, movie_embedding], axis=1))

In [53]:
RankingModel()((["42"], ["One Flew Over the Cuckoo's Nest (1975)"]))
# 0.0184 is the prediction of how much user 42 would rate the movie specified.









<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.0066986]], dtype=float32)>

In [60]:
class MovieLensModel(tfrs.models.Model):
    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task : tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )
    
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        rating_prediction = self.ranking_model(
            (features['user_id'], features['movie_title']))
        
        return self.task(labels=features['user_rating'], predictions=rating_prediction)

In [61]:
model = MovieLensModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

cached_train = train.batch(8192).cache()
cached_test = test.batch(4096).cache()

model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x15d87660160>

In [78]:
test_ratings = {}
for m in test.take(5):
    test_ratings[m['movie_title'].numpy()] = RankingModel()((["42"], [m["movie_title"]]))

for m in sorted(test_ratings, key=test_ratings.get, reverse=True):
    print(m)









































b'Speed (1994)'
b'M*A*S*H (1970)'
b'Volcano (1997)'
b'Dances with Wolves (1990)'
b'2001: A Space Odyssey (1968)'
