## Objective: Rank relevant candidates based on rating scores

In [14]:
import pandas as pd
import tensorflow as tf
import keras 
import tensorflow_recommenders as tfrs 
from typing import Dict, Text
import numpy as np

In [3]:
# read the user / movie-title / rating slice from disk and preview its first rows
ratings_csv_path = './sliced_user_id_and_movie_title_data.csv'
user_movie_title_rating_data = pd.read_csv(ratings_csv_path)
user_movie_title_rating_data.head()

Unnamed: 0,userID,movieID,originalTitle,rating,review date
0,ur4592644,tt0120884,When the Light Comes,10,16 January 2005
1,ur3174947,tt0118688,Batman & Robin,3,16 January 2005
2,ur3780035,tt0387887,Bottom Live 2003: Weapons Grade Y-Fronts Tour,8,16 January 2005
3,ur4592628,tt0346491,Alexander,1,16 January 2005
4,ur3174947,tt0094721,Beetlejuice,8,16 January 2005


In [27]:
# peek at the first three movie titles
user_movie_title_rating_data['originalTitle'].head(3)

0                             When the Light Comes
1                                   Batman & Robin
2    Bottom Live 2003: Weapons Grade Y-Fronts Tour
Name: originalTitle, dtype: object

In [6]:
# Build a movie-title list whose values are all `str`, so the column has one
# uniform type; this may come in handy when creating the movie-title vocabulary.
# NOTE: the type test must use `isinstance`. Comparing `type(x)` against the
# string literal 'str' is always unequal (a type object is never a string), which
# makes any "already a string" branch unreachable.
updated_movie_titles = [
    title if isinstance(title, str) else str(title)
    for title in user_movie_title_rating_data['originalTitle']
]

# write the uniform-typed titles back and display the resulting column
user_movie_title_rating_data['originalTitle'] = updated_movie_titles
user_movie_title_rating_data['originalTitle']

0                                 When the Light Comes
1                                       Batman & Robin
2        Bottom Live 2003: Weapons Grade Y-Fronts Tour
3                                            Alexander
4                                          Beetlejuice
                             ...                      
49995               The Five People You Meet in Heaven
49996                                         Sin City
49997                                         Earthsea
49998                                           L√©olo
49999                                           Sahara
Name: originalTitle, Length: 50000, dtype: object

In [7]:
# turn the dataframe into a tf.data dataset: one dict of column values per row
column_value_lists = user_movie_title_rating_data.to_dict(orient='list')
user_movie_title_rating_data_tensor = tf.data.Dataset.from_tensor_slices(column_value_lists)

In [8]:
# keep only the features the ranking model needs, still as a dict per element
needed_features = ('userID', 'originalTitle', 'rating')
user_id_title_rating_tf_data = user_movie_title_rating_data_tensor.map(
    lambda row: {feature: row[feature] for feature in needed_features}
)

In [9]:
# shuffle once with a fixed seed, then carve out the train, test and val splits
tf.random.set_seed(20)

shuffled = user_id_title_rating_tf_data.shuffle(
    buffer_size=50000,
    seed=20,
    reshuffle_each_iteration=False,  # same order on every pass, so the splits stay disjoint
)

# first 30000 elements -> train, next 10000 -> test, following 10000 -> val
train = shuffled.take(30000)
test = shuffled.skip(30000).take(10000)
val = shuffled.skip(40000).take(10000)

In [13]:
# collect the distinct user ids and movie titles (used as lookup vocabularies)
batched_features = user_id_title_rating_tf_data.batch(1000)
user_ids = batched_features.map(lambda batch: batch['userID'])
movie_titles = batched_features.map(lambda batch: batch['originalTitle'])

# stitch the per-batch arrays together, then de-duplicate
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
unique_movie_titles = np.unique(np.concatenate(list(movie_titles)))

In [None]:
# Ranking model built on top of the keras base Model

class RankingModel(tf.keras.Model):
    """Predict a rating value for a (user id, movie title) pair.

    Each input is mapped through its own string lookup and embedding; the two
    embeddings are concatenated and passed through a stack of dense layers
    whose last layer has a single output unit.

    The lookup vocabularies come from the notebook-level arrays
    ``unique_user_ids`` and ``unique_movie_titles``.
    """

    def __init__(self):
        super().__init__()  # initialise the keras.Model machinery first
        embed_dim = 32

        # user tower: id string -> integer index -> embedding vector.
        # The embedding has one row more than the vocabulary
        # (presumably for StringLookup's out-of-vocabulary index).
        user_lookup = tf.keras.layers.StringLookup(max_tokens=None, vocabulary=unique_user_ids)
        user_table = tf.keras.layers.Embedding(len(unique_user_ids) + 1, embed_dim)
        self.user_embedding = tf.keras.Sequential([user_lookup, user_table])

        # movie tower: same structure, keyed by movie title
        movie_lookup = tf.keras.layers.StringLookup(max_tokens=None, vocabulary=unique_movie_titles)
        movie_table = tf.keras.layers.Embedding(len(unique_movie_titles) + 1, embed_dim)
        self.movie_embedding = tf.keras.Sequential([movie_lookup, movie_table])

        # prediction head: two hidden relu layers, then one linear output unit
        hidden_wide = tf.keras.layers.Dense(256, activation="relu")
        hidden_narrow = tf.keras.layers.Dense(64, activation="relu")
        rating_output = tf.keras.layers.Dense(1)
        self.rating = tf.keras.Sequential([hidden_wide, hidden_narrow, rating_output])

    def call(self, inputs):
        """Return the predicted rating for ``inputs = (user_id, movie_title)``."""
        user_ids_in, titles_in = inputs

        # embed both sides of the pair
        user_vectors = self.user_embedding(user_ids_in)
        movie_vectors = self.movie_embedding(titles_in)

        # join the two embeddings along axis 1 and score the result
        joined = tf.concat([user_vectors, movie_vectors], axis=1)
        return self.rating(joined)



In [None]:
# smoke test: a freshly built model should return a rating value for one pair
sample_pair = (['ur4592644'], ['When the Light Comes'])
RankingModel()(sample_pair)

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-0.00846763]], dtype=float32)>

In [20]:
# the ranking task pairs a loss (mean squared error) with a metric (RMSE)
rating_loss = tf.keras.losses.MeanSquaredError()
rating_metrics = [tf.keras.metrics.RootMeanSquaredError()]
tfrs.tasks.Ranking(loss=rating_loss, metrics=rating_metrics)

<tensorflow_recommenders.tasks.ranking.Ranking at 0x1485bc4f0>

##  Complete Model Architecture with loss function and metrics

In [21]:
class MovieModel(tfrs.models.Model):
    """Full rating model: a ``RankingModel`` combined with a TFRS ranking task.

    Feature batches are dicts keyed by 'userID' and 'originalTitle'; batches
    passed to ``compute_loss`` must also carry the true 'rating'.
    """

    def __init__(self):
        # call the super class init method
        super().__init__()
        self.ranking: tf.keras.Model = RankingModel()
        # the task computes the loss (MSE) and tracks the metric (RMSE)
        self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

    def call(self, inputs: Dict[Text, tf.Tensor]):
        """Return the ranking model's prediction for the given features."""
        return self.ranking((inputs['userID'], inputs['originalTitle']))

    def compute_loss(self, inputs: Dict[Text, tf.Tensor], training=False):
        """Compute the task loss for one batch of features.

        Args:
            inputs: feature dict containing 'userID', 'originalTitle' and 'rating'.
            training: accepted for interface compatibility; not used in this method.

        Returns:
            The value returned by the ranking task for the true and predicted ratings.

        Raises:
            KeyError: if 'rating' is missing from ``inputs``.
        """
        # Read the label without mutating the caller's dict: popping it would
        # make a second call with the same batch fail with KeyError.
        true_labels = inputs['rating']
        features = {name: value for name, value in inputs.items() if name != 'rating'}
        predicted_labels = self.call(inputs=features)

        # computing the loss and the metrics
        return self.task(labels=true_labels, predictions=predicted_labels)

In [23]:
# build the full model and attach the Adagrad optimizer
model = MovieModel()
adagrad = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.1)
model.compile(optimizer=adagrad)

In [24]:
# batch each split and cache the resulting batches
cached_train = train.batch(batch_size=1000).cache()
cached_test = test.batch(batch_size=500).cache()
cached_val = val.batch(batch_size=500).cache()

In [None]:
# fit for five epochs, validating against the validation split after each one
model.fit(
    x=cached_train,
    epochs=5,
    validation_data=cached_val,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1485a96d0>

In [26]:
# score the trained model on the held-out test split; results come back as a dict
model.evaluate(x=cached_test, return_dict=True)



{'root_mean_squared_error': 2.942007541656494,
 'loss': 8.940866470336914,
 'regularization_loss': 0,
 'total_loss': 8.940866470336914}

### NB: A lower RMSE indicates better accuracy at predicting ratings

In [30]:
# Predict one user's rating for a few movies, then rank the movies by that prediction

test_movie_titles = [
    'When the Light Comes',
    'Batman & Robin',
    'Bottom Live 2003: Weapons Grade Y-Fronts Tour',
]
# one model call per title, always for the same user id
test_ratings = {
    title: model({
        'userID': np.array(['ur4592644']),
        'originalTitle': np.array([title]),
    })
    for title in test_movie_titles
}

print('ratings:')
# highest predicted rating first
for title, predicted in sorted(test_ratings.items(), key=lambda pair: pair[1], reverse=True):
    print(f'{title}: {predicted}')



ratings:
Bottom Live 2003: Weapons Grade Y-Fronts Tour: [[6.107675]]
When the Light Comes: [[5.6205826]]
Batman & Robin: [[2.104976]]


In [38]:
# list the top-level layers that make up the model
for sub_layer in model.layers:
    print(sub_layer)

<__main__.RankingModel object at 0x1485b8820>
<tensorflow_recommenders.tasks.ranking.Ranking object at 0x1485a22b0>


In [44]:
# model checkpointing as an alternative to saving the model explicitly:
# the callback writes the model to ./ranking_model during training
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='./ranking_model',
    save_best_only=True,
)
model.fit(
    x=cached_train,
    validation_data=cached_val,
    epochs=5,
    callbacks=[checkpoint],
)

Epoch 1/5


INFO:tensorflow:Assets written to: ./ranking_model/assets


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1481e77f0>

In [45]:
# reload the model that the checkpoint callback wrote to disk
loaded_model = tf.keras.models.load_model(filepath='./ranking_model')

In [46]:
# Repeat the ranking check with the reloaded model: predict, then rank by prediction

test_movie_titles = [
    'When the Light Comes',
    'Batman & Robin',
    'Bottom Live 2003: Weapons Grade Y-Fronts Tour',
]
# one call to the reloaded model per title, always for the same user id
test_ratingss = {
    title: loaded_model({
        'userID': np.array(['ur4592644']),
        'originalTitle': np.array([title]),
    })
    for title in test_movie_titles
}

print('ratings:')
# highest predicted rating first
for title, predicted in sorted(test_ratingss.items(), key=lambda pair: pair[1], reverse=True):
    print(f'{title}: {predicted}')

ratings:
Bottom Live 2003: Weapons Grade Y-Fronts Tour: [[5.550411]]
When the Light Comes: [[5.1831145]]
Batman & Robin: [[1.3363197]]


In [49]:
# convert the checkpointed SavedModel into a TFLite flatbuffer
converter = tf.lite.TFLiteConverter.from_saved_model('./ranking_model')
tf_lite_model = converter.convert()

# Write through a context manager so the file handle is closed (and flushed)
# deterministically; a bare open(...).write(...) leaves the handle open.
with open('converted_model.tflite', 'wb') as tflite_file:
    bytes_written = tflite_file.write(tf_lite_model)

# show the number of bytes written as the cell's output
bytes_written

2025-03-27 17:54:55.909142: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-03-27 17:54:55.909318: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-03-27 17:54:55.912112: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: ./ranking_model
2025-03-27 17:54:55.914910: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-03-27 17:54:55.914914: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: ./ranking_model
2025-03-27 17:54:55.921444: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2025-03-27 17:54:55.922396: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-03-27 17:54:55.965601: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: ./ranking_model
2025-03-27

6917052