In [19]:
# importing libraries 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import tensorflow as tf 
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)




In [11]:
# Load Datasets 

links   = pd.read_csv("/kaggle/input/movies/ml-latest-small/links.csv")
movies  = pd.read_csv("/kaggle/input/movies/ml-latest-small/movies.csv")
ratings = pd.read_csv("/kaggle/input/movies/ml-latest-small/ratings.csv")
tags    = pd.read_csv("/kaggle/input/movies/ml-latest-small/tags.csv")

#print(links.head())
#print(movies.head())
#print(ratings.head())
#print(tags.head())


data = pd.merge(ratings,movies,on = 'movieId', how = 'left')
print(data.head())

   userId  movieId  rating  timestamp                        title  \
0       1        1     4.0  964982703             Toy Story (1995)   
1       1        3     4.0  964981247      Grumpier Old Men (1995)   
2       1        6     4.0  964982224                  Heat (1995)   
3       1       47     5.0  964983815  Seven (a.k.a. Se7en) (1995)   
4       1       50     5.0  964982931   Usual Suspects, The (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                               Comedy|Romance  
2                        Action|Crime|Thriller  
3                             Mystery|Thriller  
4                       Crime|Mystery|Thriller  


In [14]:
movie_user_matrix = ratings.pivot(index = "movieId", columns = "userId", values = "rating")
print(movie_user_matrix.head())

userId   1    2    3    4    5    6    7    8    9    10   ...  601  602  603  \
movieId                                                    ...                  
1        4.0  NaN  NaN  NaN  4.0  NaN  4.5  NaN  NaN  NaN  ...  4.0  NaN  4.0   
2        NaN  NaN  NaN  NaN  NaN  4.0  NaN  4.0  NaN  NaN  ...  NaN  4.0  NaN   
3        4.0  NaN  NaN  NaN  NaN  5.0  NaN  NaN  NaN  NaN  ...  NaN  NaN  NaN   
4        NaN  NaN  NaN  NaN  NaN  3.0  NaN  NaN  NaN  NaN  ...  NaN  NaN  NaN   
5        NaN  NaN  NaN  NaN  NaN  5.0  NaN  NaN  NaN  NaN  ...  NaN  NaN  NaN   

userId   604  605  606  607  608  609  610  
movieId                                     
1        3.0  4.0  2.5  4.0  2.5  3.0  5.0  
2        5.0  3.5  NaN  NaN  2.0  NaN  NaN  
3        NaN  NaN  NaN  NaN  2.0  NaN  NaN  
4        NaN  NaN  NaN  NaN  NaN  NaN  NaN  
5        3.0  NaN  NaN  NaN  NaN  NaN  NaN  

[5 rows x 610 columns]


Symbol	Meaning	Shape / Notes

X = Movie feature/ genres matrix(num_movies × num_features)

Y =	Movie vs User rating matrix	(num_movies × num_users) → actual ratings

W = Parameter matrix(num_movies × num_features) → weights for features (learned)

b = Bias term(num_movies × num_users) or (num_movies, num_users) depending on model

R = Indicator matrix(num_movies × num_users) → 1 if rating exists, 0 if missing

In [17]:
# Create Y AND R matrices 

Y = movie_user_matrix.values 
print(f"shape of movie vs User rating matrix: {Y.shape}")
R = (~np.isnan(Y)).astype(float)
print(f"shape of Indicator matrix: {R.shape}")

shape of movie vs User rating matrix: (9724, 610)
shape of Indicator matrix: (9724, 610)


In [22]:
# Initializes X and W and b (genres and parameters W and b)

num_movies, num_users = Y.shape 
num_features = 10 
tf.random.set_seed(1234)
W = tf.Variable(tf.random.normal(shape = (num_users, num_features),mean = 0.0, stddev = 0.01, dtype=tf.float64), 
                dtype = tf.float64)

X = tf.Variable(tf.random.normal(shape = (num_movies, num_features),mean = 0.0, stddev = 0.01, dtype=tf.float64),
                dtype = tf.float64)

b = tf.Variable(tf.random.normal(shape = (1,num_users),mean = 0.0, stddev = 0.01, dtype=tf.float64),
                dtype = tf.float64)


                
print(f"shape of parameter matrix: {W.shape}")
print(f"shape of movie_features/genres matrix: {X.shape}")
print(f"shape of bias term: {b.shape}")

shape of parameter matrix: (610, 10)
shape of movie_features/genres matrix: (9724, 10)
shape of bias term: (1, 610)


In [24]:
# Mean Normalization for movie ratings 

global_mean = np.nanmean(Y)
Y_norm = np.where(np.isnan(Y), 0, Y-global_mean)
print(Y_norm.shape)

(9724, 610)


In [26]:
def cofi_cost_function(X,Y_norm,W,b,R,lambda_):
    J =  ( tf.linalg.matmul(X,tf.transpose(W)) + b - Y_norm )*R
    J = 0.5 * tf.reduce_sum(J**2) + (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return J 

In [34]:
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-1)
iterations = 200
lambda_ = 1 
for iter in range(iterations):
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_function(X,Y_norm,W,b,R,lambda_)
    grads = tape.gradient( cost_value, [X,W,b])
    optimizer.apply_gradients( zip(grads, [X,W,b]))

    if iter % 20 == 0:
        print(f"Training Loss at Iteration {iter}: {cost_value:0.1f}")


Training Loss at Iteration 0: 16736.6
Training Loss at Iteration 20: 16319.4
Training Loss at Iteration 40: 15994.0
Training Loss at Iteration 60: 15949.7
Training Loss at Iteration 80: 15938.5
Training Loss at Iteration 100: 15932.6
Training Loss at Iteration 120: 15928.9
Training Loss at Iteration 140: 15926.7
Training Loss at Iteration 160: 15925.4
Training Loss at Iteration 180: 15925.1


In [48]:
user_id = 0 
my_rated = np.where(~np.isnan(Y[:,user_id]))[0]
movieList = data['title'].tolist()
print(movieList[:10])
my_ratings = Y[my_rated,user_id]
print(my_ratings.shape)

['Toy Story (1995)', 'Grumpier Old Men (1995)', 'Heat (1995)', 'Seven (a.k.a. Se7en) (1995)', 'Usual Suspects, The (1995)', 'From Dusk Till Dawn (1996)', 'Bottle Rocket (1996)', 'Braveheart (1995)', 'Rob Roy (1995)', 'Canadian Bacon (1995)']
(232,)


In [44]:
# Normalized Predictions
Y_pred_norm = np.matmul(X.numpy(), np.transpose(W.numpy())) + b.numpy()

Y_pred = Y_pred_norm + global_mean 

user_id = 0 
my_predictions = Y_pred[:,user_id]
my_predictions = np.clip(my_predictions, 1, 5)

ix = tf.argsort(my_predictions, direction = 'DESCENDING')

print("Top Recommendations:\n")

for i in range(10):
    j = ix[i].numpy()
    if j not in my_rated:
        print(f"Predicting Rating {my_predictions[j]:0.2f} for movie {movieList[j]}")
    




Top Recommendations:

Predicting Rating 5.00 for movie Usual Suspects, The (1995)
Predicting Rating 5.00 for movie Ed Wood (1994)
Predicting Rating 5.00 for movie Fugitive, The (1993)
Predicting Rating 5.00 for movie Bedknobs and Broomsticks (1971)
Predicting Rating 5.00 for movie Monty Python's Life of Brian (1979)
Predicting Rating 5.00 for movie E.T. the Extra-Terrestrial (1982)
Predicting Rating 5.00 for movie Clockwork Orange, A (1971)
Predicting Rating 5.00 for movie Bambi (1942)
Predicting Rating 5.00 for movie American History X (1998)
Predicting Rating 5.00 for movie Iron Giant, The (1999)


In [47]:
print("Original Vs Predicted ratings:\n")
for i in range(len(my_rated)):
    if my_ratings[i] > 0 :
        print(f"Original: {my_ratings[i]}, Predicted: {my_predictions[i]}:0.1f for {movieList[i]}")

Original Vs Predicted ratings:

Original: 4.0, Predicted: 4.1613683512372575:0.1f for Toy Story (1995)
Original: 4.0, Predicted: 3.947490256350471:0.1f for Grumpier Old Men (1995)
Original: 4.0, Predicted: 4.290364234874387:0.1f for Heat (1995)
Original: 5.0, Predicted: 3.6316042621393914:0.1f for Seven (a.k.a. Se7en) (1995)
Original: 5.0, Predicted: 5.0:0.1f for Usual Suspects, The (1995)
Original: 3.0, Predicted: 4.098338313775113:0.1f for From Dusk Till Dawn (1996)
Original: 5.0, Predicted: 3.0728233050871716:0.1f for Bottle Rocket (1996)
Original: 4.0, Predicted: 3.9581110199619767:0.1f for Braveheart (1995)
Original: 5.0, Predicted: 4.45868269428645:0.1f for Rob Roy (1995)
Original: 5.0, Predicted: 4.380740058482061:0.1f for Canadian Bacon (1995)
Original: 5.0, Predicted: 4.473297970082842:0.1f for Desperado (1995)
Original: 5.0, Predicted: 2.7040607393048974:0.1f for Billy Madison (1995)
Original: 3.0, Predicted: 4.223510152763725:0.1f for Clerks (1994)
Original: 5.0, Predicted: 