<center><h1>Movie Recommender System 📽️🍿</h1><h3>by Andrew Ng</h3></center>

In [None]:
import numpy as np
from numpy import loadtxt

import tensorflow as tf
from tensorflow import keras

import pandas as pd

In [19]:
# Load data
#
# Every CSV is read inside a `with` block so the file handle is closed as soon
# as loadtxt has consumed it (the handles were previously left open).

# X: its two dimensions are unpacked below as (num_movies, num_features).
with open('./data/small_movies_X.csv', 'rb') as file:
    X = loadtxt(file, delimiter = ",")

# W: its first dimension is unpacked below as num_users.
with open('./data/small_movies_W.csv', 'rb') as file:
    W = loadtxt(file,delimiter = ",")

# b: loaded and then reshaped into a single row (1, n).
with open('./data/small_movies_b.csv', 'rb') as file:
    b = loadtxt(file,delimiter = ",")
b = b.reshape(1,-1)

num_movies, num_features = X.shape
num_users,_ = W.shape

# Y and R: two more matrices loaded the same way.
# presumably Y holds the ratings and R flags which entries are rated — confirm against the data files.
with open('./data/small_movies_Y.csv', 'rb') as file:
    Y = loadtxt(file,delimiter = ",")

with open('./data/small_movies_R.csv', 'rb') as file:
    R = loadtxt(file,delimiter = ",")


In [20]:
# Report the shapes of the loaded arrays and the sizes derived from them.
print(f"Y {Y.shape} R {R.shape}")
print(f"X {X.shape}")
print(f"W {W.shape}")
print(f"b {b.shape}")
print(f"num_features {num_features}")
print(f"num_movies {num_movies}")
print(f"num_users {num_users}")

Y (4778, 443) R (4778, 443)
X (4778, 10)
W (443, 10)
b (1, 443)
num_features 10
num_movies 4778
num_users 443


In [21]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """
    Vectorized collaborative-filtering cost.

    Computes 0.5 * sum(((X @ W^T + b - Y) * R)^2)
             + (lambda_ / 2) * (sum(X^2) + sum(W^2)).

    Args:
      X, W, b : parameters combined as X @ W^T + b to form the predictions
      Y       : target values subtracted from the predictions
      R       : element-wise multiplier applied to the error before squaring
                (entries equal to 0 remove that error from the cost)
      lambda_ : regularization strength applied to X and W (b is not regularized)
    Returns:
      J : the scalar cost
    """
    # Prediction error, multiplied element-wise by R.
    masked_err = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y) * R

    data_term = 0.5 * tf.reduce_sum(masked_err**2)
    reg_term = (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return data_term + reg_term

In [22]:

# Movie titles; the first CSV column becomes the index used for lookups below.
movieList_df = pd.read_csv(
    './data/small_movie_list.csv',
    header=0,
    index_col=0,
    delimiter=',',
    quotechar='"',
)
movieList = movieList_df["title"].to_list()

# Ratings entered for the new user, keyed by movie index.
new_user_ratings = {
    4238: 5,  # the circle
    2575: 5,  # sherlock holmes
    1139: 3,  # the grudge
    3336: 5,  # django unchained
    785: 5,   # 28 days later
    3082: 5,  # hunger Games
    4089: 4,  # fantastic beasts
}

# One slot per movie; 0 means the movie has not been rated.
my_ratings = np.zeros(num_movies)
for movie_idx, rating in new_user_ratings.items():
    my_ratings[movie_idx] = rating

# Indices of the movies that received a rating, in ascending order.
my_rated = [movie_idx for movie_idx, rating in enumerate(my_ratings) if rating > 0]

print('\nNew user ratings:\n')
for movie_idx in my_rated:
    print(f'Rated {my_ratings[movie_idx]} for  {movieList_df.loc[movie_idx,"title"]}')


New user ratings:

Rated 5.0 for  28 Days Later (2002)
Rated 3.0 for  Grudge, The (2004)
Rated 5.0 for  Sherlock Holmes (2009)
Rated 5.0 for  The Hunger Games (2012)
Rated 5.0 for  Django Unchained (2012)
Rated 4.0 for  Fantastic Beasts and Where to Find Them (2016)
Rated 5.0 for  The Circle (2016)


In [23]:
# Reload ratings and add new ratings
# Y and R are re-read from disk so that re-running this cell never stacks the
# new user's column twice. `with` closes each file handle once it is read.
with open('./data/small_movies_Y.csv', 'rb') as file:
    Y = loadtxt(file,delimiter = ",")

with open('./data/small_movies_R.csv', 'rb') as file:
    R = loadtxt(file,delimiter = ",")

# Prepend the new user as column 0: their ratings go into Y and a 0/1
# "has rated" flag derived from those ratings goes into R.
Y    = np.c_[my_ratings, Y]
R    = np.c_[(my_ratings != 0).astype(int), R]

# Normalize the Dataset
# Ymean is the per-row mean of Y over the entries where R is non-zero; the
# 1e-12 keeps the division finite for rows where R sums to zero.
Ymean = (np.sum(Y*R,axis=1)/(np.sum(R, axis=1)+1e-12)).reshape(-1,1)
# Subtract the row mean only where R is non-zero; other entries are unchanged.
Ynorm = Y - np.multiply(Ymean, R) 

In [24]:
#  Useful Values
num_movies, num_users = Y.shape
num_features = 100


def _random_normal_variable(shape, name):
    """Return a float64 tf.Variable of `shape` initialised from tf.random.normal."""
    return tf.Variable(tf.random.normal(shape, dtype=tf.float64), name=name)


# Seed before drawing so the initial values are consistent between runs.
tf.random.set_seed(1234)

# Trainable parameters (tracked as tf.Variable), drawn in the order W, X, b.
W = _random_normal_variable((num_users,  num_features), 'W')
X = _random_normal_variable((num_movies, num_features), 'X')
b = _random_normal_variable((1,          num_users),    'b')

# Adam optimizer used by the training loop.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [25]:
# Custom training loop: minimise the collaborative-filtering cost with respect
# to X, W and b.
iterations = 200
lambda_ = 1

# The loop counter is named `step` rather than `iter` so the built-in `iter()`
# is not shadowed for the rest of the kernel session.
for step in range(iterations):

    # Record the operations that produce the cost so it can be differentiated.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func_v(X, W, b, Ynorm, R, lambda_)

    # Gradients of the cost with respect to the trainable variables.
    grads = tape.gradient( cost_value, [X,W,b] )

    # One optimizer update, pairing each gradient with its variable.
    optimizer.apply_gradients( zip(grads, [X,W,b]) )

    # Log progress every 20 iterations.
    if step % 20 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 2321121.9
Training loss at iteration 20: 136160.5
Training loss at iteration 40: 51854.1
Training loss at iteration 60: 24594.4
Training loss at iteration 80: 13628.2
Training loss at iteration 100: 8486.2
Training loss at iteration 120: 5806.5
Training loss at iteration 140: 4310.5
Training loss at iteration 160: 3434.2
Training loss at iteration 180: 2900.9


In [27]:
# Predictions for every movie/user pair from the trained weights and biases.
p = X.numpy() @ W.numpy().T + b.numpy()

# Add the per-movie mean back; column 0 belongs to the newly added user.
pm = p + Ymean
my_predictions = pm[:,0]

# Movie indices ordered from highest to lowest predicted rating.
ix = tf.argsort(my_predictions, direction='DESCENDING')

# Walk the 17 highest predictions, skipping movies the user already rated.
for rank in range(17):
    movie_idx = ix[rank]
    if movie_idx in my_rated:
        continue
    print(f'Predicting rating {my_predictions[movie_idx]:0.2f} for movie {movieList[movie_idx]}')

# Compare each rating the user supplied with the model's prediction for it.
print('\n\nOriginal vs Predicted ratings:\n')
for movie_idx, rating in enumerate(my_ratings):
    if rating > 0:
        print(f'Original {rating}, Predicted {my_predictions[movie_idx]:0.2f} for {movieList[movie_idx]}')

Predicting rating 5.87 for movie Martin Lawrence Live: Runteldat (2002)
Predicting rating 5.86 for movie My Sassy Girl (Yeopgijeogin geunyeo) (2001)
Predicting rating 5.85 for movie The Girl with All the Gifts (2016)
Predicting rating 5.85 for movie Bossa Nova (2000)
Predicting rating 5.85 for movie Dragons: Gift of the Night Fury (2011)
Predicting rating 5.85 for movie Son of the Bride (Hijo de la novia, El) (2001)
Predicting rating 5.85 for movie Delirium (2014)
Predicting rating 5.85 for movie Laggies (2014)
Predicting rating 5.85 for movie One I Love, The (2014)
Predicting rating 5.85 for movie Rivers and Tides (2001)
Predicting rating 5.85 for movie Ex Drummer (2007)
Predicting rating 5.85 for movie Particle Fever (2013)
Predicting rating 5.84 for movie 61* (2001)
Predicting rating 5.84 for movie Eva (2011)
Predicting rating 5.84 for movie Wonder Woman (2009)
Predicting rating 5.84 for movie Paper Birds (Pájaros de papel) (2010)
Predicting rating 5.84 for movie Superman/Batman: Pu