In [4]:
import numpy as np 
import matplotlib.pyplot as plt 
import tensorflow as tf 
from tensorflow import keras 
from recsys_utils import *

In [5]:
X, W, b, num_movies, num_features, num_users = load_precalc_params_small()
Y,R =  load_ratings_small()
print(f"Y:{Y.shape}\n R:{R.shape}\nW:{W.shape}\nb:{b.shape}\nnum_features:{num_features}\nnum_users:{num_users}\nnum_movies:{num_movies}")

Y:(4778, 443)
 R:(4778, 443)
W:(443, 10)
b:(1, 443)
num_features:10
num_users:443
num_movies:4778


In [6]:
tsmean = np.mean(Y[0,R[0,:].astype(bool)])
print(f"Average rating of movies:{tsmean}")

Average rating of movies:3.4


In [7]:
def compute_cost(X,W,b,Y,R,lambda_):
    nm,nu = Y.shape
    J = 0
    for j in range(nu):
        w = W[j,:]
        b_j = b[0,j] 
        for i in range(nm):
            x = X[i,:]
            y = Y[i,j]
            r = R[i,j]
            J += np.square((np.dot(w,x)+b_j)-y)    
    J += (lambda_)*(np.sum(np.square(W)) + np.square(np.sum(X)))
    J /= 2
    return J

In [9]:
# Reduce the data set size so that this runs faster
num_users_r = 4
num_movies_r = 5 
num_features_r = 3

X_r = X[:num_movies_r, :num_features_r]
W_r = W[:num_users_r,  :num_features_r]
b_r = b[0, :num_users_r].reshape(1,-1)
Y_r = Y[:num_movies_r, :num_users_r]
R_r = R[:num_movies_r, :num_users_r]

# Evaluate cost function
J = compute_cost(X_r, W_r, b_r, Y_r, R_r, 0)
print(f"Cost: {J:0.2f}")


Cost: 18.45


In [10]:
def compute_cost_vectorised(X, W, b, Y, R, lambda_):
    j = (tf.linalg.matmul(X,tf.transpose(W)) + b - Y)*R
    j = 0.5*(tf.reduce_sum(j**2)) + (lambda_/2)*((tf.reduce_sum(W**2)) + (tf.reduce_sum(X**2)))
    return j

In [14]:
cost = compute_cost_vectorised(X_r,W_r,b_r,Y_r,R_r,0)
print(cost.numpy())

13.670725805579915


In [15]:
movieList, movieList_df = load_Movie_List_pd()

my_ratings = np.zeros(num_movies)          #  Initialize my ratings

# Check the file small_movie_list.csv for id of each movie in our dataset
# For example, Toy Story 3 (2010) has ID 2700, so to rate it "5", you can set
my_ratings[2700] = 5 

#Or suppose you did not enjoy Persuasion (2007), you can set
my_ratings[2609] = 2;

# We have selected a few movies we liked / did not like and the ratings we
# gave are as follows:
my_ratings[929]  = 5   # Lord of the Rings: The Return of the King, The
my_ratings[246]  = 5   # Shrek (2001)
my_ratings[2716] = 3   # Inception
my_ratings[1150] = 5   # Incredibles, The (2004)
my_ratings[382]  = 2   # Amelie (Fabuleux destin d'Amélie Poulain, Le)
my_ratings[366]  = 5   # Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
my_ratings[622]  = 5   # Harry Potter and the Chamber of Secrets (2002)
my_ratings[988]  = 3   # Eternal Sunshine of the Spotless Mind (2004)
my_ratings[2925] = 1   # Louis Theroux: Law & Disorder (2008)
my_ratings[2937] = 1   # Nothing to Declare (Rien à déclarer)
my_ratings[793]  = 5   # Pirates of the Caribbean: The Curse of the Black Pearl (2003)
my_rated = [i for i in range(len(my_ratings)) if my_ratings[i] > 0]

print('\nNew user ratings:\n')
for i in range(len(my_ratings)):
    if my_ratings[i] > 0 :
        print(f'Rated {my_ratings[i]} for  {movieList_df.loc[i,"title"]}');


New user ratings:

Rated 5.0 for  Shrek (2001)
Rated 5.0 for  Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Rated 2.0 for  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Rated 5.0 for  Harry Potter and the Chamber of Secrets (2002)
Rated 5.0 for  Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Rated 5.0 for  Lord of the Rings: The Return of the King, The (2003)
Rated 3.0 for  Eternal Sunshine of the Spotless Mind (2004)
Rated 5.0 for  Incredibles, The (2004)
Rated 2.0 for  Persuasion (2007)
Rated 5.0 for  Toy Story 3 (2010)
Rated 3.0 for  Inception (2010)
Rated 1.0 for  Louis Theroux: Law & Disorder (2008)
Rated 1.0 for  Nothing to Declare (Rien à déclarer) (2010)


In [21]:
Y,R = load_ratings_small()
Y = np.c_[my_ratings,Y]
R = np.c_[(my_ratings != 0).astype(int),R]
Ynorm,Ymean = normalizeRatings(Y,R)

In [25]:
num_users,num_movies = Y.shape 
num_features = 100
tf.random.set_seed(1234)
W = tf.Variable(tf.random.normal((num_users,num_features),dtype=tf.float64),name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b =  tf.Variable(tf.random.normal((1,num_users),dtype=tf.float64),name='b')
optimizer = keras.optimizers.Adam(learning_rate=1e-1)