In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from recsys_utils import *

In [5]:

# Load the pre-computed parameters and the ratings data for the small data set.
X, W, b, num_books, num_features, num_users = load_precalc_params_small()
Y, R = load_ratings_small()

# Show array shapes first, then the scalar problem dimensions.
print(f"Y {Y.shape} R {R.shape}")
print(f"X {X.shape}")
print(f"W {W.shape}")
print(f"b {b.shape}")
print(f"num_features {num_features}")
print(f"num_books {num_books}")
print(f"num_users {num_users}")

Y (4778, 443) R (4778, 443)
X (4778, 10)
W (443, 10)
b (1, 443)
num_features 10
num_books 4778
num_users 443


In [6]:
# Summary statistics can be read straight off the ratings matrix.
# Row 0 of R, cast to bool, selects which entries of row 0 of Y are averaged.
tsmean = Y[0][R[0].astype(bool)].mean()
print(f"Average rating for book 1 : {tsmean:0.3f} / 5")

Average rating for book 1 : 3.400 / 5


In [12]:

# Work on a small corner of the data so the cost is quick to evaluate.
num_users_r, num_books_r, num_features_r = 4, 5, 3

# Keep only the leading books / users / features of every array.
X_r = X[:num_books_r, :num_features_r]
W_r = W[:num_users_r, :num_features_r]
Y_r = Y[:num_books_r, :num_users_r]
R_r = R[:num_books_r, :num_users_r]
# b is sliced from its first row and reshaped back into a single-row 2-D array.
b_r = b[0, :num_users_r].reshape(1, -1)

# Cost on the reduced problem with the last argument (0) —
# presumably the regularization weight, as in cofi_cost_func_v; confirm.
J = cofi_cost_func(X_r, W_r, b_r, Y_r, R_r, 0)
print(f"Cost: {J:0.2f}")

Cost: 13.67


In [13]:

# Same reduced problem, now with 1.5 as the last argument
# (presumably the regularization weight — confirm against cofi_cost_func).
J = cofi_cost_func(X_r, W_r, b_r, Y_r, R_r, 1.5)
print(f"Cost with regularization: {J:0.2f}")

Cost with regularization: 28.09


In [14]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """Vectorized cost: masked squared error plus an L2 penalty on X and W.

    Computes

        0.5 * sum(((X @ W^T + b - Y) * R) ** 2)
        + (lambda_ / 2) * (sum(X ** 2) + sum(W ** 2))

    using TensorFlow ops, so the result can be differentiated with
    ``tf.GradientTape``.

    Args:
        X: 2-D matrix multiplied on the left of ``W^T``.
        W: 2-D matrix; its transpose is multiplied on the right of ``X``.
        b: Offset added to ``X @ W^T`` (must broadcast against it).
        Y: Target values subtracted from the prediction.
        R: Element-wise multiplier applied to the error; entries equal to 0
            remove the corresponding error from the cost.
        lambda_: Weight of the penalty term. ``b`` is not penalized.

    Returns:
        The scalar cost as produced by the TensorFlow reductions.
    """
    prediction = tf.linalg.matmul(X, tf.transpose(W)) + b
    masked_error = (prediction - Y) * R

    squared_error_sum = tf.reduce_sum(masked_error**2)
    penalty_sum = tf.reduce_sum(X**2) + tf.reduce_sum(W**2)

    return 0.5 * squared_error_sum + (lambda_/2) * penalty_sum

In [15]:

# Evaluate the vectorized cost on the reduced problem without regularization ...
J = cofi_cost_func_v(X_r, W_r, b_r, Y_r, R_r, lambda_=0)
print(f"Cost: {J:0.2f}")

# ... and again with lambda_ = 1.5.
J = cofi_cost_func_v(X_r, W_r, b_r, Y_r, R_r, lambda_=1.5)
print(f"Cost (with regularization): {J:0.2f}")

Cost: 13.67
Cost (with regularization): 28.09


In [16]:
bookList, bookList_df = load_Movie_List_pd()

# One rating slot per book; 0 means "not rated by the new user".
# Uses num_books (set when the parameters were loaded) — the previous name,
# num_movies, is never defined in this notebook and fails on a fresh kernel.
my_ratings = np.zeros(num_books)

# Ratings supplied by the new user, keyed by row index into the title list.
# Titles in the trailing comments are copied from this cell's recorded output.
my_ratings[2700] = 5   # Toy Story 3 (2010)
my_ratings[2609] = 2   # Persuasion (2007)
my_ratings[929]  = 5   # Lord of the Rings: The Return of the King, The (2003)
my_ratings[246]  = 5   # Shrek (2001)
my_ratings[2716] = 3   # Inception (2010)
my_ratings[1150] = 5   # Incredibles, The (2004)
my_ratings[382]  = 2   # Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
my_ratings[366]  = 5   # Harry Potter and the Sorcerer's Stone (2001)
my_ratings[622]  = 5   # Harry Potter and the Chamber of Secrets (2002)
my_ratings[988]  = 3   # Eternal Sunshine of the Spotless Mind (2004)
my_ratings[2925] = 1   # Louis Theroux: Law & Disorder (2008)
my_ratings[2937] = 1   # Nothing to Declare (Rien à déclarer) (2010)
my_ratings[793]  = 5   # Pirates of the Caribbean: The Curse of the Black Pearl (2003)

# Indices of every book the new user rated, in ascending order.
my_rated = [i for i in range(len(my_ratings)) if my_ratings[i] > 0]

print('\nNew user ratings:\n')
# my_rated already holds exactly the indices with a positive rating.
for i in my_rated:
    print(f'Rated {my_ratings[i]} for  {bookList_df.loc[i,"title"]}')


New user ratings:

Rated 5.0 for  Shrek (2001)
Rated 5.0 for  Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Rated 2.0 for  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Rated 5.0 for  Harry Potter and the Chamber of Secrets (2002)
Rated 5.0 for  Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Rated 5.0 for  Lord of the Rings: The Return of the King, The (2003)
Rated 3.0 for  Eternal Sunshine of the Spotless Mind (2004)
Rated 5.0 for  Incredibles, The (2004)
Rated 2.0 for  Persuasion (2007)
Rated 5.0 for  Toy Story 3 (2010)
Rated 3.0 for  Inception (2010)
Rated 1.0 for  Louis Theroux: Law & Disorder (2008)
Rated 1.0 for  Nothing to Declare (Rien à déclarer) (2010)


Now, let's add these reviews to $Y$ and $R$ and normalize the ratings.

In [17]:

# Start from a fresh copy of the ratings so re-running this cell does not
# stack the new user's column more than once.
Y, R = load_ratings_small()

# Put the new user in column 0: an indicator column (1 where a rating was
# given, 0 elsewhere) in front of R, and the ratings themselves in front of Y.
R = np.column_stack(((my_ratings != 0).astype(int), R))
Y = np.column_stack((my_ratings, Y))

# Normalize the ratings; Ymean is added back to the predictions later on.
Ynorm, Ymean = normalizeRatings(Y, R)

In [25]:

# Problem dimensions come from the augmented ratings matrix (rows, columns of Y).
num_books, num_users = Y.shape
num_features = 100

# Fix TensorFlow's global seed so the random initial values are repeatable.
tf.random.set_seed(123456)

# Trainable parameters, created as float64 and drawn from a standard normal.
# X has one row per book: it is sized with num_books (the previous name,
# num_movies, is not defined anywhere in this notebook).
# The W, X, b creation order is kept as-is; changing it may change the draws.
W = tf.Variable(tf.random.normal((num_users, num_features), dtype=tf.float64), name='W')
X = tf.Variable(tf.random.normal((num_books, num_features), dtype=tf.float64), name='X')
b = tf.Variable(tf.random.normal((1,         num_users),    dtype=tf.float64), name='b')

# Adam optimizer with a learning rate of 0.1.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [21]:
iterations = 200   # number of gradient steps taken by this cell
lambda_ = 1        # penalty weight passed to cofi_cost_func_v

# The loop counter is named `step` so the builtin iter() is not shadowed
# in the notebook's global namespace.
for step in range(iterations):

    # Record the operations of the cost computation so it can be differentiated.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func_v(X, W, b, Ynorm, R, lambda_)

    # Gradients of the cost with respect to each trainable parameter.
    grads = tape.gradient(cost_value, [X, W, b])

    # One optimizer update: each gradient is paired with its variable.
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    # Report progress every 10 steps.
    if step % 10 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.2f}")

Training loss at iteration 0: 2566.61
Training loss at iteration 10: 2446.39
Training loss at iteration 20: 2348.73
Training loss at iteration 30: 2268.78
Training loss at iteration 40: 2202.87
Training loss at iteration 50: 2148.14
Training loss at iteration 60: 2102.37
Training loss at iteration 70: 2063.85
Training loss at iteration 80: 2031.19
Training loss at iteration 90: 2003.34
Training loss at iteration 100: 1979.44
Training loss at iteration 110: 1958.79
Training loss at iteration 120: 1940.86
Training loss at iteration 130: 1925.19
Training loss at iteration 140: 1911.43
Training loss at iteration 150: 1899.29
Training loss at iteration 160: 1888.52
Training loss at iteration 170: 1878.92
Training loss at iteration 180: 1870.33
Training loss at iteration 190: 1862.61


In [22]:

# Predictions for every (book, user) pair from the learned parameters.
p = np.matmul(X.numpy(), np.transpose(W.numpy())) + b.numpy()

# Add Ymean back (training used Ynorm as the target).
pm = p + Ymean

# Column 0 belongs to the new user, whose ratings were prepended to Y.
my_predictions = pm[:, 0]

# Book indices ordered from highest to lowest predicted rating.
ix = tf.argsort(my_predictions, direction='DESCENDING')

# Walk the 17 highest predictions and skip books the user already rated.
for i in range(17):
    j = ix[i]
    if j in my_rated:
        continue
    print(f'Predicting rating {my_predictions[j]:0.2f} for book {bookList[j]}')

print('\n\nOriginal vs Predicted ratings:\n')
# Compare the user's own ratings with the model's predictions for those books.
for i in np.flatnonzero(my_ratings > 0):
    print(f'Original {my_ratings[i]}, Predicted {my_predictions[i]:0.2f} for {bookList[i]}')

Predicting rating 4.74 for movie Colourful (Karafuru) (2010)
Predicting rating 4.58 for movie One I Love, The (2014)
Predicting rating 4.58 for movie Laggies (2014)
Predicting rating 4.58 for movie Delirium (2014)
Predicting rating 4.55 for movie Into the Forest of Fireflies' Light (2011)
Predicting rating 4.54 for movie Particle Fever (2013)
Predicting rating 4.54 for movie Martin Lawrence Live: Runteldat (2002)
Predicting rating 4.54 for movie Battle Royale 2: Requiem (Batoru rowaiaru II: Chinkonka) (2003)
Predicting rating 4.54 for movie Into the Abyss (2011)
Predicting rating 4.54 for movie Eichmann (2007)


Original vs Predicted ratings:

Original 5.0, Predicted 4.89 for Shrek (2001)
Original 5.0, Predicted 4.86 for Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Original 2.0, Predicted 2.18 for Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Original 5.0, Predicted 4.85 for Harry Potter and the Chamber of Secrets (2002)
Original