## Disclaimer
This code is based on Andrew Ng's Machine Learning Specialization and was written for my own study purposes.

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

In [4]:
# w (443, 10): 443 users with 10 features each
w_data = np.array(pd.read_csv('./data/small_movies_W.csv', header=None))

# b (443,): one bias parameter per user.
# The frame is transposed and then flattened to a 1-D vector; the reshape below
# only succeeds when the CSV holds a single row of values.
b_data = np.array(pd.read_csv('./data/small_movies_b.csv', header=None).T)
b_data = b_data.reshape(b_data.shape[0])

# x (4778, 10): 4778 movies with 10 features each
x_data = np.array(pd.read_csv('./data/small_movies_X.csv', header=None))

# y (4778, 443): ratings of 4778 movies by 443 users
y_data = np.array(pd.read_csv('./data/small_movies_Y.csv', header=None))

# r (4778, 443): whether a movie has been rated by a user
r_data = np.array(pd.read_csv('./data/small_movies_R.csv', header=None))

In [99]:
# Sanity-check the shape of every array loaded above.
for label, arr in (
    ('w (users)', w_data),
    ('b (users)', b_data),
    ('x (movies)', x_data),
    ('y (ratings)', y_data),
    ('r (rated)', r_data),
):
    print(f'{label}\t {arr.shape}')

w (users)	 (443, 10)
b (users)	 (443,)
x (movies)	 (4778, 10)
y (ratings)	 (4778, 443)
r (rated)	 (4778, 443)


## Cost function

The cost function is given by
$$J = \frac{1}{2} \sum_{(i,j):\, r(i,j)=1}\left(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)}\right)^2 + \frac{\lambda}{2}
\sum_{j=1}^{n_u}\sum_{k=1}^{n}\left(\mathbf{w}^{(j)}_k\right)^2
+ \frac{\lambda}{2}\sum_{i=1}^{n_m}\sum_{k=1}^{n}\left(\mathbf{x}^{(i)}_k\right)^2$$

- $n_u$: number of users
- $n_m$: number of movies
- $n$: number of features


In [100]:
def cost(x, w, b, y, r, lambda_):
    """Collaborative-filtering cost, computed with explicit loops.

    Reference (slow) implementation: visits every (movie, user) pair and adds
    the squared prediction error weighted by the mask ``r``, then adds the
    regularization penalty on ``w`` and ``x`` and halves the total.

    Args:
        x: movie features, indexed as ``x[i]`` for movie ``i``.
        w: user parameters, indexed as ``w[j]`` for user ``j``.
        b: user biases, indexed as ``b[j]``.
        y: ratings, shape (number of movies, number of users).
        r: mask with the same layout as ``y``; ``r[i, j]`` scales the error
            of pair (i, j), so 0 removes that pair from the cost.
        lambda_: regularization strength.

    Returns:
        The scalar cost J.
    """
    num_movies, num_users = y.shape

    total = 0
    for movie in range(num_movies):
        for user in range(num_users):
            residual = w[user] @ x[movie] + b[user] - y[movie, user]
            total += r[movie, user] * residual ** 2

    # Penalize the squared magnitude of both parameter matrices.
    total += lambda_ * (np.sum(np.square(w)) + np.sum(np.square(x)))
    total /= 2

    return total

# vectorized implementation
def cost_v(x, w, b, y, r, lambda_):
    """Collaborative-filtering cost computed with TensorFlow ops.

    Computes the same quantity as the loop-based ``cost``: half the sum of
    squared, mask-weighted prediction errors plus ``lambda_ / 2`` times the
    sum of squares of ``x`` and ``w``.

    Args:
        x: movie features, one row per movie.
        w: user parameters, one row per user.
        b: user biases, broadcast across the movie axis.
        y: ratings laid out as (movies, users).
        r: mask with the same layout as ``y``; multiplies the error elementwise.
        lambda_: regularization strength.

    Returns:
        The scalar cost J.
    """
    predictions = tf.linalg.matmul(x, tf.transpose(w)) + b
    masked_error = (predictions - y) * r
    data_term = 0.5 * tf.reduce_sum(masked_error**2)
    penalty = (lambda_/2) * (tf.reduce_sum(x**2) + tf.reduce_sum(w**2))
    return data_term + penalty

In [101]:
def test_cost_fn(cost_fn):
    nu = 4
    nm = 5 
    n = 3

    x = np.ones((nm, n))
    w = np.ones((nu, n))
    b = np.zeros((nu, ))
    y = np.zeros((nm, nu))
    r = np.zeros((nm, nu))

    J = cost_fn(x, w, b, y, r, 2)
    assert np.isclose(J , 27)


In [106]:
# Both implementations must pass the same check.
for cost_impl in (cost, cost_v):
    test_cost_fn(cost_impl)

## Learning recommendations

In [63]:
def load_movies():
    """Read the movie list from ./data/small_movie_list.csv.

    The first CSV column becomes the frame's index and the first row is used
    as the header.

    Returns:
        A ``(titles, frame)`` tuple: the "title" column as a Python list, and
        the full DataFrame. Rows are expected to be in the same order as the
        movies in the Y matrix.
    """
    movies_df = pd.read_csv('./data/small_movie_list.csv', header=0, index_col=0,  delimiter=',', quotechar='"')
    titles = movies_df["title"].to_list()
    return titles, movies_df


movieList, movieList_df = load_movies()

# One slot per movie; a value of 0 means "not rated by the new user".
my_ratings = np.zeros(len(movieList))

# Ratings of the new user, keyed by movie id.
# Check the file small_movie_list.csv for the id of each movie in the dataset.
new_user_ratings = {
    2700: 5,   # Toy Story 3 (2010)
    2609: 2,   # Persuasion (2007)
    929: 5,    # Lord of the Rings: The Return of the King, The
    246: 5,    # Shrek (2001)
    2716: 3,   # Inception
    1150: 5,   # Incredibles, The (2004)
    382: 2,    # Amelie (Fabuleux destin d'Amélie Poulain, Le)
    366: 5,    # Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
    622: 5,    # Harry Potter and the Chamber of Secrets (2002)
    988: 3,    # Eternal Sunshine of the Spotless Mind (2004)
    2925: 1,   # Louis Theroux: Law & Disorder (2008)
    2937: 1,   # Nothing to Declare (Rien à déclarer)
    793: 5,    # Pirates of the Caribbean: The Curse of the Black Pearl (2003)
}
for movie_id, rating in new_user_ratings.items():
    my_ratings[movie_id] = rating

# Ids of the movies the new user has rated, in ascending order.
my_rated = [i for i, value in enumerate(my_ratings) if value > 0]

print('\nNew user ratings:\n')
for i in my_rated:
    print(f'Rated {my_ratings[i]} for  {movieList_df.loc[i,"title"]}')


New user ratings:

Rated 5.0 for  Shrek (2001)
Rated 5.0 for  Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Rated 2.0 for  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Rated 5.0 for  Harry Potter and the Chamber of Secrets (2002)
Rated 5.0 for  Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Rated 5.0 for  Lord of the Rings: The Return of the King, The (2003)
Rated 3.0 for  Eternal Sunshine of the Spotless Mind (2004)
Rated 5.0 for  Incredibles, The (2004)
Rated 2.0 for  Persuasion (2007)
Rated 5.0 for  Toy Story 3 (2010)
Rated 3.0 for  Inception (2010)
Rated 1.0 for  Louis Theroux: Law & Disorder (2008)
Rated 1.0 for  Nothing to Declare (Rien à déclarer) (2010)


In [107]:
def normalizeRatings(Y, R):
    """Mean-normalize the ratings row by row.

    For every row of ``Y`` the mean over the entries selected by ``R`` is
    computed and subtracted from those entries only; entries where ``R`` is 0
    are left unchanged.

    Args:
        Y: ratings matrix.
        R: mask with the same shape as ``Y`` (1 where a rating exists, else 0).

    Returns:
        ``(Ynorm, Ymean)``: the normalized ratings and the per-row means as a
        column vector.
    """
    rated_total = np.sum(Y * R, axis=1)
    # The tiny constant avoids a division by zero for rows without any rating.
    rated_count = np.sum(R, axis=1) + 1e-12
    Ymean = (rated_total / rated_count).reshape(-1, 1)
    Ynorm = Y - Ymean * R
    return Ynorm, Ymean

# Prepend the new user's ratings as column 0 of the ratings matrix ...
Y = np.column_stack((my_ratings, y_data))
# ... and the matching "has rated" flags as column 0 of the mask.
R = np.column_stack(((my_ratings != 0).astype(int), r_data))

# Normalize the dataset: subtract each movie's mean rating from its rated entries
Ynorm, Ymean = normalizeRatings(Y, R)

Initialize learning parameters.

In [104]:
# Fix TensorFlow's global seed so the random initial values below are the same
# on every run of this cell (e.g. after Restart & Run All).
tf.random.set_seed(1234)

nm, nu = Ynorm.shape   # number of movies, number of users (including the new user)
n = 100                # number of features to learn per movie / per user

# Trainable parameters, drawn from a standard normal distribution.
w = tf.Variable(tf.random.normal((nu, n), dtype=tf.float64), name='w')
b = tf.Variable(tf.random.normal((nu, ), dtype=tf.float64), name='b')
x = tf.Variable(tf.random.normal((nm, n), dtype=tf.float64), name='x')

optimizer = tf.keras.optimizers.Adam(learning_rate=0.2)

The ratings were already mean-normalized above, so training runs on `Ynorm` rather than on the raw `Y`.

### Run gradient descent
Use `GradientTape` to help calculate derivatives of the cost function and `Adam` optimizer to apply gradients to existing values of $x$, $w$, and $b$.

In [105]:
n_iter = 500
lambda_ = 1

# The parameters being optimized, in the order their gradients are requested.
trainable = [x, w, b]

for i in range(n_iter):
    # Record the forward pass so the tape can differentiate the cost.
    with tf.GradientTape() as tape:
        cost_value = cost_v(x, w, b, Ynorm, R, lambda_)

    # One optimizer step on all three parameter sets.
    gradients = tape.gradient(cost_value, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))

    # Log the cost every 50 iterations.
    if not i % 50:
        print(f'{cost_value:0.1f}')

2262367.1
14603.8
2657.1
1962.8
1852.0
1814.0
1795.4
1784.7
1778.2
1773.8


In [112]:
# Predicted (mean-normalized) rating of every movie by every user,
# using the trained weights and biases
p = x.numpy() @ w.numpy().T + b.numpy()

# Add the per-movie mean back to return to the original rating scale
pm = p + Ymean

# Column 0 belongs to the new user added in front of the dataset
my_predictions = pm[:,0]

# Movie indices ordered from highest to lowest predicted rating
sorted_idx = tf.argsort(my_predictions, direction='DESCENDING')

# Walk the 20 best-scoring movies and report those the user has not rated yet
for i in range(20):
    movie_idx = sorted_idx[i]
    if movie_idx in my_rated:
        continue
    print(f'Predicting rating {my_predictions[movie_idx]:0.2f} for movie {movieList[movie_idx]}')

print('\n\nOriginal vs Predicted ratings:\n')
for i, given in enumerate(my_ratings):
    if given > 0:
        print(f'Original {given}, Predicted {my_predictions[i]:0.2f} for {movieList[i]}')

Predicting rating 4.73 for movie Colourful (Karafuru) (2010)
Predicting rating 4.58 for movie Battle Royale 2: Requiem (Batoru rowaiaru II: Chinkonka) (2003)
Predicting rating 4.58 for movie Eichmann (2007)
Predicting rating 4.58 for movie Into the Abyss (2011)
Predicting rating 4.58 for movie One I Love, The (2014)
Predicting rating 4.58 for movie Laggies (2014)
Predicting rating 4.58 for movie Delirium (2014)
Predicting rating 4.56 for movie 'Salem's Lot (2004)
Predicting rating 4.56 for movie Particle Fever (2013)
Predicting rating 4.55 for movie Kung Fu Panda: Secrets of the Masters (2011)
Predicting rating 4.54 for movie Seve (2014)
Predicting rating 4.53 for movie Battle For Sevastopol (2015)
Predicting rating 4.53 for movie Che: Part One (2008)


Original vs Predicted ratings:

Original 5, Predicted 5 for Shrek (2001)
Original 5, Predicted 5 for Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Original 2, Predicted 2 for Amelie (Fabu