In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from utils import *

In [3]:
# Read the movie catalogue and the user ratings from the local dataset folder.
df_movies = pd.read_csv("./dataset/movies.csv", sep=",", quotechar='"')
df_ratings = pd.read_csv("./dataset/ratings.csv")

# Re-label the movie rows 1..N instead of pandas' default 0..N-1.
df_movies.index = range(1, len(df_movies) + 1)

# prepare_y_r is pulled in by the star-import from utils.
# NOTE(review): presumably Y holds the ratings and R marks which entries are
# rated -- confirm against utils.prepare_y_r.
Y, R = prepare_y_r(df_movies, df_ratings)

In [4]:
# Length of each latent vector; it sets the second dimension of both W and X.
num_features = 20

# normalize_ratings comes from utils (star-import).
# NOTE(review): presumably Ymean is the per-movie mean that was subtracted to
# obtain Ynorm -- confirm against utils.normalize_ratings.
Ynorm, Ymean = normalize_ratings(Y, R)

# Y is laid out as (movies, users); its shape fixes both problem sizes.
num_movies, num_users = Y.shape

In [5]:
# Turn R into a plain NumPy array for use in the TensorFlow cost below.
# np.asarray is used instead of `.values` so the cell is idempotent: once R is
# already an ndarray (which has no `.values` attribute), re-running this cell
# returns it unchanged instead of raising AttributeError.
R = np.asarray(R)

In [6]:
# Sanity check before training: the result is truthy if np.isnan flags any
# entry of Ynorm, in which case the cost below would itself become NaN.
np.isnan(Ynorm).any()

False

### Collaborative filtering cost function

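The collaborative filtering cost function, in which each squared prediction error is weighted by $r(i,j)$ (the matrix `R` in the code) and $\lambda$ is the regularization strength (`lambda_` in the code), is given by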
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{\text{regularization}}
$$


In [12]:
def cost_function(W, X, b, Y, R, lambda_):
    """Return the regularized collaborative-filtering cost as a scalar tensor.

    The prediction for every (movie, user) pair is ``X @ W^T + b``. Its
    difference from ``Y`` is multiplied element-wise by ``R`` before being
    squared and summed, and an L2 penalty on ``X`` and ``W`` (but not on
    ``b``) is added.

    Args:
        W: user parameters; in this notebook a (num_users, num_features)
            ``tf.Variable``.
        X: movie features; in this notebook a (num_movies, num_features)
            ``tf.Variable``.
        b: user biases; in this notebook a (1, num_users) ``tf.Variable``
            that broadcasts over the movie axis.
        Y: target ratings, broadcast-compatible with ``X @ W^T``.
        R: element-wise weights applied to the prediction error.
        lambda_: regularization strength.

    Returns:
        ``0.5 * sum(((X @ W^T + b - Y) * R)^2)
        + (lambda_ / 2) * (sum(X^2) + sum(W^2))``.
    """
    predictions = tf.linalg.matmul(X, tf.transpose(W)) + b
    weighted_error = (predictions - Y) * R

    data_term = 0.5 * tf.reduce_sum(tf.square(weighted_error))
    penalty_term = (lambda_/2) * (
        tf.reduce_sum(tf.square(X)) + tf.reduce_sum(tf.square(W))
    )
    return data_term + penalty_term

In [13]:
# Seed NumPy's global RNG so the initial parameters are reproducible.
np.random.seed(1234)

# The draw order (W, then X, then b) determines the values under the fixed
# seed, so it must not be rearranged.
W = tf.Variable(np.random.standard_normal(size=(num_users, num_features)), name='W')
X = tf.Variable(np.random.standard_normal(size=(num_movies, num_features)), name='X')
b = tf.Variable(np.random.standard_normal(size=(1, num_users)), name='b')

In [14]:
# Evaluate the cost once at the current parameter values with lambda_ = 1,
# then display the resulting scalar tensor.
J = cost_function(W, X, b, Ynorm, R, lambda_=1)
J

<tf.Tensor: shape=(), dtype=float64, numpy=1199520.2174030645>

In [16]:
# Adam optimizer with a fixed learning rate of 0.1.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

num_iter = 1000
lambda_ = 1

# The parameters being learned, shared by the gradient and update calls.
trainable_params = [W, X, b]
# Print the loss once every this many iterations.
log_every = 20

for i in range(num_iter):
    # Record the forward pass so the tape can differentiate the cost.
    with tf.GradientTape() as tape:
        cost = cost_function(W, X, b, Ynorm, R, lambda_)

    # Gradients come back in the same order as trainable_params.
    gradients = tape.gradient(cost, trainable_params)

    # One Adam step on every parameter.
    optimizer.apply_gradients(zip(gradients, trainable_params))

    # The reported cost is the value computed before this iteration's update.
    if not i % log_every:
        print(f"Training loss at {i} : {cost:0.2f}")

Training loss at 0 : 10793.01
Training loss at 20 : 10346.97
Training loss at 40 : 9876.10
Training loss at 60 : 9734.79
Training loss at 80 : 9672.06
Training loss at 100 : 9628.07
Training loss at 120 : 9594.80
Training loss at 140 : 9570.48
Training loss at 160 : 9553.57
Training loss at 180 : 9541.67
Training loss at 200 : 9527.37
Training loss at 220 : 9519.81
Training loss at 240 : 9514.47
Training loss at 260 : 9495.09
Training loss at 280 : 9495.94
Training loss at 300 : 9483.51
Training loss at 320 : 9475.36
Training loss at 340 : 9473.76
Training loss at 360 : 9460.55
Training loss at 380 : 9468.25
Training loss at 400 : 9462.40
Training loss at 420 : 9457.62
Training loss at 440 : 9443.47
Training loss at 460 : 9438.78
Training loss at 480 : 9445.48
Training loss at 500 : 9436.23
Training loss at 520 : 9433.36
Training loss at 540 : 9440.96
Training loss at 560 : 9442.55
Training loss at 580 : 9428.99
Training loss at 600 : 9447.13
Training loss at 620 : 9426.48
Training los

In [4]:
# Report whether the installed TensorFlow build was compiled with CUDA support.
tf.test.is_built_with_cuda()

False