In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from utils import *

In [13]:
df_movies = pd.read_csv("./dataset/movies.csv", delimiter=',', quotechar='"')
df_ratings = pd.read_csv("./dataset/ratings.csv")
df_movies.index = range(1,df_movies.shape[0]+1)

Y, R = prepare_y_r(df_movies, df_ratings)

In [14]:
Ynorm, Ymean = normalize_ratings(Y, R)

num_movies, num_users = Y.shape
num_features = 20

In [20]:
R = R.values

In [21]:
type(R)

numpy.ndarray

### Collaborative filtering cost function

The collaborative filtering cost function is given by
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$
The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$$

In [22]:
def cost_function(W, X, b, Y, R, lambda_):
    j = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y)*R
    J = 0.5 * tf.reduce_sum(j**2) + (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return J

In [53]:
def cost_func(X, W, b, Y, R, lambda_):
    j = (np.dot(X, W.T) + b - Y) * R
    J = 0.5 * np.sum(j**2) + 0.5 * lambda_ * (np.sum(X**2) + np.sum(W**2))
    return J

In [36]:
np.random.seed(1234)

W = tf.Variable(np.random.randn(num_users, num_features), name='W')
X = tf.Variable(np.random.randn(num_movies, num_features), name='X')
b = tf.Variable(np.random.randn(1, num_users), name='b')

In [None]:
cost_func(X.numpy(), W.numpy(), b.numpy(), Ynorm, R, 1)

In [43]:
cost_function(W, X, b, Ynorm, R, 1)

<tf.Tensor: shape=(), dtype=float64, numpy=nan>

In [27]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-1)

num_iter = 200
lambda_ = 1
for i in range(num_iter):
    with tf.GradientTape() as tape:
        cost = cost_function(W, X, b, Ynorm, R, lambda_)

    grads = tape.gradient(cost, [W, X, b])

    optimizer.apply_gradients(zip(grads, [W, X, b]))

    if i % 20 == 0:
        print(f"Training loss at {i} : {cost:0.2f}")

Training loss at 0 : nan
Training loss at 20 : nan


KeyboardInterrupt: 