# Collaborative Filtering Recommender Systems

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the movie catalogue and the individual (userId, movieId, rating) records.
movies_df = pd.read_csv('movies.csv')
ratings_df = pd.read_csv('ratings.csv')

# Map the raw ids found in ratings_df to dense 0-based matrix indices
# (unique() returns values in order of first appearance).
unique_movie_ids = ratings_df['movieId'].unique()
unique_user_ids = ratings_df['userId'].unique()
movie_to_idx = {original_id: i for i, original_id in enumerate(unique_movie_ids)}
user_to_idx = {original_id: i for i, original_id in enumerate(unique_user_ids)}

# Matrix dimensions
num_movies = len(unique_movie_ids)
num_users = len(unique_user_ids)

# Y[i, j] holds the rating of movie i by user j (0 where there is none);
# R[i, j] is 1 where a rating exists and 0 otherwise.
Y = np.zeros((num_movies, num_users))
R = np.zeros((num_movies, num_users))

# Fill both matrices with a single vectorized assignment instead of a
# Python-level loop over ratings_df.iterrows(), which builds a Series per row.
# keep='last' preserves the "last record wins" outcome the row-by-row loop
# produced if the same (movieId, userId) pair appears more than once.
latest_ratings = ratings_df.drop_duplicates(subset=['movieId', 'userId'], keep='last')
movie_rows = latest_ratings['movieId'].map(movie_to_idx).to_numpy(dtype=np.intp)
user_cols = latest_ratings['userId'].map(user_to_idx).to_numpy(dtype=np.intp)
Y[movie_rows, user_cols] = latest_ratings['rating'].to_numpy()
R[movie_rows, user_cols] = 1

print(f"Y : {Y.shape}")
print(f"R : {R.shape}")

In [None]:
# Mean normalization of Y:
#   Y_mean : each movie's average rating, taken over the users who rated it.
#   Y_norm : Y - Y_mean on rated cells, 0 everywhere else.
# Intended to help with the "cold start" problem.

def normalize_ratings(Y, R):
    """
    Subtract each movie's mean rating from its observed ratings.
    Args:
      Y (ndarray (num_movies,num_users)): matrix of user ratings of movies
      R (ndarray (num_movies,num_users)): matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
    Returns:
      Y_norm (ndarray (num_movies,num_users)): mean-centred ratings; cells with R != 1 stay 0
      Y_mean (ndarray (num_movies,1))        : per-movie mean; 0 for a movie with no ratings
    """
    movie_count, user_count = Y.shape
    Y_mean = np.zeros((movie_count, 1))
    Y_norm = np.zeros((movie_count, user_count))

    for movie in range(movie_count):
        rated = R[movie] == 1
        if not rated.any():
            # No ratings for this movie: mean and normalized row stay at zero.
            continue

        movie_mean = Y[movie, rated].mean()
        Y_mean[movie] = movie_mean
        Y_norm[movie, rated] = Y[movie, rated] - movie_mean

    return Y_norm, Y_mean

In [None]:
num_features = 10  # length of each movie / user vector

# Small random starting values (standard normal draws scaled by 0.1):
#   X : one row of features per movie
#   W : one row of preference weights per user
#   b : one bias per user, stored as a single row
X = 0.1 * np.random.randn(num_movies, num_features)
W = 0.1 * np.random.randn(num_users, num_features)
b = 0.1 * np.random.randn(1, num_users)


$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}$

Equivalently, summing over every (movie, user) pair and using $r(i,j)$ as a 0/1 mask:

$= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$


In [None]:
def cost_function(X, W, b, Y, R, lambda_):
    """
    Returns the regularized collaborative filtering cost, computed with
    explicit loops (reference implementation).
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    movie_count, user_count = Y.shape

    # Squared prediction error; R zeroes out the cells that carry no rating.
    squared_error = 0.0
    for user in range(user_count):
        for movie in range(movie_count):
            residual = np.dot(W[user, :], X[movie, :]) + b[0, user] - Y[movie, user]
            squared_error += R[movie, user] * residual**2
    squared_error *= 0.5

    # L2 penalty on the user weights and movie features (the bias is not penalized).
    penalty = 0.0
    for user in range(user_count):
        for weight in W[user, :]:
            penalty += weight**2
    for movie in range(movie_count):
        for feature in X[movie, :]:
            penalty += feature**2
    penalty *= (lambda_ / 2)

    return squared_error + penalty

In [None]:
# Check the loop-based cost on a small corner of the data.
num_users_r, num_movies_r, num_features_r = 4, 5, 3

# Slice every input down to the reduced problem size.
X_r = X[:num_movies_r, :num_features_r]
W_r = W[:num_users_r, :num_features_r]
b_r = b[0, :num_users_r][np.newaxis, :]
Y_r = Y[:num_movies_r, :num_users_r]
R_r = R[:num_movies_r, :num_users_r]

# Cost without regularization (lambda_ = 0)
J = cost_function(X_r, W_r, b_r, Y_r, R_r, lambda_=0)
print(f"Cost: {J:0.2f}")

# Cost with regularization (lambda_ = 1.5)
J = cost_function(X_r, W_r, b_r, Y_r, R_r, lambda_=1.5)
print(f"Cost (with regularization): {J:0.2f}")

In [None]:
def cost_function_vectorized(X, W, b, Y, R, lambda_):
    """
    Returns the regularized collaborative filtering cost using matrix operations.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    # (movies x features) . (features x users) -> (movies x users); b adds one bias per user column.
    residual = np.dot(X, W.T) + b - Y

    # Multiplying by R drops the cells that carry no rating.
    masked_residual = residual * R
    data_cost = 0.5 * (masked_residual ** 2).sum()

    # L2 penalty on user weights and movie features.
    penalty = (lambda_ / 2) * ((W ** 2).sum() + (X ** 2).sum())

    return data_cost + penalty

$E_{ij} = r(i,j)\,\big( (W X^\top)_{j,i} + b_j - Y_{i,j} \big)$

$\frac{\partial J}{\partial X} = (\text{Error} \cdot W) + \lambda X$  

$\frac{\partial J}{\partial W} = (\text{Error}^T \cdot X) + \lambda W$  

$\frac{\partial J}{\partial b} = \sum_{i} \text{Error}_{i,:}$ (sum over the movie axis, giving one entry per user)

In [None]:
def compute_gradient(X, W, b, Y, R, lambda_):
    """
    Returns the gradients of the regularized cost with respect to X, W and b.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      grad_X, grad_W, grad_b : gradients with the same shapes as X, W and b
    """
    # Residual on rated cells only; cells with R = 0 contribute nothing.
    masked_residual = (np.dot(X, W.T) + b - Y) * R

    return (
        np.dot(masked_residual, W) + (lambda_ * X),
        np.dot(masked_residual.T, X) + (lambda_ * W),
        # The bias is not regularized: just sum the residuals down each user column.
        masked_residual.sum(axis=0, keepdims=True),
    )

In [None]:
def gradient_descent(X, W, b, Y, R, max_iters, lr, lambda_):
    """
    Runs batch gradient descent on X, W and b.
    Args:
      X, W, b   : starting parameters (new arrays are created; the inputs are not modified in place)
      Y, R      : ratings matrix and rated-cell indicator passed through to the gradient and cost
      max_iters : number of update steps to run
      lr        : learning rate
      lambda_   : regularization parameter
    Returns:
      X, W, b   : parameters after the final step
      J_history : cost values recorded every 50th step, starting at step 0
    """
    log_every = 50
    J_history = []

    for step in range(max_iters):
        dX, dW, db = compute_gradient(X, W, b, Y, R, lambda_)
        X, W, b = X - lr * dX, W - lr * dW, b - lr * db

        if step % log_every != 0:
            continue

        # Cost is evaluated after the update of this step.
        cost = cost_function_vectorized(X, W, b, Y, R, lambda_)
        J_history.append(cost)
        print(f"Iteration {step:4d}: Cost {cost:8.2f}")

    return X, W, b, J_history


In [None]:
# def stochastic_gradient_descent(X, W, b, Y, R, max_epochs, alpha, lambda_):
#     idxs = np.argwhere(R == 1)
#     J_history = []
#     for epoch in range(max_epochs):
#         # Shuffle
#         np.random.shuffle(idxs)
        
#         for (i, j) in idxs:
#             # i: Movie index, j: User index
            
#             prediction = np.dot(X[i], W[j]) + b[0, j]
#             error = prediction - Y[i, j]
            

#             w_j_old = W[j].copy()
#             x_i_old = X[i].copy()
            
#             # Update W[j]
#             W[j] = w_j_old - alpha * (error * x_i_old + lambda_ * w_j_old)
            
#             # Update X[i]
#             X[i] = x_i_old - alpha * (error * w_j_old + lambda_ * x_i_old)
            
#             # Update b[j]
#             b[0, j] = b[0, j] - alpha * error
            

#         if epoch % 5 == 0:
#             cost = cost_function_vectorized(X, W, b, Y, R, lambda_)
#             J_history.append(cost)
#             print(f"Epoch {epoch:4d}: Cost {cost:8.2f}")
            
#     return X, W, b, J_history

In [None]:
# Settings for batch gradient descent
lr = 1e-4         # learning rate
lambda_ = 1       # regularization strength
iterations = 300  # number of update steps

# Not used by the active code in this cell; the names match the parameters of
# the commented-out stochastic_gradient_descent.
alpha = 0.001
max_epochs = 20

# Train on mean-normalized ratings; Y_mean is needed later to undo the shift.
Y_norm, Y_mean = normalize_ratings(Y, R)

X_train, W_train, b_train, J_hist = gradient_descent(
    X, W, b, Y_norm, R, max_iters=iterations, lr=lr, lambda_=lambda_
)
print(J_hist)

In [None]:
# gradient_descent records the cost once every 50 iterations, so convert each
# sample's position in J_hist back to the iteration it was recorded at;
# otherwise the "Iterations" axis would show sample indices instead.
logged_iterations = np.arange(len(J_hist)) * 50
plt.plot(logged_iterations, J_hist)
plt.xlabel("Iterations ")
plt.ylabel("Cost")
plt.show()

In [None]:
# Predicted ratings in the mean-normalized space, one column per user.
p_norm = np.dot(X_train, W_train.T) + b_train
# Add each movie's mean back, then clamp to the 0.5 - 5.0 range.
p_final = np.clip(Y_mean + p_norm, a_min=0.5, a_max=5.0)

In [None]:
# Inspect the results for the user stored in matrix column 0.
my_user_id = 0

# Build the reverse lookups once (matrix row -> original movieId -> title)
# instead of rebuilding and scanning key/value lists for every printed movie.
idx_to_movie_id = {idx: original_id for original_id, idx in movie_to_idx.items()}
# drop_duplicates keeps the first row per movieId.
title_by_movie_id = (
    movies_df.drop_duplicates('movieId').set_index('movieId')['title'].to_dict()
)


def movie_title(idx):
    """Return the title for matrix row `idx`, or a placeholder if movies_df has no such movie."""
    original_id = idx_to_movie_id[idx]
    return title_by_movie_id.get(original_id, f"<unknown movieId {original_id}>")


# Movies this user has already rated at or above the threshold, read from the
# original (un-normalized) Y matrix.
# NOTE(review): the previous wording of this comment said ">= 4 stars" while the
# code used 2; the threshold is kept at 2 here - confirm which one is intended.
min_liked_rating = 2
rated_indices = np.where(Y[:, my_user_id] >= min_liked_rating)[0]

print(f"\n--- USER {my_user_id} LIKED THE MOVIES ---")
for idx in rated_indices[:10]:
    print(f"{movie_title(idx)} (Rated: {Y[idx, my_user_id]})")

# Predicted scores for this user. Copy the column: slicing returns a view, and
# the masking below would otherwise overwrite the entries of p_final itself.
prediction_for_user = p_final[:, my_user_id].copy()
# Give already-rated movies a score of -1 so they are not suggested again.
prediction_for_user[R[:, my_user_id] == 1] = -1

# Ten highest predicted scores, best first.
top_10_indices = np.argsort(prediction_for_user)[-10:][::-1]

print(f"\n--- SUGGESTION SYSTEM FOR USER {my_user_id} ---")
for idx in top_10_indices:
    predicted_score = prediction_for_user[idx]
    print(f"{movie_title(idx)} (Prediction: {predicted_score:.1f} star)")

## USE TENSORFLOW

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
def cofi_cost_func_tf(X, W, b, Y, R, lambda_):
    """
    Returns the regularized collaborative filtering cost.
    Built from TensorFlow operations so it can be used inside a custom
    training loop.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    # Predicted rating for every (movie, user) pair.
    scores = tf.linalg.matmul(X, tf.transpose(W)) + b

    # Residual with the unrated cells zeroed out by R.
    masked_residual = (scores - Y) * R

    data_cost = 0.5 * tf.reduce_sum(masked_residual**2)
    penalty = (lambda_ / 2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return data_cost + penalty

In [None]:
# Cost at the current parameters without regularization (lambda_ = 0)
J = cofi_cost_func_tf(X, W, b, Y, R, lambda_=0)
print(f"Cost: {J:0.2f}")

# Cost at the current parameters with regularization (lambda_ = 1.5)
J = cofi_cost_func_tf(X, W, b, Y, R, lambda_=1.5)
print(f"Cost (with regularization): {J:0.2f}")

In [None]:
# Problem dimensions
num_movies, num_users = Y.shape
num_features = 100

# Fix the seed so the random starting point is reproducible.
tf.random.set_seed(1234)

# Trainable parameters, wrapped in tf.Variable so that gradients can be taken
# with respect to them. Creation order (W, X, b) is kept as-is.
W = tf.Variable(
    tf.random.normal(shape=(num_users, num_features), dtype=tf.float64), name='W'
)
X = tf.Variable(
    tf.random.normal(shape=(num_movies, num_features), dtype=tf.float64), name='X'
)
b = tf.Variable(
    tf.random.normal(shape=(1, num_users), dtype=tf.float64), name='b'
)

# Adam optimizer with learning rate 0.1
optimizer = keras.optimizers.Adam(learning_rate=0.1)

In [None]:
iterations = 200
lambda_ = 1
# The loop variable is named `step` rather than `iter` so the builtin iter()
# is not shadowed for the rest of the notebook session.
for step in range(iterations):
    # Use TensorFlow's GradientTape
    # to record the operations used to compute the cost
    with tf.GradientTape() as tape:

        # Compute the cost (forward pass included in cost)
        cost_value = cofi_cost_func_tf(X, W, b, Y_norm, R, lambda_)

    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss
    grads = tape.gradient(cost_value, [X, W, b])

    # Run one optimizer step, updating the variables to reduce the loss.
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    # Log periodically.
    if step % 20 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.1f}")