In [3]:
import numpy as np  
import tensorflow as tf
from tensorflow import keras
import pandas as pd

In [6]:
movies_df = pd.read_csv('./ml-latest-small/movies.csv')
ratings_df = pd.read_csv('./ml-latest-small/ratings.csv')


# Matricies 

Y = Ratings Matrix where the dimension is nm X nu
nm = number of movies and nu is the number of users 

R: binary indicator matrix where R[i,j]  = 1 if user j rated movie i 

X: Movie Feature Matrix where X is a nm X n matrix. nm is the number of movies and n is the number of latent features. 
Each row X^i represents the ith movie's feature vector. Initially this is random and this is updated to learn. 

W: User feature matrix. Where W is represented by nu X n. Where nu are the users and n is the feature matrix. The purpose of this matrix 
is to encode the feature representation of each user. Each row Wj represents the user's pregerence and is learned during the traning. 

B: bias user vecotr which represents the bias for each user. Example some users might rate movies more generously where others are more strict..

In [10]:
# Explore the movie and rating data 
print(movies_df.shape) 
print(movies_df.head(4))

(9742, 3)
   movieId                     title  \
0        1          Toy Story (1995)   
1        2            Jumanji (1995)   
2        3   Grumpier Old Men (1995)   
3        4  Waiting to Exhale (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  


In [11]:
# Explore the rating data 
print(ratings_df.shape) 
print(ratings_df.head(4))

(100836, 4)
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815


In [17]:
# Create Matrix Y with the user ratings
Y = ratings_df.pivot(index='movieId', columns='userId', values='rating').fillna(0)
print(Y.head(2))

userId   1    2    3    4    5    6    7    8    9    10   ...  601  602  603  \
movieId                                                    ...                  
1        4.0  0.0  0.0  0.0  4.0  0.0  4.5  0.0  0.0  0.0  ...  4.0  0.0  4.0   
2        0.0  0.0  0.0  0.0  0.0  4.0  0.0  4.0  0.0  0.0  ...  0.0  4.0  0.0   

userId   604  605  606  607  608  609  610  
movieId                                     
1        3.0  4.0  2.5  4.0  2.5  3.0  5.0  
2        5.0  3.5  0.0  0.0  2.0  0.0  0.0  

[2 rows x 610 columns]


In [16]:
# Create a matrix R which indicated if the movie has been rated 
R = (Y>0).astype(int)
print(R.head(2))

userId   1    2    3    4    5    6    7    8    9    10   ...  601  602  603  \
movieId                                                    ...                  
1          1    0    0    0    1    0    1    0    0    0  ...    1    0    1   
2          0    0    0    0    0    1    0    1    0    0  ...    0    1    0   

userId   604  605  606  607  608  609  610  
movieId                                     
1          1    1    1    1    1    1    1  
2          1    1    0    0    1    0    0  

[2 rows x 610 columns]


In [18]:
# Convert the matricies to numpy arrays 
Y = Y.values
R = R.values

In [30]:
# Define the initial features for X vector and W vector(each user gets a w vector) where X is the parameters for the movie ratings 
n_m, n_u = Y.shape
n = 10

X = np.random.randn(n_m, n) * 0.01
W = np.random.randn(n_u, n) * 0.01
b = np.zeros((n_u, 1))  # Start biases with zeros


In [31]:
print(X)
print(X.shape)

[[ 0.01294131  0.00067009 -0.00309424 ... -0.00538871 -0.00554665
  -0.01520644]
 [ 0.00441273  0.00475156  0.0002048  ... -0.00376273  0.00972508
  -0.00254479]
 [ 0.01234075 -0.00419073 -0.01123447 ...  0.01250099 -0.0114145
   0.00019858]
 ...
 [-0.01058137  0.02264905 -0.00919793 ... -0.00302029  0.02035576
  -0.01856   ]
 [ 0.01326253  0.00596476 -0.00681187 ...  0.00460272  0.00745181
   0.00042316]
 [ 0.00383306 -0.01417168  0.00262409 ...  0.00828204  0.01390035
  -0.01080509]]
(9724, 10)


In [49]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """
    Returns the cost for the content-based filtering
    Vectorized for speed. Uses tensorflow operations to be compatible with custom training loop.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users)            : vector of user parameters
      Y (ndarray (num_movies,num_users)    : matrix of user ratings of movies
      R (ndarray (num_movies,num_users)    : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    j = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y)*R
    J = 0.5 * tf.reduce_sum(j**2) + (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return J

In [50]:
#  Useful Values
num_movies, num_users = Y.shape
num_features = 100

# Set Initial Parameters (W, X), use tf.Variable to track these variables
tf.random.set_seed(1234) # for consistent results
W = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1,          num_users),   dtype=tf.float64),  name='b')

# Instantiate an optimizer.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [53]:
iterations = 200
lambda_ = 1
for iter in range(iterations):
    # Use TensorFlow’s GradientTape
    # to record the operations used to compute the cost 
    with tf.GradientTape() as tape:

        # Compute the cost (forward pass included in cost)
        cost_value = cofi_cost_func_v(X, W, b, Y, R, lambda_)

    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss
    grads = tape.gradient( cost_value, [X,W,b] )

    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients( zip(grads, [X,W,b]) )

    # Log periodically.
    if iter % 20 == 0:
        print(f"Training loss at iteration {iter}: {cost_value:0.1f}")

Training loss at iteration 0: 6326526.2
Training loss at iteration 20: 322218.2
Training loss at iteration 40: 132071.2
Training loss at iteration 60: 68227.5
Training loss at iteration 80: 40689.5
Training loss at iteration 100: 27034.5
Training loss at iteration 120: 19543.1
Training loss at iteration 140: 15131.5
Training loss at iteration 160: 12387.3
Training loss at iteration 180: 10599.8


In [60]:
predictions = np.dot(X, tf.transpose(W)) + b

In [61]:
# so, what is predictions? Predictions is a matrix of values that form the predictions for all movies and users. 
# Each row in the predictions matrix represents a movie and each column represents that user's predicted rating 
print(predictions)

tf.Tensor(
[[4.11085677 2.87297785 2.77680436 ... 2.38708211 3.03170469 4.81474688]
 [3.97450107 3.29062276 0.62962705 ... 2.03565525 3.14054114 3.55933237]
 [3.98537066 3.47054158 2.23596188 ... 2.01004576 3.24968171 4.03429709]
 ...
 [1.9553131  2.89185557 2.12513447 ... 2.40907178 3.27850633 0.95426291]
 [1.95519421 2.8917428  2.12571997 ... 2.40822145 3.27846831 0.95286049]
 [2.823532   3.20764636 2.15316761 ... 2.88045942 3.35836029 2.29884179]], shape=(9724, 610), dtype=float64)


In [62]:
# Now. To get a given user's top 20 reccomended movies, we would want to get the highest rated movies for user j where the user is represented
# in the column
# the following are the movie predictions for user 20. 

predictions_j = predictions[:,20]
print(predictions_j)

tf.Tensor([3.44946401 3.28439259 3.84570581 ... 0.38215946 0.3811201  1.23344786], shape=(9724,), dtype=float64)


In [66]:
# Sort from largest to smallest (highest predicted ratings first)
ix = tf.argsort(predictions_j, direction="DESCENDING").numpy()  # Convert tensor to numpy array

# Now we have the indices of the highest predicted movies. Show the top 10 predictions.
for i in range(10):
    movie_index = ix[i]  # Get the index of the i-th highest-rated movie
    print(movies_df.iloc[movie_index])  # Access the movie details using .iloc


movieId                           56003
title            Southland Tales (2006)
genres     Comedy|Drama|Sci-Fi|Thriller
Name: 6621, dtype: object
movieId                            50
title      Usual Suspects, The (1995)
genres         Crime|Mystery|Thriller
Name: 46, dtype: object
movieId                                   357
title      Four Weddings and a Funeral (1994)
genres                         Comedy|Romance
Name: 315, dtype: object
movieId                        1224
title                Henry V (1989)
genres     Action|Drama|Romance|War
Name: 925, dtype: object
movieId                      27846
title      Corporation, The (2003)
genres                 Documentary
Name: 5719, dtype: object
movieId                     5293
title      Changing Lanes (2002)
genres            Drama|Thriller
Name: 3785, dtype: object
movieId                                                595
title                          Beauty and the Beast (1991)
genres     Animation|Children|Fantasy|Musical|