In [10]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd  
import tensorflow as tf
from tensorflow import keras 


##### The dataset used here to practice building a recommender system contains roughly 100,000 ratings and 3,600 tag applications applied to about 9,000 movies by about 600 users. It is available on the GroupLens website.

In [11]:
# Read the ratings table from the working directory and preview the first rows.
data = pd.read_csv("ratings.csv")
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [12]:
# Distinct raw identifiers, in order of first appearance in the ratings table.
users = data['userId'].unique()
movies = data['movieId'].unique()

num_users, num_movies = len(users), len(movies)

# Raw ids are remapped to consecutive 0-based positions so they can be used as
# row/column indices (per the original note, the largest movieId exceeds the
# number of distinct movies, so the raw ids cannot be used directly).
userid_to_index = dict(zip(users, range(num_users)))
movieid_to_index = dict(zip(movies, range(num_movies)))

# Attach the positional indices to every rating row.
data['user_index'] = data['userId'].map(userid_to_index).astype(int)
data['movie_index'] = data['movieId'].map(movieid_to_index).astype(int)

print(f"the number of movies {num_movies}")
print(f"the number of users {num_users}")

data.head()

the number of movies 9724
the number of users 610


Unnamed: 0,userId,movieId,rating,timestamp,user_index,movie_index
0,1,1,4.0,964982703,0,0
1,1,3,4.0,964981247,0,1
2,1,6,4.0,964982224,0,2
3,1,47,5.0,964983815,0,3
4,1,50,5.0,964982931,0,4


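##### Create the y and R matrices: y[i, j] holds the rating of movie i by user j (0 where there is no rating), and R[i, j] is 1 where y[i, j] > 0 and 0 otherwise.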

In [13]:
# Ratings matrix: rows are movie indices, columns are user indices.
# Entries stay 0 wherever no rating is present in `data`.
y = np.zeros((num_movies, num_users))

# Fill every rating in a single vectorized assignment instead of iterating
# over the DataFrame row by row. If a (movie, user) pair were repeated, the
# last occurrence wins, exactly as with a sequential loop.
movie_idx = data['movie_index'].to_numpy()
user_idx = data['user_index'].to_numpy()
y[movie_idx, user_idx] = data['rating'].to_numpy()

# Indicator matrix: 1 where a positive rating is stored in y, 0 elsewhere.
# NOTE(review): a stored rating of exactly 0 would be treated as "not rated";
# presumably the dataset has no such ratings - confirm.
R = np.where(y > 0, 1, 0)


$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$

In [14]:
def cost_function(X, W, b, y, R, lambda_):
    """
    Regularized squared-error cost for collaborative filtering.

    Computes 1/2 * sum(R * (X @ W^T + b - y)^2)
             + lambda_/2 * (sum(W^2) + sum(X^2)).

    Args:
      X (ndarray/tensor) : (num_movies,num_features) movie feature matrix
      W (ndarray/tensor) : (num_users,num_features)  user parameter matrix
      b (ndarray/tensor) : (1,num_users) user biases, broadcast over the movie
                           axis (a (num_users,) vector broadcasts the same way)
      y (ndarray)        : (num_movies,num_users) ratings
      R (ndarray)        : (num_movies,num_users) mask selecting which entries
                           of the error contribute to the cost
      lambda_ (float)    : regularization strength
    Returns:
      J (scalar tensor)  : cost
    """
    # Prediction error for every (movie, user) pair; b broadcasts across rows.
    err = tf.matmul(X, W, transpose_b=True) + b - y

    # Multiplying by R zeroes out the entries that must not contribute.
    squared_error = 1/2 * tf.reduce_sum(R * err**2)

    # L2 penalty on the user parameters and movie features (b is not penalized).
    regularization = (lambda_/2) * (tf.reduce_sum(W**2) + tf.reduce_sum(X**2))

    return squared_error + regularization



##### Initialize the variables. You have to choose the value of num_features yourself.

In [15]:
# Number of features per movie row of X and per user row of W.
num_features = 80

# Seed TensorFlow's global random generator before drawing the initial values.
tf.random.set_seed(28)

# Trainable parameters, initialized from a normal distribution.
# `name` is given to tf.Variable (not to tf.random.normal) so that the
# variable itself, rather than the initializer op, carries the label.
W = tf.Variable(tf.random.normal((num_users, num_features), dtype=tf.float64), name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features), dtype=tf.float64), name='X')
b = tf.Variable(tf.random.normal((1, num_users), dtype=tf.float64), name='b')

# Adam optimizer; the positional argument is the learning rate.
optimizer = keras.optimizers.Adam(0.5)


In [17]:
iterations = 2000
lambda_ = 0.1

# Variables updated by the optimizer on every step.
trainable = [X, W, b]

for i in range(iterations):
    # Record the cost computation so it can be differentiated.
    with tf.GradientTape() as tape:
        loss = cost_function(X, W, b, y, R, lambda_)

    # Gradients of the cost w.r.t. each variable, followed by one optimizer update.
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    # Report progress every 20 iterations.
    if not i % 20:
        print(f"Training loss at iteration {i}: {loss:0.1f}")


Training loss at iteration 0: 4714530.7
Training loss at iteration 20: 169267.2
Training loss at iteration 40: 75210.4
Training loss at iteration 60: 37317.3
Training loss at iteration 80: 20531.6
Training loss at iteration 100: 12277.9
Training loss at iteration 120: 7805.5
Training loss at iteration 140: 5218.6
Training loss at iteration 160: 3649.6
Training loss at iteration 180: 2660.4
Training loss at iteration 200: 2017.0
Training loss at iteration 220: 1587.7
Training loss at iteration 240: 1294.7
Training loss at iteration 260: 1091.5
Training loss at iteration 280: 947.3
Training loss at iteration 300: 843.9
Training loss at iteration 320: 768.8
Training loss at iteration 340: 716.9
Training loss at iteration 360: 672.4
Training loss at iteration 380: 641.0
Training loss at iteration 400: 621.3
Training loss at iteration 420: 608.3
Training loss at iteration 440: 586.5
Training loss at iteration 460: 580.3
Training loss at iteration 480: 573.8
Training loss at iteration 500: 5

In [None]:
##### Predict the ratings of the user with index 0

In [39]:
# Predicted rating of every movie for the user at index 0: X @ w_0^T + b_0.
# Slicing with :1 keeps the leading/trailing axis, so the result is a column.
w_user0 = W[:1, :]
b_user0 = b[:, :1]
user_0 = tf.matmul(X, tf.transpose(w_user0)) + b_user0

In [52]:
# Predicted ratings of user 0 for movie indices 225 through 234.
user_0[225:235, :]

<tf.Tensor: shape=(10, 1), dtype=float64, numpy=
array([[4.67926623],
       [4.23076635],
       [3.94033501],
       [4.93128297],
       [3.89826064],
       [3.90021243],
       [5.03131744],
       [4.30870542],
       [3.4712942 ],
       [3.83452667]])>

In [51]:
# Stored ratings of user 0 for the same movie indices (225 through 234).
y[225:235, 0:1]

array([[5.],
       [4.],
       [4.],
       [5.],
       [4.],
       [4.],
       [5.],
       [0.],
       [0.],
       [0.]])

#### For the movies with indices 232, 233 and 234, which user 0 has not rated (y = 0), the model predicts ratings of about 4.31, 3.47 and 3.83.