In [1]:
import scipy.io
import time
import numpy as np

In [None]:
## Please do not forget to keep `movies_data.mat` file in the same folder as this notebook.

In [13]:
def my_recommender(rate_mat, lr, with_reg):
    """
    :param rate_mat: training rating matrix (with zeros for missing ratings)
    :param lr: latent dimension (low rank)
    :param with_reg: boolean flag, set true for using regularization
    :return: U, V, b_u, b_i, global_bias"""

    #initializations
    n_user, n_item = rate_mat.shape
    U = np.random.rand(n_user, lr) / lr
    V = np.random.rand(n_item, lr) / lr
    b_u = np.zeros((n_user, 1))
    b_i = np.zeros((n_item, 1))
    global_bias = np.sum(rate_mat) / np.sum(rate_mat > 0)

    # TODO pick hyperparams
    max_iter = 100
    learning_rate = 0.01
    
    # if with_reg is true then we have the regurization term, otherwise the lambda is 0.
    if with_reg ==True:
        u_reg = 0.1
        v_reg = 0.1
        b_reg = 0.01
    else:
        u_reg = 0
        v_reg = 0
        b_reg = 0

    # TODO implement your code here
    itr = 0
    avg_error = 1
    rate_mat_size = np.count_nonzero(rate_mat)
    
    # Indices where ratings exist
    non_zero_indices = np.argwhere(rate_mat > 0)
    #rate_mat_size = np.count_nonzero(rate_mat) don't need this, could just use len(non_zero_indices)
    
    while(itr <= max_iter and avg_error>0.1):
        total_error = 0
        # Loop over all known ratings
        for u, i in non_zero_indices:
            # True rating
            r_ui = rate_mat[u, i]
            
            # Predicted rating
            pred_ui = global_bias+b_u[u]+b_i[i]+np.dot(U[u,:],V[i,:].T)

            #error
            error = r_ui-pred_ui
            total_error += error**2

            #Updateing U,V
            U[u,:] += learning_rate * (error * V[i,:] - u_reg * U[u,:])
            V[i,:] += learning_rate * (error * U[u,:] - v_reg * V[i,:])

            #Updating bias b_u,b_i
            b_u[u] += learning_rate * (error - b_reg * b_u[u])
            b_i[i] += learning_rate * (error - b_reg * b_i[i])


        itr+=1
        avg_error = total_error/len(non_zero_indices)
    


    return U, V, b_u, b_i, global_bias

In [14]:
cell = scipy.io.loadmat('movies_data.mat')
rate_mat = cell['train']
test_mat = cell['test']

low_rank_ls = [1, 3, 5]
for lr in low_rank_ls:
    for reg_flag in [False, True]:
        st = time.time()
        U, V, b_u, b_i, global_bias = my_recommender(rate_mat, lr, reg_flag)
        t = time.time() - st
        
        # Compute RMSE for training set
        mask_train = (rate_mat > 0)
        train_pred = global_bias + b_u + b_i.T + U.dot(V.T)
        train_rmse = np.sqrt(np.sum(((rate_mat - train_pred) * mask_train) ** 2) / float(np.sum(mask_train)))
        
        # Compute RMSE for test set
        mask_test = (test_mat > 0)
        test_pred = global_bias + b_u + b_i.T + U.dot(V.T)
        test_rmse = np.sqrt(np.sum(((test_mat - test_pred) * mask_test) ** 2) / float(np.sum(mask_test)))
        
        print('SVD-%s-%i\t%.4f\t%.4f\t%.2f\n' % ('withReg' if reg_flag else 'noReg', lr, train_rmse, test_rmse, t))

SVD-noReg-1	0.8632	0.9241	129.37

SVD-withReg-1	0.8709	0.9186	119.69

SVD-noReg-3	0.8081	0.9552	129.19

SVD-withReg-3	0.8272	0.9087	122.41

SVD-noReg-5	0.7522	0.9820	130.22

SVD-withReg-5	0.7967	0.9129	124.93

