In [1]:
# Useful starting lines
%matplotlib inline

import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import matplotlib.pyplot as plt
from helpers import *
from helpers import *
from plots import *
from plots import *
from split_data import *
from recommender import *
from cross_validation import *
%load_ext autoreload
%autoreload 2

## Load Data

In [2]:
path_dataset = "../data/data_train.csv"
ratings = load_data(path_dataset)

number of items: 10000, number of users: 1000


## Split data into training and test set

In [3]:
def split_data(ratings, p_test=0.1, seed=48):
    
    # set seed
    np.random.seed(seed)
    
    # generate random indices
    row = ratings.shape[0]
    col = ratings.shape[1]
    num = row*col
    
    indices = np.random.permutation(num)
    index_split = int(np.floor(p_test * num))
    
    index_tr_n = indices[: index_split]
    index_te_n = indices[index_split:]
    
    rat_arr = ratings.toarray()
    
    # reshaping
    valid_array_te = np.copy(rat_arr).reshape((num,1))
    valid_array_tr = np.copy(rat_arr).reshape((num,1))  
    
    # create split
    train = valid_array_tr
    train[index_tr_n] = 0
    train = train.reshape((row,col))
    
    test = valid_array_te
    test[index_te_n] = 0
    test = test.reshape((row,col))
    
    # ***************************************************
    
    #print (valid_ratings, train, test)
    return rat_arr, train, test
print("function 'compiled'")

function 'compiled'


In [4]:
rats, train, test = split_data(ratings, p_test=0.1, seed=46)
print("done")#2:10 much quicker than that :)


done


In [5]:
def user_mean(data):
    nnz_u = np.copy(data)
    nnz_u[np.where( data > 0 )] = 1
    return train.sum(axis=0) / nnz_u.sum(axis=0)

def item_mean(data):
    
    nnz_i = np.copy(data)
    nnz_i[np.where( data >1 )] = 1
    return train.sum(axis=1) / nnz_i.sum(axis=1)
    
print("functions 'compiled'")

functions 'compiled'


In [6]:
def init_MF_ALS_biased(train, num_features, factor_features=0.1, factor_biases=1):
    num_items, num_users = train.shape
    user_features = factor_features*np.ones((num_users,num_features))
    item_features = factor_features*np.ones((num_items,num_features))
    user_biases = factor_biases*np.ones(num_users)
    item_biases = factor_biases*np.ones(num_items)
    return user_features, item_features, user_biases, item_biases
print("function 'compiled'")

function 'compiled'


In [7]:
def init_MF_ALS(train, num_features, factor_features=0.1):
    num_items, num_users = train.shape
    user_features = factor_features*np.ones((num_users,num_features))
    item_features = factor_features*np.ones((num_items,num_features))
    return user_features, item_features
print("function 'compiled'")   

function 'compiled'


In [8]:
def data_user_biased(data, user_biases):
    data_user_biased = data - user_biases
    return data_user_biased
def data_item_biased(data, item_biases):
    data_item_biased = (data.T - item_biases).T
    return  data_item_biased
print("functions 'compiled'")

functions 'compiled'


In [9]:
def compute_error_prediction(data, prediction, nz):
    real_label = np.array([data[d,n] for (d,n) in nz])
    prediction_label = np.array([prediction[d,n] for (d,n) in nz])
    rmse = np.sqrt((1/len(nz))*calculate_mse(real_label,prediction_label))
    return rmse
print("function 'compiled'")

function 'compiled'


In [10]:
def prediction_biased(item_features, item_biases, user_features, user_biases):    
    prediction_data =    user_features.dot(item_features.T).T
    prediction = ((prediction_data + user_biases).T + item_biases).T       
    return prediction
print("function 'compiled'")

function 'compiled'


In [23]:
def update_item_biased_feature(train, user_features, user_biases, lambda_item, nnz_users_per_item, nz_item_userindices):
    
    num_users, num_features = user_features.shape
    num_items = train.shape[0]
    ones_biases = np.array([np.ones(num_users)])
    item_biases = np.zeros(num_items)
    item_features = np.zeros((num_items,num_features))         
        
    for item in np.arange(num_items): 
        nnz_users = nnz_users_per_item[item]
        nz_userindices = nz_item_userindices[item]
        nz_userfeatures = user_features[nz_userindices,:]
        nz_onesbiases = ones_biases[:,nz_userindices]
        nz_userbiases = user_biases[nz_userindices]
    
    
        Xt = np.concatenate((nz_onesbiases, nz_userfeatures.T), axis=0)
        A = Xt.dot(Xt.T) + lambda_item*nnz_users*np.eye(num_features+1)  
        train_item = (train[item,nz_userindices])
        b = Xt.dot(data_user_biased(train_item, nz_userbiases).T) 

        Yt = np.linalg.solve(A,b)
        
        item_features[item,:] = Yt[1:num_features+1]
        item_biases[item] = Yt[0]

    return item_features, item_biases

def update_user_biased_feature(train, item_features, item_biases, lambda_user, nnz_items_per_user, nz_user_itemindices):
    
    num_users = train.shape[1]
    num_items, num_features = item_features.shape
    ones_biases = np.array([np.ones(num_items)])
    user_biases = np.zeros(num_users)
    user_features = np.zeros((num_users,num_features))
    
    for user in np.arange(num_users):        
        nnz_items = nnz_items_per_user[user]
        nz_itemindices = nz_user_itemindices[user]
        nz_itemfeatures = item_features[nz_itemindices,:]
        nz_onesbiases = ones_biases[:,nz_itemindices]
        nz_itembiases = item_biases[nz_itemindices]
        
    
        Yt = np.concatenate((nz_onesbiases, nz_itemfeatures.T), axis=0)
        A = Yt.dot(Yt.T) + lambda_user*nnz_items*np.eye(num_features+1)  
        train_user = train[nz_itemindices,user]
        b = Yt.dot(data_item_biased(train_user, nz_itembiases)) 
        Xt = np.linalg.solve(A,b)
        
        user_features[user,:] = Xt[1:num_features+1]
        user_biases[user] = Xt[0]

    return user_features, user_biases

print("functions 'compiled'")

functions 'compiled'


In [28]:
def ALS_biased(train, test, seed=52):
    """Alternating Least Squares (ALS) algorithm."""
    # define parameters
    num_features = 10   # K in the lecture notes
    lambda_user = 0.01
    lambda_item = 0.01
    
    stop_criterion = 1e-7
    #change = 1
    error_list = [0, 0]
    max_it = 10 
    
    error_old = 10
    error_new = 5
    
    # set seed
    np.random.seed(seed)

    # init ALS
    user_features, item_features, user_biases, item_biases = init_MF_ALS_biased(train, num_features)
    
    # ***************************************************
    
    nz_row, nz_col = train.nonzero()
    nz_train = list(zip(nz_row, nz_col))    
    
    nz_row, nz_col = test.nonzero()
    nz_test = list(zip(nz_row, nz_col))
    
    nz_train, nz_row_colindices, nz_col_rowindices = build_index_groups(train)
    _,nz_user_itemindices = map(list,zip(*nz_col_rowindices))
    nnz_items_per_user = [len(i) for i in nz_user_itemindices]
    _,nz_item_userindices = map(list,zip(*nz_row_colindices))
    nnz_users_per_item = [len(i) for i in nz_item_userindices]

    print("learn the matrix factorization using ALS...")

    for it in np.arange(max_it):
        
        
        item_features, item_biases = update_item_biased_feature(train, user_features, user_biases, lambda_item, nnz_users_per_item, nz_item_userindices)
        user_features, user_biases = update_user_biased_feature(train, item_features, item_biases, lambda_user, nnz_items_per_user, nz_user_itemindices)
        
        prediction = prediction_biased(item_features, item_biases, user_features, user_biases)        
        rmse = compute_error_prediction(train, prediction, nz_train)        
        print("iter: {}, RMSE on training set: {}.".format(it, rmse))
        
        error_new = compute_error_prediction(test, prediction, nz_test)
        
        error_list.append(rmse)
        if abs(error_list[-1]-error_list[-2])<stop_criterion:
            break
        if error_new>error_old:
            print("Best iter: {}, with RMSE on test data: {}. ".format(it-1,error_old))
            break
        error_old = error_new

    prediction = prediction_biased(item_features, item_biases, user_features, user_biases)
    rmse = compute_error_prediction(test, prediction, nz_test)
    print("RMSE on test data: {}.".format(rmse))
    print("done")
    
    # ***************************************************

ALS_biased(train, test)

learn the matrix factorization using ALS...
iter: 0, RMSE on training set: 0.9942376202932252.
iter: 1, RMSE on training set: 0.977796018913231.
iter: 2, RMSE on training set: 0.9747794410155082.
iter: 3, RMSE on training set: 0.9735733192748869.
iter: 4, RMSE on training set: 0.9731092932921972.
iter: 5, RMSE on training set: 0.9728947213461506.
iter: 6, RMSE on training set: 0.959069630514617.
iter: 7, RMSE on training set: 0.9000181762710602.
Best iter: 6, with RMSE on test data: 0.9911782750011228. 
RMSE on test data: 1.0121953881685413.
done


In [29]:
def prediction_non_biased(item_features, user_features):    
    prediction = user_features.dot(item_features.T).T    
    return prediction
print("function 'compiled'")

function 'compiled'


In [73]:
def update_user_feature(
        train, item_features, lambda_user,
        nnz_items_per_user, nz_user_itemindices):
    """update user feature matrix."""
    # ***************************************************
    num_items,num_users = train.shape
    num_features = item_features.shape[1]
    user_feature = np.zeros((num_users,num_features))
    for user in np.arange(num_users):
        nnz_items = nnz_items_per_user[user]
        nz_itemindices = nz_user_itemindices[user]
        nz_itemfeatures = item_features[nz_itemindices,:]
        A = ((nz_itemfeatures.T).dot(nz_itemfeatures)+lambda_user*nnz_items*np.eye(num_features))
        train_user = train[nz_itemindices,user]
        b = ((nz_itemfeatures.T).dot(train_user))
        user_feature[user,:] = np.linalg.solve(A,b)
    # ***************************************************
    return user_feature

def update_item_feature(
        train, user_features, lambda_item,
        nnz_users_per_item, nz_item_userindices):
    """update item feature matrix."""
    # ***************************************************
    num_items,num_users = train.shape
    num_features = user_features.shape[1]
    item_feature = np.zeros((num_items,num_features))
    for item in np.arange(num_items):
        nnz_users = nnz_users_per_item[item]
        nz_userindices = nz_item_userindices[item]
        nz_userfeatures = user_features[nz_userindices,:]
        A = ((nz_userfeatures.T).dot(nz_userfeatures)+lambda_item*nnz_users*np.eye(num_features))
        train_item = (train[item,nz_userindices])
        b = ((nz_userfeatures.T).dot(train_item))
        item_feature[item,:] = np.linalg.solve(A,b)
    # ***************************************************
    return item_feature
print("functions 'compiled'")

functions 'compiled'


In [74]:
from helpers import build_index_groups
np.seterr(all='raise') 
def ALS(train, test, seed=552):
    """Alternating Least Squares (ALS) algorithm."""
    # define parameters
    num_features = 10   # K in the lecture notes
    lambda_user = 0.1
    lambda_item = 0.1
    
    stop_criterion = 1e-7
    #change = 1
    error_list = [0, 0]
    max_it = 10 
    
    error_old = 10
    error_new = 5
    
    # set seed
    np.random.seed(seed)

    # init ALS
    user_features, item_features = init_MF_ALS(train, num_features)
    
    # ***************************************************
    
    nz_row, nz_col = train.nonzero()
    nz_train = list(zip(nz_row, nz_col))    
    
    nz_row, nz_col = test.nonzero()
    nz_test = list(zip(nz_row, nz_col))
    
    
    nz_train, nz_row_colindices, nz_col_rowindices = build_index_groups(train)
    _,nz_user_itemindices = map(list,zip(*nz_col_rowindices))
    nnz_items_per_user = [len(i) for i in nz_user_itemindices]
    _,nz_item_userindices = map(list,zip(*nz_row_colindices))
    nnz_users_per_item = [len(i) for i in nz_item_userindices]

    print("learn the matrix factorization using ALS...")

    for it in np.arange(max_it):
        
        item_features = update_item_feature(train, user_features, lambda_item, nnz_users_per_item, nz_item_userindices)

        user_features = update_user_feature(train, item_features, lambda_user, nnz_items_per_user, nz_user_itemindices)
        
        prediction = prediction_non_biased(item_features, user_features)
        
        rmse = compute_error_prediction(train, prediction, nz_train)
        print("iter: {}, RMSE on training set: {}.".format(it, rmse))
        
        error_new = compute_error_prediction(test, prediction, nz_test)
        error_list.append(rmse)
        if abs(error_list[-1]-error_list[-2])<stop_criterion:
            break
        if error_new>error_old:
            break
        error_old = error_new
        
    prediction = prediction_non_biased(item_features, user_features)
    rmse = compute_error_prediction(test, prediction, nz_test)
    print("RMSE on test data: {}.".format(rmse))
    print("done")
    
    # ***************************************************

ALS(train, test)

learn the matrix factorization using ALS...
iter: 0, RMSE on training set: 0.9959192861108764.
iter: 1, RMSE on training set: 0.9912939750943721.
iter: 2, RMSE on training set: 0.9908659304261874.
iter: 3, RMSE on training set: 0.990810472982908.
iter: 4, RMSE on training set: 0.9908690709872654.
RMSE on test data: 1.0025515814822392.
done


In [None]:
#OLD CODE USELESS (KIND OF BAK FILE)


In [84]:
train_user_biased, train_item_biased = data_user_biased(train, user_biases),data_item_biased(train, item_biases)
print(train_user_biased.shape)
print(train_item_biased.shape)


(10000, 1000)
(10000, 1000)


In [79]:
u_mean = user_mean(train)
i_mean = item_mean(train)

print(u_mean.shape)
print(i_mean.shape)

(1000,)
(10000,)


In [None]:
user_features, item_features, user_biases, item_biases = init_MF_ALS_biased(train, 15)

print(user_features.shape)
print(item_features.shape)
print(user_biases.shape)
print(item_biases.shape)

In [250]:
def matrix_factorization_SGD(train, test): #rly bad
    """matrix factorization by SGD."""
    # define parameters
    gamma = 0.01
    num_features = 10   # K in the lecture notes
    lambda_user = 0.1
    lambda_item = 0.7
    num_epochs = 20     # number of full passes through the train set
    errors = [0]
    
    # set seed
    np.random.seed(988)

    # init matrix
    user_features, item_features = init_MF(train, num_features)
    
    # find the non-zero ratings indices 
    nz_row, nz_col = train.nonzero()
    nz_train = list(zip(nz_row, nz_col))
    nz_row, nz_col = test.nonzero()
    nz_test = list(zip(nz_row, nz_col))

    num_items, num_users = train.shape
    
    print("learn the matrix factorization using SGD...")
    for it in range(num_epochs):        
        # shuffle the training rating indices
        np.random.shuffle(nz_train)
        
        # decrease step size
        #gamma /= 1.2
        
        
        
        for d, n in nz_train:
        # ***************************************************
            prediction = prediction_non_biased(item_features, user_features)
            gradient = np.zeros(((num_items + num_users),num_features))
            prediction_error = (train[d,n] - prediction[d,n])
            #print(prediction_error)
            #gradient entries for W
            gradient[d,:] = -(prediction_error)*(user_features[n,:].T) + lambda_item*item_features[d,:]
            #gradient entries for Z
            gradient[num_items+n,:] = -(prediction_error)*(item_features[d,:]) + lambda_user*user_features[n,:]
            
            #update
            item_features = item_features - gamma*(gradient[:num_items,:])
            user_features = user_features - gamma*(gradient[num_items:,:])
            
        rmse = compute_error_biased(train, prediction, nz_train)

        print("iter: {}, RMSE on training set: {}.".format(it, rmse))
        
        errors.append(rmse)

        # decrease step size
        gamma /= 1.2
    # ***************************************************
    # TODO
    # evaluate the test error.
    # ***************************************************
    rmse = 0#compute_error_biased(test, user_features, item_features, nz_test)
    print("RMSE on test data: {}.".format(rmse))

matrix_factorization_SGD(train, test)




learn the matrix factorization using SGD...


KeyboardInterrupt: 

In [None]:
item_features, item_biases = update_item_biased_feature(train, user_features, user_biases, 0.01)

print(item_features.shape)
print(item_biases.shape)

user_features, user_biases = update_user_biased_feature(train, item_features, item_biases, 0.01)

print(user_features.shape)
print(user_biases.shape)

In [110]:
    nz_row, nz_col = test.nonzero()
    nz_test = list(zip(nz_row, nz_col))
    nz_train, nz_row_colindices, nz_col_rowindices = build_index_groups(train)
    _,nz_user_itemindices = map(list,zip(*nz_col_rowindices))
    nnz_items_per_user = [len(i) for i in nz_user_itemindices]
    _,nz_item_userindices = map(list,zip(*nz_row_colindices))
    nnz_users_per_item = [len(i) for i in nz_item_userindices]
    max_it = 20
    

(10000, 1000)


In [97]:
print(i_feat.dot(u_feat.T).shape)

def update_item_feature(train, user_features, lambda_item):
    num_users, num_features = user_features.shape
    
    Xt = user_features.T
    A = Xt.dot(Xt.T) + lambda_item*np.eye(num_features)  
    b = Xt.dot(train.T) 

    Yt = np.linalg.solve(A,b)
    item_features = Yt.T

    return item_features

print("function 'compiled'")

def update_user_feature(train, item_features, lambda_user):
    num_items, num_features = item_features.shape
    
    Yt = item_features.T
    A = Yt.dot(Yt.T) + lambda_user*np.eye(num_features)  
    b = Yt.dot(train) 

    Xt = np.linalg.solve(A,b)
    user_features = Xt.T

    return user_features

print("function 'compiled'")

(1000, 8)
(10000, 8)


## Matrix factorisation using SGD

In [None]:
%run run.py 0

## ALS

In [None]:
%run run.py 1

## Other Methods
### CCD

In [26]:
%run run.py 2

Loading training data
number of items: 10000, number of users: 1000
Preprocessing data
Splitting data into train and test sets
Training model
learn the matrix factorization using CCD...
iter: 0, RMSE on training set: 0.9960226377334059.
iter: 1, RMSE on training set: 0.9960226376398212.
RMSE on test data: 1.0065024878485005.
RMSE on train data: 0.9960226376398212.
RMSE on test data: 1.0065024878485005.


In [None]:
valid_ratings, train_arr, test_arr = split_data(
    ratings, num_items_per_user, num_users_per_item, min_num_ratings=10, p_test=0.1)
#plot_train_test_data(train_validation, test)

## Cross validation

### Number of features

In [21]:
## !!! Takes long time to run

method = 2     # 0-SGD 1-ALS
K = 5         ## K-fold cross validation
gamma = 0.01
num_features_arr = [1, 3, 5, 7, 10, 13, 15]   # K in the lecture notes
lambda_user = 0.1
lambda_item = 0.7
min_num_ratings=10

train_rmse_mean = np.zeros(len(num_features_arr))
train_rmse_std = np.zeros(len(num_features_arr))
validation_rmse_mean = np.zeros(len(num_features_arr))
validation_rmse_std = np.zeros(len(num_features_arr))

for i, num_features in enumerate(num_features_arr):
    train_rmse_arr = []
    validation_rmse_arr = []
    
    print('Running num_features={n}'.format(n=num_features))
    [train_rmse_arr, validation_rmse_arr] = cross_validation(ratings, K, method, num_items_per_user, 
                                                             num_users_per_item, min_num_ratings, num_features, lambda_user, lambda_item)
        
    ## Calculate mean and standard deviation    
    train_rmse_mean[i] = np.mean(train_rmse_arr)
    train_rmse_std[i] = np.std(train_rmse_arr)
    validation_rmse_mean[i] = np.mean(validation_rmse_arr)
    validation_rmse_std[i] = np.std(validation_rmse_std)
    
## Plotting results
plt.fill_between(num_features_arr, train_rmse_mean - train_rmse_std,
                     train_rmse_mean + train_rmse_std, alpha=0.1, color="r")
plt.fill_between(num_features_arr, validation_rmse_mean - validation_rmse_std,
                     validation_rmse_mean + validation_rmse_std, alpha=0.1, color="g")
plt.plot(num_features_arr, train_rmse_mean, 'o-', color="r")
plt.plot(num_features_arr, validation_rmse_mean, 'o-', color="g")
plt.legend(('Train', 'Validation'))
plt.xlabel('Number of features (K)'); plt.ylabel('RMSE');
plt.show()

Running num_features=1
Running 1th fold in 5 folds
learn the matrix factorization using CCD...
iter: 0, RMSE on training set: 0.9964503244960371.
iter: 1, RMSE on training set: 0.9964503242477375.
RMSE on test data: 0.9964587238475606.
Running 2th fold in 5 folds
learn the matrix factorization using CCD...
iter: 0, RMSE on training set: 0.9964503244960371.
iter: 1, RMSE on training set: 0.9964503242477375.
RMSE on test data: 0.9964584304313565.
Running 3th fold in 5 folds
learn the matrix factorization using CCD...
iter: 0, RMSE on training set: 0.9964503244960371.
iter: 1, RMSE on training set: 0.9964503242477375.
RMSE on test data: 0.9964523313834746.
Running 4th fold in 5 folds
learn the matrix factorization using CCD...
iter: 0, RMSE on training set: 0.9964503244960371.
iter: 1, RMSE on training set: 0.9964503242477375.
RMSE on test data: 0.9964418033298583.
Running 5th fold in 5 folds
learn the matrix factorization using CCD...
iter: 0, RMSE on training set: 0.9964503244960371.
it

KeyboardInterrupt: 

In [14]:
print(train_rmse_mean)
print(train_rmse_std)
print(validation_rmse_mean)
print(validation_rmse_std)

[ 0.99236697]
[  1.11022302e-16]
[ 0.99235064]
[ 0.]


### lambda_user

In [None]:
## !!! Takes long time to run

method = 2     # 0-SGD 1-ALS
K = 10        ## K-fold cross validation
gamma = 0.01
num_features = 20   # K in the lecture notes
lambda_user_arr = [0.01, 0.1, 1, 10]
lambda_item = 0.7

train_rmse_mean = np.zeros(len(lambda_user_arr))
train_rmse_std = np.zeros(len(lambda_user_arr))
validation_rmse_mean = np.zeros(len(lambda_user_arr))
validation_rmse_std = np.zeros(len(lambda_user_arr))

for i, lambda_user in enumerate(lambda_user_arr):
    train_rmse_arr = []
    validation_rmse_arr = []
    
    print('Running lambda_user={n}'.format(n=lambda_user))
    [train_rmse_arr, validation_rmse_arr] = cross_validation(ratings, K, method, num_items_per_user, 
                                                             num_users_per_item, min_num_ratings=10)
        
    train_rmse_mean[i] = np.mean(train_rmse_arr)
    train_rmse_std[i] = np.std(train_rmse_arr)
    validation_rmse_mean[i] = np.mean(validation_rmse_arr)
    validation_rmse_std[i] = np.std(validation_rmse_std)
    
## Plotting results
plt.fill_between(lambda_user_arr, train_rmse_mean - train_rmse_std,
                     train_rmse_mean + train_rmse_std, alpha=0.1, color="r")
plt.fill_between(lambda_user_arr, validation_rmse_mean - validation_rmse_std,
                     validation_rmse_mean + validation_rmse_std, alpha=0.1, color="g")
plt.plot(lambda_user_arr, train_rmse_mean, 'o-', color="r")
plt.plot(lambda_user_arr, validation_rmse_mean, 'o-', color="g")
plt.legend(('Train', 'Validation'))
plt.xlabel('Lambda user'); plt.ylabel('RMSE');
plt.show()

Running lambda_user=0.01
Running 1th fold in 10 folds


### Lambda item

In [None]:
## !!! Takes long time to run

method = 0     # 0-SGD 1-ALS
K = 10        ## K-fold cross validation
gamma = 0.01
num_features = 20   # K in the lecture notes
lambda_user = 0.1
lambda_item_arr = [0.01, 0.1, 0.5, 1]

train_rmse_mean = np.zeros(len(lambda_item_arr))
train_rmse_std = np.zeros(len(lambda_item_arr))
validation_rmse_mean = np.zeros(len(lambda_item_arr))
validation_rmse_std = np.zeros(len(lambda_item_arr))

for i, lambda_item in enumerate(lambda_item_arr):
    train_rmse_arr = []
    validation_rmse_arr = []
    
    print('Running lambda_item={n}'.format(n=lambda_item))
    [train_rmse_arr, validation_rmse_arr] = cross_validation(ratings, K, method, num_items_per_user, 
                                                             num_users_per_item, min_num_ratings=10)
        
    train_rmse_mean[i] = np.mean(train_rmse_arr)
    train_rmse_std[i] = np.std(train_rmse_arr)
    validation_rmse_mean[i] = np.mean(validation_rmse_arr)
    validation_rmse_std[i] = np.std(validation_rmse_std)
    
## Plotting results
plt.fill_between(lambda_item_arr, train_rmse_mean - train_rmse_std,
                     train_rmse_mean + train_rmse_std, alpha=0.1, color="r")
plt.fill_between(lambda_item_arr, validation_rmse_mean - validation_rmse_std,
                     validation_rmse_mean + validation_rmse_std, alpha=0.1, color="g")
plt.plot(lambda_item_arr, train_rmse_mean, 'o-', color="r")
plt.plot(lambda_item_arr, validation_rmse_mean, 'o-', color="g")
plt.legend(('Train', 'Validation'))
plt.xlabel('Lambda item'); plt.ylabel('RMSE');
plt.show()

### Learning rate

In [None]:
## !!! Takes long time to run

method = 0     # 0-SGD
K = 10        ## K-fold cross validation
gamma_arr = [0.01, 0.1, 1]
num_features = 20   # K in the lecture notes
lambda_user = 0.1
lambda_item = 0.5

train_rmse_mean = np.zeros(len(gamma_arr))
train_rmse_std = np.zeros(len(gamma_arr))
validation_rmse_mean = np.zeros(len(gamma_arr))
validation_rmse_std = np.zeros(len(gamma_arr))

for i, gamma in enumerate(gamma_arr):
    train_rmse_arr = []
    validation_rmse_arr = []
    
    print('Running gamma={n}'.format(n=gamma))
    [train_rmse_arr, validation_rmse_arr] = cross_validation(ratings, K, method, num_items_per_user, 
                                                             num_users_per_item, min_num_ratings=10)
        
    train_rmse_mean[i] = np.mean(train_rmse_arr)
    train_rmse_std[i] = np.std(train_rmse_arr)
    validation_rmse_mean[i] = np.mean(validation_rmse_arr)
    validation_rmse_std[i] = np.std(validation_rmse_std)
    
## Plotting results
plt.fill_between(gamma_arr, train_rmse_mean - train_rmse_std,
                     train_rmse_mean + train_rmse_std, alpha=0.1, color="r")
plt.fill_between(gamma_arr, validation_rmse_mean - validation_rmse_std,
                     validation_rmse_mean + validation_rmse_std, alpha=0.1, color="g")
plt.plot(gamma_arr, train_rmse_mean, 'o-', color="r")
plt.plot(gamma_arr, validation_rmse_mean, 'o-', color="g")
plt.legend(('Train', 'Validation'))
plt.xlabel('Learning Rate'); plt.ylabel('RMSE');
plt.show()

### TODO
#### 1. Compare SGD, ALS with the best set of parameters (based on above results)