In [48]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from surprise import BaselineOnly, Dataset, Reader, Trainset, KNNBaseline, SVD, accuracy
from surprise.model_selection import cross_validate, GridSearchCV
from scipy.io import loadmat

### Load Data

In [4]:
# Read the MATLAB file and pull out the two arrays stored under 'Y' and 'R'.
# NOTE(review): R is not used in this cell; it is kept because other cells
# may read it.
mat = loadmat('ex8_movies.mat')
Y = mat['Y']
R = mat['R']

# Flatten the transposed matrix into one (row index, column index, value)
# triple per cell. The first axis of Y.T ends up in the column that the
# Reader below labels 'user', the second axis in the 'item' column.
ratings_by_user = Y.T
user_idx, item_idx = np.indices(ratings_by_user.shape)
Y_inlined = np.dstack([user_idx, item_idx, ratings_by_user]).reshape(-1, 3)

# Only triples with a non-zero value are written out
# (presumably 0 marks "not rated" -- confirm against R).
# All three columns are written with "%f", so the ids land in the file as
# float-formatted text and that text is what Surprise uses as the raw ids.
is_rated = Y_inlined[:, 2] != 0
np.savetxt('ex8_movies.csv', Y_inlined[is_rated], fmt="%f %f %f")

# Parse the space-separated file back in as a Surprise dataset.
reader = Reader(sep=" ", line_format='user item rating')
data = Dataset.load_from_file("ex8_movies.csv", reader)

In [58]:
def test_baseline_only(data):
    """Fit ``BaselineOnly`` on every rating in ``data`` and print how well it fits.

    Prints one sample prediction followed by the RMSE. The ratings that are
    scored come from ``trainset.build_testset()``, i.e. they are the same
    ratings the model was fitted on, so the RMSE is a training-set figure.

    Args:
        data: object exposing ``build_full_trainset()`` (here the Surprise
            dataset built in the loading cell).

    Returns:
        None. Results are only printed.
    """
    full_train = data.build_full_trainset()

    # Baseline biases are estimated with SGD using these settings.
    model = BaselineOnly(bsl_options=dict(method='sgd', learning_rate=0.1, reg=0.02))
    model.fit(full_train)

    # Sample prediction for the user/item pair whose inner ids are both 0.
    # r_ui=5 is passed as the reference rating for display
    # (assumes that pair's true rating is 5 -- TODO confirm).
    first_user = full_train.to_raw_uid(0)
    first_item = full_train.to_raw_iid(0)
    sample = model.predict(uid=first_user, iid=first_item, r_ui=5)

    # Score every rating the model was trained on.
    fitted = model.test(full_train.build_testset())

    print(sample)
    accuracy.rmse(fitted)


test_baseline_only(data)

Estimating biases using sgd...
user: 0.000000   item: 0.000000   r_ui = 5.00   est = 3.42   {'was_impossible': False}
RMSE: 0.9716


In [59]:
def test_knn_baseline(data):
    """Fit an item-based ``KNNBaseline`` on all of ``data`` and print its fit.

    Prints one sample prediction followed by the RMSE. The ratings that are
    scored come from ``trainset.build_testset()``, i.e. they are the same
    ratings the model was fitted on, so the RMSE is a training-set figure.

    Args:
        data: object exposing ``build_full_trainset()`` (here the Surprise
            dataset built in the loading cell).

    Returns:
        None. Results are only printed.
    """
    full_train = data.build_full_trainset()

    model = KNNBaseline(
        k=10,
        # Baseline biases: SGD with a very small learning rate.
        bsl_options=dict(method='sgd', learning_rate=.000005, reg=0.02),
        # user_based=False -> similarities are computed between items.
        sim_options=dict(name='cosine', user_based=False),
    )
    model.fit(full_train)

    # Sample prediction for the user/item pair whose inner ids are both 0.
    # r_ui=5 is passed as the reference rating for display
    # (assumes that pair's true rating is 5 -- TODO confirm).
    first_user = full_train.to_raw_uid(0)
    first_item = full_train.to_raw_iid(0)
    sample = model.predict(uid=first_user, iid=first_item, r_ui=5)

    # Score every rating the model was trained on.
    fitted = model.test(full_train.build_testset())

    print(sample)
    accuracy.rmse(fitted)


test_knn_baseline(data)

Estimating biases using sgd...
Computing the cosine similarity matrix...
Done computing similarity matrix.
user: 0.000000   item: 0.000000   r_ui = 5.00   est = 4.71   {'actual_k': 10, 'was_impossible': False}
RMSE: 0.8288


In [61]:
def test_svd(data, random_state=0):
    """Fit ``SVD`` on every rating in ``data`` and print how well it fits.

    Prints one sample prediction followed by the RMSE. The ratings that are
    scored come from ``trainset.build_testset()``, i.e. they are the same
    ratings the model was fitted on, so the RMSE is a training-set figure
    and not an estimate of accuracy on unseen ratings.

    Args:
        data: object exposing ``build_full_trainset()`` (here the Surprise
            dataset built in the loading cell).
        random_state: seed forwarded to ``SVD``. The factor matrices are
            randomly initialised, so without a fixed seed the printed
            estimate and RMSE differ on every run. Pass ``None`` to get the
            previous unseeded behaviour.

    Returns:
        None. Results are only printed.
    """
    # Seeding the model makes the cell reproducible under Restart & Run All.
    algo = SVD(n_factors=100, n_epochs=100, lr_all=0.1, random_state=random_state)
    trainset = data.build_full_trainset()
    testset = trainset.build_testset()

    algo.fit(trainset)

    # Sample prediction for the user/item pair whose inner ids are both 0.
    # r_ui=5 is passed as the reference rating for display
    # (assumes that pair's true rating is 5 -- TODO confirm).
    p = algo.predict(uid=trainset.to_raw_uid(0), iid=trainset.to_raw_iid(0), r_ui=5)
    predictions = algo.test(testset)

    print(p)
    accuracy.rmse(predictions)


test_svd(data)

user: 0.000000   item: 0.000000   r_ui = 5.00   est = 4.60   {'was_impossible': False}
RMSE: 0.2484
