# Import Libraries

In [43]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from surprise import SVD, SVDpp, NMF
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold

In [4]:
# Load Surprise's built-in 'ml-100k' ratings dataset; every model below is cross-validated on it.
# NOTE(review): load_builtin presumably fetches the dataset on first use — confirm before running offline.
data = Dataset.load_builtin('ml-100k')

# Train model

In [12]:
def evaluate_model(model, param_grid, data, measures=['RMSE', 'MAE'], cv=5, random_state = 42):
    """Cross-validate ``model`` on every combination of values in ``param_grid``.

    Parameters
    ----------
    model : class
        Algorithm class (not an instance). It is instantiated once per
        parameter combination with the combination's keyword arguments plus
        ``random_state``.
    param_grid : dict
        Maps a constructor keyword to the list of values to try. The full
        Cartesian product of the value lists is evaluated. An empty dict
        evaluates the model once with its default parameters.
    data :
        Dataset object passed straight through to ``cross_validate``.
    measures : list of str
        Accuracy measures to compute. The default list is only read, never
        mutated.
    cv : int or cross-validation iterator
        An int is turned into a seeded ``KFold`` so that every parameter
        combination is scored on the same folds; anything else is passed
        to ``cross_validate`` unchanged.
    random_state : int
        Seed used for the algorithm and, when ``cv`` is an int, for the folds.
        A ``random_state`` entry in ``param_grid`` overrides it for the
        algorithm.

    Returns
    -------
    list of dict
        One dict per combination with the keys ``'model'``, ``'parameters'``,
        ``'mean_<measure>'`` for each requested measure (lower-cased, so the
        defaults give ``'mean_rmse'`` and ``'mean_mae'``), ``'mean_fit_time'``
        and ``'mean_test_time'``.
    """
    # cross_validate reports each measure under the lower-cased key 'test_<name>'.
    measure_names = [name.lower() for name in measures]

    # An int `cv` would make cross_validate build fresh, unseeded shuffled folds on
    # every call, so combinations would be compared on different splits. Build one
    # seeded splitter instead and reuse it for every combination.
    if isinstance(cv, int):
        cv = KFold(n_splits=cv, random_state=random_state, shuffle=True)

    # itertools.product() over zero iterables yields a single empty tuple, so an
    # empty grid naturally produces one combination: the model defaults.
    keys = list(param_grid)
    param_combinations = [
        dict(zip(keys, combo)) for combo in itertools.product(*param_grid.values())
    ]

    results = []
    for params in param_combinations:
        # Merge rather than pass both, so a grid that tunes random_state does not
        # raise "got multiple values for keyword argument".
        algo = model(**{'random_state': random_state, **params})
        cv_results = cross_validate(algo, data, measures=measures, cv=cv)

        summary = {'model': model.__name__, 'parameters': params}
        for name in measure_names:
            summary[f'mean_{name}'] = np.mean(cv_results[f'test_{name}'])
        summary['mean_fit_time'] = np.mean(cv_results['fit_time'])
        summary['mean_test_time'] = np.mean(cv_results['test_time'])
        results.append(summary)

    return results

In [13]:
# Search space passed to evaluate_model for the SVD and SVD++ runs (3 x 3 x 3 = 27 combinations).
param_svd = dict(
    n_factors=[20, 50, 100],
    lr_all=[0.001, 0.005, 0.01],
    reg_all=[0.01, 0.02, 0.1],
)

# Search space passed to evaluate_model for the NMF run (4 x 4 x 4 = 64 combinations).
param_nmf = dict(
    n_factors=[5, 10, 15, 30],
    reg_pu=[0.005, 0.01, 0.3, 0.6],
    reg_qi=[0.005, 0.01, 0.1, 0.2],
)

# Sweep the SVD grid; evaluate_model cross-validates every parameter combination.
print('Start SVD test')
svd_results = evaluate_model(SVD, param_svd, data)

# One report line per combination, then a blank line to close the section.
print("SVD Results:")
for result in svd_results:
    params, rmse, mae = result['parameters'], result['mean_rmse'], result['mean_mae']
    time_fit, time_test = result['mean_fit_time'], result['mean_test_time']
    print(f"Params: {params}, RMSE: {rmse: 0.5f}, MAE: {mae: 0.5f}, time_fit: {time_fit: 0.5f}, time_test: {time_test: 0.5f}")
print()

Start SVD test
SVD Results:
Params: {'n_factors': 20, 'lr_all': 0.001, 'reg_all': 0.01}, RMSE:  0.95926, MAE:  0.76156, time_fit:  0.71264, time_test:  0.12904
Params: {'n_factors': 20, 'lr_all': 0.001, 'reg_all': 0.02}, RMSE:  0.95998, MAE:  0.76279, time_fit:  0.65539, time_test:  0.13600
Params: {'n_factors': 20, 'lr_all': 0.001, 'reg_all': 0.1}, RMSE:  0.96156, MAE:  0.76564, time_fit:  0.65980, time_test:  0.12840
Params: {'n_factors': 20, 'lr_all': 0.005, 'reg_all': 0.01}, RMSE:  0.93904, MAE:  0.74087, time_fit:  0.71730, time_test:  0.14061
Params: {'n_factors': 20, 'lr_all': 0.005, 'reg_all': 0.02}, RMSE:  0.93635, MAE:  0.73918, time_fit:  0.65000, time_test:  0.12059
Params: {'n_factors': 20, 'lr_all': 0.005, 'reg_all': 0.1}, RMSE:  0.94089, MAE:  0.74607, time_fit:  0.64019, time_test:  0.10341
Params: {'n_factors': 20, 'lr_all': 0.01, 'reg_all': 0.01}, RMSE:  0.95595, MAE:  0.74589, time_fit:  0.66640, time_test:  0.11399
Params: {'n_factors': 20, 'lr_all': 0.01, 'reg_all'

In [14]:
# Sweep SVD++ over the same grid that is used for SVD.
print('Start SVD++ test')
svdpp_results = evaluate_model(SVDpp, param_svd, data)

# One report line per combination, then a blank line to close the section.
print("SVD++ Results:")
for result in svdpp_results:
    params, rmse, mae = result['parameters'], result['mean_rmse'], result['mean_mae']
    time_fit, time_test = result['mean_fit_time'], result['mean_test_time']
    print(f"Params: {params}, RMSE: {rmse: 0.5f}, MAE: {mae: 0.5f}, time_fit: {time_fit: 0.5f}, time_test: {time_test: 0.5f}")
print()

Start SVD++ test
SVD++ Results:
Params: {'n_factors': 20, 'lr_all': 0.001, 'reg_all': 0.01}, RMSE:  0.95131, MAE:  0.75392, time_fit:  25.38549, time_test:  3.61863
Params: {'n_factors': 20, 'lr_all': 0.001, 'reg_all': 0.02}, RMSE:  0.95603, MAE:  0.75876, time_fit:  24.66155, time_test:  3.53042
Params: {'n_factors': 20, 'lr_all': 0.001, 'reg_all': 0.1}, RMSE:  0.96096, MAE:  0.76523, time_fit:  24.30061, time_test:  3.46573
Params: {'n_factors': 20, 'lr_all': 0.005, 'reg_all': 0.01}, RMSE:  0.91736, MAE:  0.72039, time_fit:  24.15920, time_test:  3.49019
Params: {'n_factors': 20, 'lr_all': 0.005, 'reg_all': 0.02}, RMSE:  0.92012, MAE:  0.72418, time_fit:  24.05555, time_test:  3.49092
Params: {'n_factors': 20, 'lr_all': 0.005, 'reg_all': 0.1}, RMSE:  0.93900, MAE:  0.74439, time_fit:  25.19084, time_test:  3.69020
Params: {'n_factors': 20, 'lr_all': 0.01, 'reg_all': 0.01}, RMSE:  0.94627, MAE:  0.73717, time_fit:  25.31426, time_test:  3.72346
Params: {'n_factors': 20, 'lr_all': 0.01

In [15]:
# Sweep the NMF grid; evaluate_model cross-validates every parameter combination.
print('Start NMF test')
nmf_results = evaluate_model(NMF, param_nmf, data)

# One report line per combination.
print("NMF Results:")
for result in nmf_results:
    params, rmse, mae = result['parameters'], result['mean_rmse'], result['mean_mae']
    time_fit, time_test = result['mean_fit_time'], result['mean_test_time']
    print(f"Params: {params}, RMSE: {rmse: 0.5f}, MAE: {mae: 0.5f}, time_fit: {time_fit: 0.5f}, time_test: {time_test: 0.5f}")

Start NMF test
NMF Results:
Params: {'n_factors': 5, 'reg_pu': 0.005, 'reg_qi': 0.005}, RMSE:  2.13401, MAE:  1.92234, time_fit:  1.47392, time_test:  0.09048
Params: {'n_factors': 5, 'reg_pu': 0.005, 'reg_qi': 0.01}, RMSE:  2.04322, MAE:  1.83205, time_fit:  1.39229, time_test:  0.10010
Params: {'n_factors': 5, 'reg_pu': 0.005, 'reg_qi': 0.1}, RMSE:  1.32550, MAE:  1.12586, time_fit:  1.40358, time_test:  0.09279
Params: {'n_factors': 5, 'reg_pu': 0.005, 'reg_qi': 0.2}, RMSE:  1.12475, MAE:  0.93319, time_fit:  1.40313, time_test:  0.10312
Params: {'n_factors': 5, 'reg_pu': 0.01, 'reg_qi': 0.005}, RMSE:  2.05651, MAE:  1.84565, time_fit:  1.39572, time_test:  0.09055
Params: {'n_factors': 5, 'reg_pu': 0.01, 'reg_qi': 0.01}, RMSE:  1.94666, MAE:  1.73621, time_fit:  1.40347, time_test:  0.10218
Params: {'n_factors': 5, 'reg_pu': 0.01, 'reg_qi': 0.1}, RMSE:  1.24929, MAE:  1.05337, time_fit:  1.40688, time_test:  0.09327
Params: {'n_factors': 5, 'reg_pu': 0.01, 'reg_qi': 0.2}, RMSE:  1.

In [16]:
import joblib

In [17]:
# Persist each model's grid-search results so later analysis can reload them
# instead of re-running the sweeps.
# Paths are built with os.path.join rather than a hard-coded '\\' separator, which
# only names a subdirectory on Windows; elsewhere it would create a file literally
# called 'Results\svd_results.pkl' in the working directory. The directory is
# created first because joblib.dump does not create missing parent folders.
RESULTS_DIR = 'Results'
os.makedirs(RESULTS_DIR, exist_ok=True)

joblib.dump(svd_results, os.path.join(RESULTS_DIR, 'svd_results.pkl'))
joblib.dump(svdpp_results, os.path.join(RESULTS_DIR, 'svdpp_results.pkl'))
joblib.dump(nmf_results, os.path.join(RESULTS_DIR, 'nmf_results.pkl'))

['Results\\nmf_results.pkl']

In [20]:
# Join the three per-model result lists end to end into one flat array of result dicts.
results = np.concatenate(
    [np.atleast_1d(model_results) for model_results in (svd_results, svdpp_results, nmf_results)],
    axis=0,
)