
# Recommender Similarity Algorithm Downselect File

## Import Libraries
> We are using the Suprise library because it has all the tools we need to import our dataset, <br>
calculate the similarities and errors from all the similairity algorithms, fit the algorithms <br>
to our dataset, and then output the similarity matrix.

In [146]:
!pip install surprise



In [147]:
import json
import time
import numpy as np
import pandas as pd
from surprise.model_selection import GridSearchCV
from surprise.model_selection import RandomizedSearchCV
from surprise import accuracy
from collections import defaultdict
from surprise import AlgoBase
from surprise import NormalPredictor
from surprise import PredictionImpossible
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNWithZScore
from surprise import SVD
from surprise import SVDpp
from surprise import Dataset
from surprise import Reader
from surprise import dump
from surprise.model_selection import cross_validate

## Import Preprocessed User, Restaurant, and Review Data

In [148]:
start = time.time()
with open('MA_users.json', encoding="utf8") as fin:
    print('Reading',fin.name)
    user_data = json.load(fin)
end = time.time()
duration_without_dr = end-start
print("Time taken to load user data: %d seconds" %duration_without_dr)


Reading MA_users.json
Time taken to load user data: 1 seconds


In [149]:
start = time.time()
with open('MA_restaurants.json', encoding="utf8") as fin:
    print('Reading',fin.name)
    restaurant_data = json.load(fin)
end = time.time()
duration_without_dr = end-start
print("Time taken to load restaurant data: %d seconds" %duration_without_dr)

Reading MA_restaurants.json
Time taken to load restaurant data: 0 seconds


In [150]:
start = time.time()
with open('MA_reviews.json', encoding="utf8") as fin:
    print('Reading',fin.name)
    review_data = json.load(fin)
end = time.time()
duration_without_dr = end-start
print("Time taken to load review data: %d seconds" %duration_without_dr)

Reading MA_reviews.json
Time taken to load review data: 4 seconds


In [151]:
print(str(len(user_data)) + ' user records loaded')
print(str(len(review_data)) + ' review records loaded')
print(str(len(restaurant_data)) + ' business records loaded')

125521 user records loaded
914710 review records loaded
10550 business records loaded


Create a 2d arary of user IDs and buisness IDs, with ratings(stars) as frequencies

In [152]:
user_id_dict = dict()
for i,u in enumerate(user_data):
    user_id_dict[u['user_id']] = i

business_id_dict = dict()
for i,r in enumerate(restaurant_data):
    business_id_dict[r['business_id']] = i

In [153]:
rating_mat = np.zeros((len(user_data), len(restaurant_data)))
rating_dict = {'user_id': [],
               'business_id': [],
               'rating': []}

for review in review_data:
    user_idx = user_id_dict[review['user_id']]
    business_idx = business_id_dict[review['business_id']]
    rating_mat[user_idx][business_idx] = review['stars']
    
    rating_dict['user_id'].append(review['user_id'])
    rating_dict['business_id'].append(review['business_id'])
    rating_dict['rating'].append(review['stars'])

print(rating_mat.shape)
print('number of non-zero elements(good ratings): ' +str(np.count_nonzero(rating_mat)))

(125521, 10550)
number of non-zero elements(good ratings): 879189


# This code will be removed and put into preprocessing later

In [154]:
df = pd.DataFrame(rating_dict)

# filter to only the users/businesses with over 50 reviews
# necessary in order to build the anti testset
filter_user = df['user_id'].value_counts() >= 15
filter_user = filter_user[filter_user].index.tolist()

filter_business = df['business_id'].value_counts() > 40
filter_business = filter_business[filter_business].index.tolist()

df_new = df[(df['user_id'].isin(filter_user)) & (df['business_id'].isin(filter_business))]

print('The original data frame shape:\t{}'.format(df.shape))
print('The new data frame shape:\t{}'.format(df_new.shape))

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_new[['user_id', 'business_id', 'rating']], reader)

The original data frame shape:	(914710, 3)
The new data frame shape:	(475458, 3)


In [155]:
if 'rating_mat' in globals():
    del rating_mat
    del df

In [156]:
sim_options = {
    "name": ["msd","cosine"],
    "min_support": [1,5],
    "user_based": [False,True]
}
param_grid = {"k": [10, 40],
              "min_k:": [1, 5, 10],
              "sim_options": sim_options}

In [159]:
#if 'grid_search' in globals():
#    del grid_search
#grid_search = GridSearchCV(KNNBasic, param_grid, measures = ["rmse", "mae"], cv=5, n_jobs=3,joblib_verbose=10)
#grid_search.fit(data)

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    7.9s
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   32.7s
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   52.2s
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.5min
[Parallel(n_jobs=3)]: Done  26 tasks      | elapsed:  2.4min
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:  3.3min
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:  4.2min
[Parallel(n_jobs=3)]: Done  55 tasks      | elapsed:  4.9min
[Parallel(n_jobs=3)]: Done  66 tasks      | elapsed:  6.4min
[Parallel(n_jobs=3)]: Done  79 tasks      | elapsed:  7.9min
[Parallel(n_jobs=3)]: Done  92 tasks      | elapsed:  8.8min
[Parallel(n_jobs=3)]: Done 107 tasks      | elapsed: 10.5min
[Parallel(n_jobs=3)]: Done 122 tasks      | elapsed: 12.2min
[Parallel(n_jobs=3)]: Done 139 tasks      | elapsed: 13.7min
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed: 16.1min
[Parallel(

In [160]:
#KNNBasic_results_df = pd.DataFrame.from_dict(grid_search.cv_results)
#for n in range(len(KNNBasic_results_df.params)):
#    print('Mean RSME:',KNNBasic_results_df.mean_test_rmse[n], 'Params:',KNNBasic_results_df.params[n])
#print('\nBest RSME:',grid_search.best_score["rmse"],'Best Params:',grid_search.best_params["rmse"])

Mean RSME: 1.1207483197619375 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}}
Mean RSME: 1.1134275197738244 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}}
Mean RSME: 1.1579731853753492 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}}
Mean RSME: 1.148398591723234 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}}
Mean RSME: 1.131750952774477 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}}
Mean RSME: 1.1185394953067425 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}}
Mean RSME: 1.167884894893078 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}}
Mean RSME: 1.1553289834961757 Params: {'k': 10, 'min_k:': 1, 'sim_options

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers. <br>
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    7.9s <br>
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   32.7s <br>
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   52.2s <br>
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.5min <br>
[Parallel(n_jobs=3)]: Done  26 tasks      | elapsed:  2.4min <br>
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:  3.3min <br>
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:  4.2min <br>
[Parallel(n_jobs=3)]: Done  55 tasks      | elapsed:  4.9min <br>
[Parallel(n_jobs=3)]: Done  66 tasks      | elapsed:  6.4min <br>
[Parallel(n_jobs=3)]: Done  79 tasks      | elapsed:  7.9min <br>
[Parallel(n_jobs=3)]: Done  92 tasks      | elapsed:  8.8min <br>
[Parallel(n_jobs=3)]: Done 107 tasks      | elapsed: 10.5min <br>
[Parallel(n_jobs=3)]: Done 122 tasks      | elapsed: 12.2min <br>
[Parallel(n_jobs=3)]: Done 139 tasks      | elapsed: 13.7min <br>
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed: 16.1min <br>
[Parallel(n_jobs=3)]: Done 175 tasks      | elapsed: 17.6min <br>
[Parallel(n_jobs=3)]: Done 194 tasks      | elapsed: 20.1min <br>
[Parallel(n_jobs=3)]: Done 215 tasks      | elapsed: 22.1min <br>
[Parallel(n_jobs=3)]: Done 240 out of 240 | elapsed: 25.5min finished  <br>

Mean RSME: 1.1207483197619375 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.1134275197738244 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1579731853753492 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.148398591723234 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.131750952774477 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.1185394953067425 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.167884894893078 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1553289834961757 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.1207483197619375 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.1134275197738244 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1579731853753492 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.148398591723234 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.131750952774477 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.1185394953067425 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.167884894893078 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1553289834961757 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.1207483197619375 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.1134275197738244 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1579731853753492 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.148398591723234 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.131750952774477 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.1185394953067425 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.167884894893078 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1553289834961757 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.098864093774702 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.079762446415431 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1519888017006183 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1424616885995078 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.106419720913237 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.085419041395013 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1583798093010693 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1484190090912159 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.098864093774702 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.079762446415431 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1519888017006183 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1424616885995078 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.106419720913237 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.085419041395013 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1583798093010693 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1484190090912159 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.098864093774702 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.079762446415431 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1519888017006183 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1424616885995078 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.106419720913237 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.085419041395013 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1583798093010693 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1484190090912159 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>  <br>

Best RSME: 1.079762446415431 Best Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}}

In [161]:
#if 'grid_search' in globals():
#    del grid_search
#grid_search = GridSearchCV(KNNWithMeans, param_grid, measures = ["rmse", "mae"], cv=5, n_jobs=3,joblib_verbose=10)
#grid_search.fit(data)

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    7.7s
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   33.4s
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   52.8s
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.5min
[Parallel(n_jobs=3)]: Done  26 tasks      | elapsed:  2.5min
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:  3.3min
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:  4.2min
[Parallel(n_jobs=3)]: Done  55 tasks      | elapsed:  4.9min
[Parallel(n_jobs=3)]: Done  66 tasks      | elapsed:  6.4min
[Parallel(n_jobs=3)]: Done  79 tasks      | elapsed:  7.8min
[Parallel(n_jobs=3)]: Done  92 tasks      | elapsed:  8.7min
[Parallel(n_jobs=3)]: Done 107 tasks      | elapsed: 10.4min
[Parallel(n_jobs=3)]: Done 122 tasks      | elapsed: 12.0min
[Parallel(n_jobs=3)]: Done 139 tasks      | elapsed: 13.6min
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed: 15.9min
[Parallel(

In [162]:
#KNNMeans_results_df = pd.DataFrame.from_dict(grid_search.cv_results)
#for n in range(len(KNNMeans_results_df.params)):
#    print('Mean RSME:',KNNMeans_results_df.mean_test_rmse[n], 'Params:',KNNMeans_results_df.params[n])
#print('\nBest RSME:',grid_search.best_score["rmse"],'Best Params:',grid_search.best_params["rmse"])

Mean RSME: 1.0655983534710998 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}}
Mean RSME: 1.0810146843927848 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}}
Mean RSME: 1.0969267715454047 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}}
Mean RSME: 1.1072333151450648 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}}
Mean RSME: 1.0588264076304275 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}}
Mean RSME: 1.075620146814002 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}}
Mean RSME: 1.093913698842813 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}}
Mean RSME: 1.1061819100823953 Params: {'k': 10, 'min_k:': 1, 'sim_option

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers. <br>
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    7.7s <br>
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   33.4s <br>
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   52.8s <br>
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.5min <br>
[Parallel(n_jobs=3)]: Done  26 tasks      | elapsed:  2.5min <br>
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:  3.3min <br>
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:  4.2min <br>
[Parallel(n_jobs=3)]: Done  55 tasks      | elapsed:  4.9min <br>
[Parallel(n_jobs=3)]: Done  66 tasks      | elapsed:  6.4min <br>
[Parallel(n_jobs=3)]: Done  79 tasks      | elapsed:  7.8min <br>
[Parallel(n_jobs=3)]: Done  92 tasks      | elapsed:  8.7min <br>
[Parallel(n_jobs=3)]: Done 107 tasks      | elapsed: 10.4min <br>
[Parallel(n_jobs=3)]: Done 122 tasks      | elapsed: 12.0min <br>
[Parallel(n_jobs=3)]: Done 139 tasks      | elapsed: 13.6min <br>
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed: 15.9min <br>
[Parallel(n_jobs=3)]: Done 175 tasks      | elapsed: 17.4min <br>
[Parallel(n_jobs=3)]: Done 194 tasks      | elapsed: 19.9min <br>
[Parallel(n_jobs=3)]: Done 215 tasks      | elapsed: 21.8min <br>
[Parallel(n_jobs=3)]: Done 240 out of 240 | elapsed: 25.3min finished <br>  <br>

Mean RSME: 1.0655983534710998 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0810146843927848 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.0969267715454047 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1072333151450648 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0588264076304275 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.075620146814002 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.093913698842813 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1061819100823953 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0655983534710998 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0810146843927848 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.0969267715454047 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1072333151450648 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0588264076304275 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.075620146814002 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.093913698842813 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1061819100823953 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0655983534710998 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0810146843927848 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.0969267715454047 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1072333151450648 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0588264076304275 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.075620146814002 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.093913698842813 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1061819100823953 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0418043175562492 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.050525769096177 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.086325666101672 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.0991071861591728 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.036794208238455 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0464099209984212 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.084175614300176 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.0989079211079469 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0418043175562492 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.050525769096177 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.086325666101672 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.0991071861591728 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.036794208238455 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0464099209984212 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.084175614300176 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.0989079211079469 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0418043175562492 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.050525769096177 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.086325666101672 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.0991071861591728 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.036794208238455 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0464099209984212 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.084175614300176 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.0989079211079469 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>  <br>

Best RSME: 1.036794208238455 Best Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}}

In [163]:
#if 'grid_search' in globals():
#    del grid_search
#grid_search = GridSearchCV(KNNWithZScore, param_grid, measures = ["rmse", "mae"], cv=5, n_jobs=3,joblib_verbose=10)
#grid_search.fit(data)

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    8.9s
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   37.7s
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   58.3s
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.6min
[Parallel(n_jobs=3)]: Done  26 tasks      | elapsed:  2.6min
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:  3.6min
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:  4.5min
[Parallel(n_jobs=3)]: Done  55 tasks      | elapsed:  5.3min
[Parallel(n_jobs=3)]: Done  66 tasks      | elapsed:  6.9min
[Parallel(n_jobs=3)]: Done  79 tasks      | elapsed:  8.4min
[Parallel(n_jobs=3)]: Done  92 tasks      | elapsed:  9.4min
[Parallel(n_jobs=3)]: Done 107 tasks      | elapsed: 11.2min
[Parallel(n_jobs=3)]: Done 122 tasks      | elapsed: 12.9min
[Parallel(n_jobs=3)]: Done 139 tasks      | elapsed: 14.6min
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed: 17.1min
[Parallel(

In [164]:
#KNNZScore_results_df = pd.DataFrame.from_dict(grid_search.cv_results)
#for n in range(len(KNNZScore_results_df.params)):
#    print('Mean RSME:',KNNZScore_results_df.mean_test_rmse[n], 'Params:',KNNZScore_results_df.params[n])
#print('\nBest RSME:',grid_search.best_score["rmse"],'Best Params:',grid_search.best_params["rmse"])

Mean RSME: 1.0683386376029975 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}}
Mean RSME: 1.0859524367717235 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}}
Mean RSME: 1.1045790112123492 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}}
Mean RSME: 1.124366935600634 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}}
Mean RSME: 1.060761589193492 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}}
Mean RSME: 1.0767696237615965 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}}
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}}
Mean RSME: 1.121876315356279 Params: {'k': 10, 'min_k:': 1, 'sim_options

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers. <br>
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    8.9s <br>
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:   37.7s <br>
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:   58.3s <br>
[Parallel(n_jobs=3)]: Done  19 tasks      | elapsed:  1.6min <br>
[Parallel(n_jobs=3)]: Done  26 tasks      | elapsed:  2.6min <br>
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:  3.6min <br>
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:  4.5min <br>
[Parallel(n_jobs=3)]: Done  55 tasks      | elapsed:  5.3min <br>
[Parallel(n_jobs=3)]: Done  66 tasks      | elapsed:  6.9min <br>
[Parallel(n_jobs=3)]: Done  79 tasks      | elapsed:  8.4min <br>
[Parallel(n_jobs=3)]: Done  92 tasks      | elapsed:  9.4min <br>
[Parallel(n_jobs=3)]: Done 107 tasks      | elapsed: 11.2min <br>
[Parallel(n_jobs=3)]: Done 122 tasks      | elapsed: 12.9min <br>
[Parallel(n_jobs=3)]: Done 139 tasks      | elapsed: 14.6min <br>
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed: 17.1min <br>
[Parallel(n_jobs=3)]: Done 175 tasks      | elapsed: 18.7min <br>
[Parallel(n_jobs=3)]: Done 194 tasks      | elapsed: 21.4min <br>
[Parallel(n_jobs=3)]: Done 215 tasks      | elapsed: 23.5min <br>
[Parallel(n_jobs=3)]: Done 240 out of 240 | elapsed: 27.1min finished <br> <br>

Mean RSME: 1.0683386376029975 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0859524367717235 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1045790112123492 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.124366935600634 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.060761589193492 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0767696237615965 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.121876315356279 Params: {'k': 10, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0683386376029975 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0859524367717235 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1045790112123492 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.124366935600634 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.060761589193492 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0767696237615965 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.121876315356279 Params: {'k': 10, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br> 
Mean RSME: 1.0683386376029975 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0859524367717235 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1045790112123492 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.124366935600634 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.060761589193492 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0767696237615965 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.1016096448920973 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.121876315356279 Params: {'k': 10, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.04433479448415 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0552517611900165 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.0940389811938673 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1150317967049255 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0387852982068733 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0479187969087314 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.091420180856124 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1133504557199738 Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.04433479448415 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0552517611900165 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.0940389811938673 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1150317967049255 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0387852982068733 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0479187969087314 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.091420180856124 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1133504557199738 Params: {'k': 40, 'min_k:': 5, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.04433479448415 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0552517611900165 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.0940389811938673 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1150317967049255 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}} <br>
Mean RSME: 1.0387852982068733 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}} <br>
Mean RSME: 1.0479187969087314 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': True}} <br>
Mean RSME: 1.091420180856124 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': False}} <br>
Mean RSME: 1.1133504557199738 Params: {'k': 40, 'min_k:': 10, 'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}} <br> <br>

Best RSME: 1.0387852982068733 Best Params: {'k': 40, 'min_k:': 1, 'sim_options': {'name': 'cosine', 'min_support': 1, 'user_based': False}}

In [None]:
from surprise import SVD
from surprise import SVDpp

In [None]:
sim_options = {
    "n_epochs": [5, 10, 20, 30],
    "lr_all": [.0025, .005, .001, .01]}

param_grid = {"sim_options": sim_options}

In [None]:
if 'grid_search' in globals():
    del grid_search
grid_search = GridSearchCV(SVD, param_grid, measures = ["rmse", "mae"], cv=5, n_jobs=3,joblib_verbose=10)
grid_search.fit(data)

In [None]:
SVD_results_df = pd.DataFrame.from_dict(grid_search.cv_results)
for n in range(len(SVD_results_df.params)):
    print('Mean RSME:',SVD_results_df.mean_test_rmse[n], 'Params:',SVD_results_df.params[n])
print('\nBest RSME:',grid_search.best_score["rmse"],'Best Params:',grid_search.best_params["rmse"])

In [None]:
if 'grid_search' in globals():
    del grid_search
grid_search = GridSearchCV(SVDpp, param_grid, measures = ["rmse", "mae"], cv=5, n_jobs=3,joblib_verbose=10)
grid_search.fit(data)

In [None]:
SVDpp_results_df = pd.DataFrame.from_dict(grid_search.cv_results)
for n in range(len(SVDpp_results_df.params)):
    print('Mean RSME:',SVDpp_results_df.mean_test_rmse[n], 'Params:',SVDpp_results_df.params[n])
print('\nBest RSME:',grid_search.best_score["rmse"],'Best Params:',grid_search.best_params["rmse"])

## Algorithm and Parameter Tuning

### Validation methods
Parameter grid search given the below error measurements:
#### Root Mean Square Error (RMSE)
* Measures standard deviation of errors in set of predictions
* Goal: minimize RMSE

#### Mean Absolute Error (MAE)
* Measures average magnitude of errors in set of predictions
* Goal: minimize MAE