### LOAD DATA


In [1]:
from surprise import Dataset

ratings = Dataset.load_builtin('ml-100k')
ratings


<surprise.dataset.DatasetAutoFolds at 0x26161b3a640>

In [2]:
from surprise.dataset import DatasetAutoFolds

def load_ratings_from_surprise() -> DatasetAutoFolds:
    ratings = Dataset.load_builtin('ml-100k')
    return ratings

load_ratings_from_surprise()

<surprise.dataset.DatasetAutoFolds at 0x26161b29b80>

In [3]:
df = Dataset.load_builtin('ml-100k')

In [4]:
def get_data(from_surprise : bool = True) -> DatasetAutoFolds:
    data = load_ratings_from_surprise() if from_surprise else load_ratings_from_file()
    return data

data = get_data(from_surprise=True)
data

<surprise.dataset.DatasetAutoFolds at 0x26161b3a580>

In [5]:
#TRAIN AND TEST

In [6]:

from surprise.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.2, random_state=42)
train

<surprise.trainset.Trainset at 0x26161a92670>

In [7]:
from surprise import SVD

model = SVD()

In [8]:
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x26161a8eaf0>

In [9]:
from surprise.trainset import Trainset
from  surprise.prediction_algorithms.algo_base import AlgoBase

from surprise.prediction_algorithms.knns import KNNBasic


def get_trained_model(model_class: AlgoBase, model_kwargs: dict, train_set: Trainset) -> AlgoBase:
    model = model_class(sim_options = model_kwargs)
    model.fit(train_set)
    return model

model_kwargs = {'sim_options': {'user_based': False, 'name': 'pearson'}}
get_trained_model(KNNBasic, {'user_based': False, 'name': 'pearson'}, train)
# {'sim_options': {'user_based': False, 'name': 'pearson'}} - **kwargs

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x2617b366310>

### PREDICTIONS

In [10]:
predictions = model.test(test)
predictions[:10]


[Prediction(uid='907', iid='143', r_ui=5.0, est=4.933808951619199, details={'was_impossible': False}),
 Prediction(uid='371', iid='210', r_ui=4.0, est=4.078362962844308, details={'was_impossible': False}),
 Prediction(uid='218', iid='42', r_ui=4.0, est=3.595649012872151, details={'was_impossible': False}),
 Prediction(uid='829', iid='170', r_ui=4.0, est=4.0427760496270775, details={'was_impossible': False}),
 Prediction(uid='733', iid='277', r_ui=1.0, est=2.8749039859518057, details={'was_impossible': False}),
 Prediction(uid='363', iid='1512', r_ui=1.0, est=3.3366097673346116, details={'was_impossible': False}),
 Prediction(uid='193', iid='487', r_ui=5.0, est=3.800634691159829, details={'was_impossible': False}),
 Prediction(uid='808', iid='313', r_ui=5.0, est=4.727353848738377, details={'was_impossible': False}),
 Prediction(uid='557', iid='682', r_ui=2.0, est=3.0444185980429905, details={'was_impossible': False}),
 Prediction(uid='774', iid='196', r_ui=3.0, est=2.5511704830751993, d

In [11]:

from surprise import accuracy

accuracy.rmse(predictions=predictions)

RMSE: 0.9361


0.9360598063394577

In [12]:
accuracy.mae(predictions=predictions)

MAE:  0.7367


0.7366677482366707

In [13]:
from surprise import accuracy

def evaluate_model(model: AlgoBase, test_set: [(int, int, float)]) -> dict:
    predictions = model.test(test_set)
    metrics_dict = {}
    metrics_dict['RMSE'] = accuracy.rmse(predictions, verbose=False)
    metrics_dict['MAE'] = accuracy.rmse(predictions, verbose=False)
    return metrics_dict


In [14]:
from surprise.model_selection import train_test_split


from surprise.prediction_algorithms.knns import KNNBasic

def train_and_evalute_model_pipeline(model_class: AlgoBase, model_kwargs: dict = {},
                                     from_surprise: bool = True,
                                     test_size: float = 0.2) -> (AlgoBase, dict):
    data = get_data(from_surprise)
    train_set, test_set = train_test_split(data, test_size, random_state=42)
    model = get_trained_model(model_class, model_kwargs, train_set)
    metrics_dict = evaluate_model(model, test_set)
    return model, metrics_dict

my_model, metrics_dict = train_and_evalute_model_pipeline(KNNBasic)
metrics_dict

Computing the msd similarity matrix...
Done computing similarity matrix.


{'RMSE': 0.980150596704479, 'MAE': 0.980150596704479}

In [15]:
get_trained_model(KNNBasic, {'user_based': False, 'name': 'pearson'}, train)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x2617b366af0>

In [16]:
#BENCHMARKING

In [17]:
from surprise.prediction_algorithms.knns import KNNBasic

benchmark_dict = {}

model_kwargs = {'user_based': True, 'name': 'cosine'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN user based cosine'] = metrics_dict

model_kwargs = {'user_based': True, 'name': 'pearson'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN user based pearson'] = metrics_dict

model_kwargs = {'user_based': False, 'name': 'cosine'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN item based cosine'] = metrics_dict

model_kwargs = {'user_based': False, 'name': 'pearson'}
knn, metrics_dict = train_and_evalute_model_pipeline(KNNBasic, model_kwargs)
benchmark_dict['KNN item based pearson'] = metrics_dict


benchmark_dict

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.


{'KNN user based cosine': {'RMSE': 1.0193536815834319,
  'MAE': 1.0193536815834319},
 'KNN user based pearson': {'RMSE': 1.0150350905205965,
  'MAE': 1.0150350905205965},
 'KNN item based cosine': {'RMSE': 1.0264295933767333,
  'MAE': 1.0264295933767333},
 'KNN item based pearson': {'RMSE': 1.041104054968961,
  'MAE': 1.041104054968961}}

In [18]:
benchmark_dict = {}

model_dict_list = [
    {
        'model_name' : 'KNN user based with cosine similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'cosine'}
    },
    {
        'model_name' : 'KNN user based with pearson similarity',
        'model_class' : KNNBasic,
        'model_kwargs' : {'user_based': True, 'name': 'pearson'}
    },
]

for model_dict in model_dict_list:
    model, metrics_dict = train_and_evalute_model_pipeline(
        model_dict['model_class'], model_dict['model_kwargs'])
    benchmark_dict[model_dict['model_name']] = metrics_dict
    model_dict['fitted_model'] = model
    
benchmark_dict

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.


{'KNN user based with cosine similarity': {'RMSE': 1.0193536815834319,
  'MAE': 1.0193536815834319},
 'KNN user based with pearson similarity': {'RMSE': 1.0150350905205965,
  'MAE': 1.0150350905205965}}

In [19]:
from surprise.model_selection import cross_validate

cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0096  1.0078  1.0093  1.0107  1.0207  1.0116  0.0046  
MAE (testset)     0.8014  0.8008  0.8030  0.8043  0.8072  0.8033  0.0023  
Fit time          5.57    5.11    6.42    6.08    5.39    5.71    0.48    
Test time         6.13    7.45    7.09    6.39    6.77    6.77    0.47    


{'test_rmse': array([1.00958923, 1.00780283, 1.00932833, 1.01068972, 1.02069169]),
 'test_mae': array([0.80138181, 0.80082149, 0.80297348, 0.80428877, 0.80722312]),
 'fit_time': (5.567511796951294,
  5.105734825134277,
  6.415756464004517,
  6.083662748336792,
  5.385116815567017),
 'test_time': (6.132904767990112,
  7.446448802947998,
  7.088389158248901,
  6.391512632369995,
  6.771937847137451)}

In [20]:
from surprise import SVDpp

In [None]:
from surprise import NMF
from surprise.model_selection import cross_validate
from surprise import NormalPredictor

In [22]:
algo = NormalPredictor()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5)
print(perf)

{'test_rmse': array([1.51764022, 1.50631212, 1.5262772 , 1.51479557, 1.51865728]), 'test_mae': array([1.2154921 , 1.20741847, 1.22411177, 1.21469814, 1.2162414 ]), 'fit_time': (0.2735099792480469, 0.35023951530456543, 0.3042285442352295, 0.2512531280517578, 0.28328633308410645), 'test_time': (0.4212214946746826, 0.2668466567993164, 0.4876401424407959, 0.40679097175598145, 0.25433945655822754)}


In [23]:
#NMF

In [None]:
from surprise import SVDpp
algo = SVDpp()
perf = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=10)

In [None]:
#SVD++

In [None]:
from surprise import NMF
algo = NMF()
perf = cross_validate(algo, df, measures=['RMSE', 'MAE'], cv=10)