In this notebook, all the algorithms used so far are retrained and tested on the MovieLens 1M (ml-1m) dataset,
and their speed and accuracy are then plotted for comparison.

In [6]:
#list of algorithms
'''knn item, knn user, knn baseline,Knn hybrid ,svd, nmf, nmf bias, slope one, combination hybrid'''
from surprise import KNNBaseline, KNNBasic, SVD, NMF, SlopeOne, NormalPredictor
from own_algorithms.UserItemKNNv2 import UserItemKNNv2
from own_algorithms.Combination import Combiner
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse


In [7]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt


In [8]:
# Instantiate every recommender that takes part in the benchmark.

# --- Neighbourhood-based models ---
item_knn = KNNBasic(
    k=60,
    sim_options=dict(name='cosine', user_based=False),
    verbose=False,
)
user_knn = KNNBasic(
    k=55,
    sim_options=dict(name='Pearson', user_based=True),
    verbose=False,
)
base_knn = KNNBaseline(
    k=40,
    sim_options=dict(name='pearson_baseline', min_support=1, user_based=False),
    verbose=False,
)
# Project-local hybrid KNN, built with its default arguments.
hybrid_knn = UserItemKNNv2()

# --- Matrix-factorisation models (all seeded with random_state=1) ---
svd = SVD(
    n_factors=250,
    n_epochs=50,
    lr_all=0.01,
    reg_all=0.1,
    random_state=1,
)
# The two NMF variants receive identical arguments and differ only in `biased`.
unbias_nmf = NMF(
    n_factors=20,
    n_epochs=70,
    biased=False,
    lr_bu=0.02,
    lr_bi=0.02,
    reg_bu=0.05,
    reg_bi=0.2,
    random_state=1,
)
bias_nmf = NMF(
    n_factors=20,
    n_epochs=70,
    biased=True,
    lr_bu=0.02,
    lr_bi=0.02,
    reg_bu=0.05,
    reg_bi=0.2,
    random_state=1,
)

# --- Remaining models ---
slope = SlopeOne()
# Project-local combiner; the meaning of the two positional values is defined
# in own_algorithms.Combination and is not visible from this notebook.
combiner = Combiner(0.65, 0.45)

In [9]:
from surprise import Dataset

# Load the ml-1m dataset
# NOTE(review): load_builtin presumably downloads the data on first use and may
# prompt for confirmation -- confirm before running unattended.
data = Dataset.load_builtin('ml-1m')

# Split the dataset into train and test sets (80% / 20%).
# The split is seeded so that every "Restart & Run All" evaluates the models on
# the same held-out ratings; without a seed the reported RMSE changes per run.
# The seed value matches the random_state=1 used for the stochastic models.
trainset, testset = train_test_split(data, test_size=0.2, random_state=1)

# Define the dataframe columns
columns = ['Algorithm', 'RMSE', 'Fit Time', 'Predict Time']
# One row per evaluated algorithm is appended to this frame later on.
results_df = pd.DataFrame(columns=columns)

In [12]:
from surprise import accuracy

import time

def evaluate_algorithm(algo, algo_name):
    """Fit ``algo``, score it on the held-out set and record the outcome.

    The algorithm is trained on the notebook-level ``trainset`` and used to
    predict the notebook-level ``testset``; both phases are timed separately.
    A new row ``[algo_name, rmse, fit_time, pred_time]`` is appended to the
    notebook-level ``results_df``.

    Parameters
    ----------
    algo
        Object exposing ``fit(trainset)`` and ``test(testset)``.
    algo_name
        Label stored in the first column of the results row.

    Returns
    -------
    None
        The result is recorded as a side effect on ``results_df``.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() is wall-clock time and can jump if the
    # system clock is adjusted, which would corrupt the measured durations.
    fit_start = time.perf_counter()
    algo.fit(trainset)
    fit_time = time.perf_counter() - fit_start

    predict_start = time.perf_counter()
    predictions = algo.test(testset)
    pred_time = time.perf_counter() - predict_start

    # Named `score` so it does not shadow the `rmse` function imported from
    # surprise.accuracy at the top of the notebook.
    score = accuracy.rmse(predictions, verbose=False)

    # Append at the next integer position; values follow the column order of
    # results_df (Algorithm, RMSE, Fit Time, Predict Time).
    results_df.loc[len(results_df)] = [algo_name, score, fit_time, pred_time]


In [None]:
# Benchmark every model in a fixed order; each entry pairs the model instance
# with the label under which its metrics are stored.
benchmark_plan = [
    (item_knn, 'ItemKNN'),
    (user_knn, 'UserKNN'),
    (base_knn, 'BaselineKNN'),
    (hybrid_knn, 'HybridKNN'),
    (svd, 'SVD'),
    (unbias_nmf, 'NMF (Unbiased)'),
    (bias_nmf, 'NMF (Biased)'),
    (slope, 'SlopeOne'),
    (combiner, 'Combiner'),
]
for bench_algo, bench_label in benchmark_plan:
    evaluate_algorithm(bench_algo, bench_label)

# Show the collected metrics
print(results_df)