In [None]:
import pandas as pd 
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from IPython.display import Image
# Set up plotly's offline mode so figures can be used from inside the notebook.
init_notebook_mode(connected=True)
# IPython line magic: draw matplotlib figures inline, below the producing cell.
%matplotlib inline

import surprise
from surprise import KNNBasic
from surprise.model_selection import GridSearchCV
from surprise import Reader
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import KNNWithMeans

In [None]:
# Load the raw ratings table; the path is relative to the notebook's working directory.
rating_df = pd.read_csv(filepath_or_buffer='ratings_small.csv')

In [None]:
# Display the loaded ratings table as the cell output for a quick visual check.
rating_df

In [None]:
# Wrap the ratings table in a Surprise Dataset; the columns must be passed in
# (user, item, rating) order.
# NOTE(review): rating_scale assumes every rating lies in [1, 5] - confirm
# against the CSV (e.g. rating_df['rating'].min()) before trusting the errors.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    rating_df.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)

In [None]:
# Benchmark three recommenders with 5-fold cross-validation and keep one
# summary row (fold-averaged errors and timings) per algorithm.
collector = []

for algo in [SVD(biased=False),
             KNNBasic(sim_options={'user_based': True}),
             KNNBasic(sim_options={'user_based': False})]:
    results = cross_validate(algo, data, measures=['MAE', 'RMSE'], cv=5, verbose=True)

    # Average every column reported by cross_validate across the folds.
    item = pd.DataFrame.from_dict(results).mean(axis=0)
    # Series.append was removed in pandas 2.0; pd.concat is the supported way
    # to attach the class-name label (stored under key 'i') to the summary row.
    item = pd.concat([item, pd.Series([type(algo).__name__], index=['i'])])

    collector.append(item)

In [None]:
# Human-readable labels, listed in the same order the algorithms were benchmarked.
new_algorithms = ['PMF', 'UserCF', 'ItemCF']

# One row per algorithm; column 'i' is overwritten with the readable labels.
collector = pd.DataFrame(collector).assign(i=new_algorithms)

In [None]:
# Compare similarity measures for user-based and item-based KNN with 5-fold
# cross-validation; one summary row (fold-averaged errors and timings) per
# configuration.
collector2 = []

# Iteration order is user-based (cosine, MSD, pearson) followed by item-based
# (cosine, MSD, pearson); the label list applied to these rows in a later cell
# relies on exactly this order.
for user_based in (True, False):
    for similarity in ('cosine', 'MSD', 'pearson'):
        algo = KNNBasic(sim_options={'name': similarity, 'user_based': user_based})

        results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=False)

        # Average every column reported by cross_validate across the folds.
        item = pd.DataFrame.from_dict(results).mean(axis=0)
        # Series.append was removed in pandas 2.0, so use pd.concat. The label
        # is stored under 'i', the same key the first benchmark loop uses.
        item = pd.concat([item, pd.Series([type(algo).__name__], index=['i'])])

        # Rows belong in collector2; the original appended to `collector`,
        # which left collector2 empty.
        collector2.append(item)

In [None]:
# Readable labels, in the order the six KNN configurations were benchmarked.
new_algorithms2 = ['Cosine-UserCF', 'MSD-UserCF', 'Pearson-UserCF',
                   'Cosine-ItemCF', 'MSD-ItemCF', 'Pearson-ItemCF']

# collector2 is a plain list of summary rows, so it has to become a DataFrame
# before it can be labelled, indexed and sorted. Building results2 in a single
# chain leaves collector2 untouched, so this cell can be re-run safely.
results2 = (
    pd.DataFrame(collector2)
    .assign(i=new_algorithms2)
    .set_index('i')
    .sort_values('test_rmse', ascending=False)
)
results2

In [None]:

# Annotated heatmap of the cross-validated errors: one row per algorithm, one
# column per error metric.
#
# The metric table gets its own name instead of rebinding `data`, which holds
# the Surprise Dataset used by the cross-validation cells. It is cast to float
# because the summary rows mix numeric means with a text label and can
# therefore end up as object dtype.
error_table = results2[['test_rmse', 'test_mae']].astype(float)
matrix = error_table.values

# 'test_rmse' -> 'RMSE', 'test_mae' -> 'MAE'
horizontal = [label.split('_')[1].upper() for label in error_table.columns.tolist()]
vertical = error_table.index.tolist()

h_label = 'Function'
v_label = 'Algorithm'

hover_template = ('Error: {:.4f}<br>{}: {}<br>{}: {}'
                  '<br>Fit Time: {:.3f}s<br>Test Time: {:.3f}s')

hovertexts = []   # 2-D list of hover strings, same shape as `matrix`
annotations = []  # flat list with one text annotation per heatmap cell

for row_idx, y_value in enumerate(vertical):
    row = []
    for col_idx, x_value in enumerate(horizontal):
        error = matrix[row_idx, col_idx]
        row.append(hover_template.format(error, v_label, y_value, h_label, x_value,
                                         results2.loc[y_value, 'fit_time'],
                                         results2.loc[y_value, 'test_time']))
        # ax=0 / ay=0 puts the text on the cell itself, with no arrow offset.
        annotations.append(dict(x=x_value, y=y_value, text='{:.4f}'.format(error),
                                ax=0, ay=0, font=dict(color='#000000')))
    hovertexts.append(row)

trace = go.Heatmap(x=horizontal,
                   y=vertical,
                   z=matrix,
                   text=hovertexts,
                   hoverinfo='text',
                   colorscale='Picnic',
                   colorbar=dict(title='Error'))

layout = go.Layout(title='Cross-validated Comparison of Algorithms',
                   xaxis=dict(title=h_label),
                   yaxis=dict(title=v_label,
                              tickangle=-40),
                   annotations=annotations)

fig = go.Figure(data=[trace], layout=layout)
# Render to a static PNG so the figure is embedded in the saved notebook.
img_bytes = fig.to_image(format="png", width=600, height=450, scale=2)
Image(img_bytes)

In [None]:
# (Re)build the Surprise Dataset consumed by the neighbourhood-size sweeps below.
# NOTE(review): rating_scale assumes every rating lies in [1, 5] - confirm
# against the CSV.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    rating_df.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)

In [None]:

# Sweep the neighbourhood size k = 1..29 for user-based KNN (MSD similarity)
# with 3-fold cross-validation. Row j of collector_ucf holds the fold-averaged
# results for k = j + 1.
collector_ucf = []

for k in range(1, 30):
    algorithm = KNNBasic(k=k, sim_options={'name': 'MSD', 'user_based': True})
    results = cross_validate(algorithm, data, measures=['RMSE', 'MAE'], cv=3, verbose=False)

    # The fold means must be bound to `item`. The original assigned them to
    # `init_notebook_mode` (clobbering the plotly function) and then appended
    # a stale `item` left over from an earlier cell on every iteration.
    item = pd.DataFrame.from_dict(results).mean(axis=0)

    collector_ucf.append(item)

In [None]:
# Sweep the neighbourhood size k = 1..29 for item-based KNN (MSD similarity)
# with 3-fold cross-validation. Row j of collector_icf holds the fold-averaged
# results for k = j + 1.
collector_icf = []

for i in range(1, 30):
    algorithm = KNNBasic(k=i, sim_options=dict(name='MSD', user_based=False))
    results = cross_validate(
        algorithm,
        data,
        measures=['RMSE', 'MAE'],
        cv=3,
        verbose=False,
    )

    # Collapse the per-fold values into a single mean per column.
    item = pd.DataFrame.from_dict(results).mean(axis=0)
    collector_icf.append(item)

In [None]:
# One row per k (row 0 is k = 1) for each neighbourhood flavour.
benchmark_ucf, benchmark_icf = (
    pd.DataFrame(rows) for rows in (collector_ucf, collector_icf)
)

# Suffix 1 = RMSE curves, suffix 2 = MAE curves.
acc_userCF1, acc_userCF2 = benchmark_ucf['test_rmse'], benchmark_ucf['test_mae']
acc_itemCF1, acc_itemCF2 = benchmark_icf['test_rmse'], benchmark_icf['test_mae']

In [None]:
# Display the item-based RMSE series (position j corresponds to k = j + 1).
acc_itemCF1

In [None]:
# Display the user-based RMSE series (position j corresponds to k = j + 1).
acc_userCF1

In [None]:
# RMSE as a function of the neighbourhood size for both KNN flavours.
fig, ax = plt.subplots(figsize=(12, 8))
neighbor_counts = range(1, 30)

ax.plot(neighbor_counts, acc_userCF1, label="User-based CF")
ax.plot(neighbor_counts, acc_itemCF1, label="Item-based CF")

ax.set_xlabel('Number of neighbors (K)', fontsize=12)
ax.set_ylabel('RMSE', fontsize=12)
ax.set_title('K Neighbors vs. RMSE (User-based CF and Item-based CF)', fontsize=18, y=1.03)

ax.legend(loc='best')
ax.grid(ls='dotted')

# Save before plt.show() so the written file contains the drawn figure.
fig.savefig("plot_f (RMSE).png", dpi=300)

plt.show()

In [None]:
# MAE as a function of the neighbourhood size for both KNN flavours.
fig, ax = plt.subplots(figsize=(12, 8))
neighbor_counts = range(1, 30)

ax.plot(neighbor_counts, acc_userCF2, label="User-based CF")
ax.plot(neighbor_counts, acc_itemCF2, label="Item-based CF")

ax.set_xlabel('Number of neighbors (K)', fontsize=12)
# This chart plots the MAE series; the original y-axis label said 'RMSE'.
ax.set_ylabel('MAE', fontsize=12)
ax.set_title('K Neighbors vs. MAE (User-based CF and Item-based CF)', fontsize=18, y=1.03)

ax.legend(loc='best')
ax.grid(ls='dotted')

# Save before plt.show() so the written file contains the drawn figure.
fig.savefig("plot_f (MAE).png", dpi=300)

plt.show()

In [None]:
# Report the k with the lowest RMSE, user-based first and item-based second.
# The series index starts at 0 while k starts at 1, hence the +1.
for rmse_by_k in (acc_userCF1, acc_itemCF1):
    best_k = rmse_by_k.idxmin() + 1
    print(best_k, "RMSE:", min(rmse_by_k))

In [None]:
# Report the k with the lowest MAE, user-based first and item-based second.
# The series index starts at 0 while k starts at 1, hence the +1.
for mae_by_k in (acc_userCF2, acc_itemCF2):
    best_k = mae_by_k.idxmin() + 1
    print(best_k, "MAE:", min(mae_by_k))