# Recommentation models

In [1]:
import pandas as pd
import numpy as np

## Surprise library

In [2]:
from surprise import Reader, Dataset, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, accuracy
from surprise.model_selection import PredefinedKFold

In [57]:
reader = Reader(line_format = 'user item rating timestamp')
N_neighbors = 50
configuration = {'name':'MSD', 'user_based':False, 'min_suport':5}

files = [('../data/MovieLens_Data/ml-100k/u' + str(index) + '-base.base', '../data/MovieLens_Data/ml-100k/u' + str(index) + '-test.test') for index in range(1,6)]
Data = Dataset.load_from_folds(files, reader)
PKF = PredefinedKFold()

rmseBasic = []
rmseMeans = []
rmseZScore = []
rmseBaseline = []
for TrainData, TestData in PKF.split(Data):
    #KNNBaisc
    oKNNBasic = KNNBasic(k = N_neighbors, sim_options = configuration, verbose = False)
    oKNNBasic.fit(TrainData)
    PredictionsKNNBasic = oKNNBasic.test(TestData)
    rmseBasic.append(accuracy.rmse(PredictionsKNNBasic, verbose=False))
    
    #KNNWithMeans
    oKNNMeans = KNNWithMeans(k = N_neighbors, sim_options = configuration, verbose = False)
    oKNNMeans.fit(TrainData)
    PredictionsKNNMeans = oKNNMeans.test(TestData)
    rmseMeans.append(accuracy.rmse(PredictionsKNNMeans, verbose=False))
    
    #KNNWithZscore
    oKNNZScore = KNNWithZScore(k = N_neighbors, sim_options = configuration, verbose = False)
    oKNNZScore.fit(TrainData)
    PredictionsKNNZScore = oKNNZScore.test(TestData)
    rmseZScore.append(accuracy.rmse(PredictionsKNNZScore, verbose=False))
    
    
    #KNNBaseline
    oKNNBaseline = KNNBaseline(k = N_neighbors, sim_options = configuration, verbose = False)
    oKNNBaseline.fit(TrainData)
    PredictionsKNNBaseline = oKNNBaseline.test(TestData)
    rmseBaseline.append(accuracy.rmse(PredictionsKNNBaseline, verbose=False))
    
    
print('KNNBasic: ', np.array(rmseBasic).mean())
print('KNNWithMeans: ', np.array(rmseMeans).mean())
print('KNNWithZScore: ', np.array(rmseZScore).mean())
print('KNNBaseline: ', np.array(rmseBaseline).mean())

KNNBasic:  0.9799920554793488
KNNWithMeans:  0.9358400618621564
KNNWithZScore:  0.9379091906065818
KNNBaseline:  0.932266426084659


**Train model with best parameters with all data**

In [38]:
from surprise.dump import dump

reader = Reader(line_format = 'user item rating timestamp')
best_configuration = {'name':'MSD', 'user_based':False, 'min_suport':5}
Model = KNNBaseline(k = 50, sim_options = best_configuration, verbose = False)
TrainData = Dataset.load_from_file('../data/MovieLens_Data/ml-100k/u-data.data', reader).build_full_trainset()
Model.fit(TrainData)
dump('RecommendationModel.pkl', algo = Model)

**Load model to test**

In [39]:
from surprise.dump import load
Model = load('RecommendationModel.pkl')[1]

In [40]:
N_predictions = 5
N_Users = 943
N_Items = 1682
#Get random user and item ids to send to predict
user_ids = np.random.randint(1, N_Users + 1, N_predictions).astype(str)
item_ids = np.random.randint(1, N_Items + 1, N_predictions).astype(str)

In [42]:
#Get predictions
for i in range(N_predictions):
    prediction = Model.predict(user_ids[i], item_ids[i])
    print(prediction)

user: 812        item: 1279       r_ui = None   est = 3.61   {'actual_k': 20, 'was_impossible': False}
user: 603        item: 1578       r_ui = None   est = 3.39   {'actual_k': 43, 'was_impossible': False}
user: 204        item: 83         r_ui = None   est = 3.92   {'actual_k': 42, 'was_impossible': False}
user: 784        item: 1137       r_ui = None   est = 4.23   {'actual_k': 39, 'was_impossible': False}
user: 802        item: 938        r_ui = None   est = 3.30   {'actual_k': 50, 'was_impossible': False}


In [31]:
print(Model)

<surprise.prediction_algorithms.knns.KNNWithZScore object at 0x0000026218F1B3A0>
