In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("./data/ratings.csv")
data.head()


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [8]:
data.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

In [9]:
data.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [21]:
import numpy as np
import pandas as pd

from surprise import Reader, Dataset, SVD, BaselineOnly, NMF, accuracy, KNNBasic
from surprise.model_selection import train_test_split

In [22]:
class DatasetBuilder():
    def __init__(self, data_location):
        reader = Reader()
        self.ratings = pd.read_csv(f"{data_location}/ratings.csv")
        self.movies = pd.read_csv(f'{data_location}/movies.csv')
        
        self.dataset = Dataset.load_from_df(self.ratings[['userId', 'movieId', 'rating']], reader)
        self.train_dataset, self.test_dataset = train_test_split(self.dataset, test_size=0.2)
        
    def get_movie_title(movie_id):
        self.movies[self.movies['movieId'] == movie_id].title

In [23]:
class AlgosGym():
    def __init__(self, dataset):
        self.algos = []
        self.dataset = dataset
        
    def addAlgorithm(self, algo):
        self.algos.append(algo)
        
    def train_and_evaluate(self):
        for algo in self.algos:
            algo.fit(self.dataset.train_dataset)
            predictions = algo.test(self.dataset.test_dataset)
            rmse = accuracy.rmse(predictions)
            mae = accuracy.mae(predictions)
            
            print('-----------')
            print(f'{algo.__class__.__name__}') 
            print('-----------')
            print(f'      Metrics - RMSE: {rmse}, MAE: {mae}')
            print('-----------')
              

In [25]:
dataset = DatasetBuilder('./data/')
gym = AlgosGym(dataset)

knn = KNNBasic()
gym.addAlgorithm(knn)

svd = SVD()
gym.addAlgorithm(svd)

nmf = NMF()
gym.addAlgorithm(nmf)

gym.train_and_evaluate()

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9486
MAE:  0.7242
-----------
KNNBasic
-----------
      Metrics - RMSE: 0.9485696565537738, MAE: 0.724154542202811
-----------
RMSE: 0.8772
MAE:  0.6725
-----------
SVD
-----------
      Metrics - RMSE: 0.8771965410299277, MAE: 0.6725162427173131
-----------
RMSE: 0.9247
MAE:  0.7059
-----------
NMF
-----------
      Metrics - RMSE: 0.9247219188829704, MAE: 0.705914240846551
-----------
