In [74]:
import pandas as pd
import math
import os
import matplotlib.pyplot as plt

In [82]:
class MovieDataLoader:
    def __init__(self, file_path = '../data'):
        self.file_path = file_path
        self.__load_data()
        self.__clean_data()

    def __load_data(self):
        for file_name in os.listdir(self.file_path):
            self.file_name = []
            if file_name.endswith('.csv'):
                setattr(self, os.path.splitext(file_name)[0], pd.read_csv(os.path.join(self.file_path, file_name)))
            if file_name.endswith('.txt'):
                setattr(self, os.path.splitext(file_name)[0], pd.read_csv(os.path.join(self.file_path, file_name), sep='\t', engine='python'))

    def __clean_data(self):
        pass


class AvgMovieRecommender:
    def __init__(self, movie_data_loader):
        self.loader = movie_data_loader
        self.movies = self.loader.movies
        self.rates = self.loader.rates
        self.run()

    def run(self):
        self.rates['rate_avg_movie'] = self.rates.groupby('movie')['rate'].transform('mean')
        self.rates['rate_avg_movie_class'] = self.rates['rate_avg_movie'].apply(lambda x: math.floor(x + 0.5))
        self.rates = self.rates.sort_values(by='rate_avg_movie', ascending=False)
        print(self.rates.head(4))
        print("=" * 42)
        return self
    
class Analyzer:
    def __init__(self, recommender):
        self.recommender = recommender
        self.rates = self.recommender.rates

    def evaluate(self):
        Y_HAT_REG = 'rate_avg_movie'
        Y_HAT_CLA = 'rate_avg_movie_class'
        self.__eval_regression(Y_HAT_REG)
        print("=" * 42)
        self.__eval_classification(Y_HAT_CLA)
        return self
    
    def __eval_regression(self, Y_HAT):
        self.MAE = sum(abs(self.rates['rate'] - self.rates[str(Y_HAT)])) / len(self.rates)
        self.MSE = sum((self.rates['rate'] - self.rates[str(Y_HAT)]) ** 2) / len(self.rates)
        self.RMSE = self.MSE ** 0.5
        self.MAPE = sum(abs(self.rates['rate'] - self.rates[str(Y_HAT)]) / self.rates['rate']) / len(self.rates)
        print(f"MAE:\t{self.MAE}, \nMSE:\t{self.MSE}, \nRMSE:\t{self.RMSE} \nMAPE:\t{self.MAPE}")

    def __eval_classification(self, Y_HAT):
        self.ConfusionMatrix = self.rates.groupby(['rate', Y_HAT]).size().unstack(fill_value=0)
        self.ConfusionMatrix.insert(0, 1, 0)
        self.Accuracy = sum([self.ConfusionMatrix.loc[i, i] for i in range(1, 11)]) / len(self.rates)
        self.Precision = [self.ConfusionMatrix.loc[i, i] / sum(self.ConfusionMatrix.loc[i]) for i in range(1, 11)]
        self.Recall = [self.ConfusionMatrix.loc[i, i] / sum(self.ConfusionMatrix.loc[i]) for i in range(1, 11)]
        self.F1Score = [2 * self.Precision[i] * self.Recall[i] / (self.Precision[i] + self.Recall[i]) for i in range(0, 10)]
        print(f"ConfusionMatrix: {self.ConfusionMatrix}\nAccuracy: {self.Accuracy},\nPrecision: {self.Precision}, \nRecall: {self.Recall}, \nF1Score: {self.F1Score}")
    
movie_data_loader = MovieDataLoader("../data")
avg_movie_recommender = AvgMovieRecommender(movie_data_loader)

Analyzer(avg_movie_recommender).evaluate()

         user  movie  rate        time  rate_avg_movie  rate_avg_movie_class
129090  41276  10384    10  1549038000            10.0                    10
129091  41277  10384    10  1520530020            10.0                    10
137412  48788  10799    10  1557332640            10.0                    10
140453  51771  10974    10  1351621980            10.0                    10
MAE:	1.3034256627520515, 
MSE:	4.089427083616391, 
RMSE:	2.0222331921952996 
MAPE:	0.4275584271753014
ConfusionMatrix: rate_avg_movie_class  1   2   3   4   5    6     7     8      9    10
rate                                                                 
1                      0   1   6  23  85   93   395   625   3967    4
2                      0   2   1  10  29   29    72   221    407    0
3                      0   0   1   5  30   38   121   239    379    0
4                      0   0   0   5  17   39   108   352    562    0
5                      0   0   1   6  25   66   199   571   1172    0
6     

  self.F1Score = [2 * self.Precision[i] * self.Recall[i] / (self.Precision[i] + self.Recall[i]) for i in range(0, 10)]


<__main__.Analyzer at 0x7e4fc5618770>

# Ramdom 

MAE:	4.112209508919054, 
MSE:	24.60117262454694, 
RMSE:	4.959956917609965 
MAPE:	0.6008466163098031

Accuracy: 0.07113211569895529
Precision: [np.float64(0.050778995960761686), np.float64(0.09727626459143969), np.float64(0.13161131611316113), np.float64(0.11080332409972299), np.float64(0.10980392156862745), np.float64(0.1043382756727073), np.float64(0.11429879444529223), np.float64(0.10951884662150425), np.float64(0.10889403432307274), np.float64(0.05539608732033344)]
Recall: [np.float64(0.050778995960761686), np.float64(0.09727626459143969), np.float64(0.13161131611316113), np.float64(0.11080332409972299), np.float64(0.10980392156862745), np.float64(0.1043382756727073), np.float64(0.11429879444529223), np.float64(0.10951884662150425), np.float64(0.10889403432307274), np.float64(0.05539608732033344)]
F1Score: [np.float64(0.050778995960761686), np.float64(0.09727626459143969), np.float64(0.13161131611316113), np.float64(0.11080332409972299), np.float64(0.10980392156862745), np.float64(0.1043382756727073), np.float64(0.11429879444529223), np.float64(0.10951884662150425), np.float64(0.10889403432307274), np.float64(0.05539608732033344)]