In [13]:
import pandas as pd
import math
import os

In [15]:
class MovieDataLoader:
    def __init__(self, file_path = '../data'):
        self.file_path = file_path
        self.__load_data()
        self.__clean_data()

    def __load_data(self):
        for file_name in os.listdir(self.file_path):
            self.file_name = []
            if file_name.endswith('.csv'):
                setattr(self, os.path.splitext(file_name)[0], pd.read_csv(os.path.join(self.file_path, file_name)))
            if file_name.endswith('.txt'):
                setattr(self, os.path.splitext(file_name)[0], pd.read_csv(os.path.join(self.file_path, file_name), sep='\t', engine='python'))

    def __clean_data(self):
        pass


class AvgUserRecommender:
    def __init__(self, movie_data_loader):
        self.loader = movie_data_loader
        self.movies = self.loader.movies
        self.rates = self.loader.rates
        self.run()

    def run(self):
        self.rates['rate_avg_user'] = self.rates.groupby('user')['rate'].transform('mean')
        self.rates['rate_avg_user_class'] = self.rates['rate_avg_user'].apply(lambda x: math.floor(x + 0.5))
        self.rates = self.rates.sort_values(by='rate_avg_user', ascending=False)
        print(self.rates.head(4))
        print("=" * 42)
        return self
    
class Analyzer:
    def __init__(self, recommender):
        self.recommender = recommender
        self.rates = self.recommender.rates

    def evaluate(self):
        Y_HAT_REG = 'rate_avg_user'
        Y_HAT_CLA = 'rate_avg_user_class'
        self.__eval_regression(Y_HAT_REG)
        print("=" * 42)
        self.__eval_classification(Y_HAT_CLA)
        return self
    
    def __eval_regression(self, Y_HAT):
        self.MAE = sum(abs(self.rates['rate'] - self.rates[str(Y_HAT)])) / len(self.rates)
        self.MSE = sum((self.rates['rate'] - self.rates[str(Y_HAT)]) ** 2) / len(self.rates)
        self.RMSE = self.MSE ** 0.5
        self.MAPE = sum(abs(self.rates['rate'] - self.rates[str(Y_HAT)]) / self.rates['rate']) / len(self.rates)
        print(f"MAE:\t{self.MAE}, \nMSE:\t{self.MSE}, \nRMSE:\t{self.RMSE} \nMAPE:\t{self.MAPE}")

    def __eval_classification(self, Y_HAT):
        self.ConfusionMatrix = self.rates.groupby(['rate', Y_HAT]).size().unstack(fill_value=0)
        self.Accuracy = sum([self.ConfusionMatrix.loc[i, i] for i in range(1, 11)]) / len(self.rates)
        self.Precision = [self.ConfusionMatrix.loc[i, i] / sum(self.ConfusionMatrix.loc[i]) for i in range(1, 11)]
        self.Recall = [self.ConfusionMatrix.loc[i, i] / sum(self.ConfusionMatrix.loc[i]) for i in range(1, 11)]
        self.F1Score = [2 * self.Precision[i] * self.Recall[i] / (self.Precision[i] + self.Recall[i]) for i in range(0, 10)]
        print(f"ConfusionMatrix: {self.ConfusionMatrix}\nAccuracy: {self.Accuracy},\nPrecision: {self.Precision}, \nRecall: {self.Recall}, \nF1Score: {self.F1Score}")
    
movie_data_loader = MovieDataLoader("../data")
avg_movie_recommender = AvgUserRecommender(movie_data_loader)

Analyzer(avg_movie_recommender).evaluate()

         user  movie  rate        time  rate_avg_user  rate_avg_user_class
140690  52008  10998    10  1200927300           10.0                   10
140709  52027  10998    10  1227036960           10.0                   10
140706  52024  10998    10  1204622460           10.0                   10
140689  52007  10998    10  1280762520           10.0                   10
MAE:	0.7272423581189977, 
MSE:	2.032318646092766, 
RMSE:	1.4255941379273296 
MAPE:	0.21466823025704765
ConfusionMatrix: rate_avg_user_class    1    2    3    4    5    6     7     8      9      10
rate                                                                        
1                    1911   98  271  319  209  547   517   567    732     28
2                       2  163   31   21   78   97   136   121    119      3
3                       0    5  169    5   81   82   186   143    135      7
4                       0    3   29  201   82  121   254   212    172      9
5                       0    8   33   10  4

<__main__.Analyzer at 0x7bc06b82dee0>

# Ramdom 

MAE:	4.112209508919054, 
MSE:	24.60117262454694, 
RMSE:	4.959956917609965 
MAPE:	0.6008466163098031

Accuracy: 0.07113211569895529
Precision: [np.float64(0.050778995960761686), np.float64(0.09727626459143969), np.float64(0.13161131611316113), np.float64(0.11080332409972299), np.float64(0.10980392156862745), np.float64(0.1043382756727073), np.float64(0.11429879444529223), np.float64(0.10951884662150425), np.float64(0.10889403432307274), np.float64(0.05539608732033344)]
Recall: [np.float64(0.050778995960761686), np.float64(0.09727626459143969), np.float64(0.13161131611316113), np.float64(0.11080332409972299), np.float64(0.10980392156862745), np.float64(0.1043382756727073), np.float64(0.11429879444529223), np.float64(0.10951884662150425), np.float64(0.10889403432307274), np.float64(0.05539608732033344)]
F1Score: [np.float64(0.050778995960761686), np.float64(0.09727626459143969), np.float64(0.13161131611316113), np.float64(0.11080332409972299), np.float64(0.10980392156862745), np.float64(0.1043382756727073), np.float64(0.11429879444529223), np.float64(0.10951884662150425), np.float64(0.10889403432307274), np.float64(0.05539608732033344)]


# avg movie

MAE:	1.3034256627520515, 
MSE:	4.089427083616391, 
RMSE:	2.0222331921952996 
MAPE:	0.4275584271753014
==========================================
ConfusionMatrix: rate_avg_movie_class  1   2   3   4   5    6     7     8      9    10
rate                                                                 
1                      0   1   6  23  85   93   395   625   3967    4
2                      0   2   1  10  29   29    72   221    407    0
3                      0   0   1   5  30   38   121   239    379    0
4                      0   0   0   5  17   39   108   352    562    0
5                      0   0   1   6  25   66   199   571   1172    0
6                      0   0   0   4  42   84   265  1119   2123    5
7                      0   0   2   4  28   67   411  1831   4203    7
8                      0   0   0   0  32   60   426  2648   8331   17
9                      0   0   0   1  21   36   237  2272  12077   40

Accuracy: 0.11089474806339279,
Precision: [np.float64(0.0), np.float64(0.0025940337224383916), np.float64(0.0012300123001230013), np.float64(0.0046168051708217915), np.float64(0.012254901960784314), np.float64(0.023064250411861616), np.float64(0.06271936517625515), np.float64(0.22998089282612472), np.float64(0.8224598202124762), np.float64(0.0037177871222632956)], 
Recall: [np.float64(0.0), np.float64(0.0025940337224383916), np.float64(0.0012300123001230013), np.float64(0.0046168051708217915), np.float64(0.012254901960784314), np.float64(0.023064250411861616), np.float64(0.06271936517625515), np.float64(0.22998089282612472), np.float64(0.8224598202124762), np.float64(0.0037177871222632956)], 
F1Score: [np.float64(nan), np.float64(0.0025940337224383916), np.float64(0.0012300123001230013), np.float64(0.0046168051708217915), np.float64(0.012254901960784314), np.float64(0.023064250411861616), np.float64(0.06271936517625515), np.float64(0.22998089282612474), np.float64(0.8224598202124762), np.float64(0.0037177871222632956)]
/tmp/ipykernel_783496/4250076068.py:60: RuntimeWarning: invalid value encountered in scalar divide
  self.F1Score = [2 * self.Precision[i] * self.Recall[i] / (self.Precision[i] + self.Recall[i]) for i in range(0, 10)]