In [50]:
import pandas as pd
import math
import os
import random

In [53]:
class MovieDataLoader:
    """Load every tabular file in a directory as a DataFrame attribute.

    Each entry of ``file_path`` whose name ends in ``.csv`` is read as a
    comma-separated file, and each entry ending in ``.txt`` is read as a
    tab-separated file.  The resulting DataFrame is stored on the instance
    under the file's base name (``rates.csv`` -> ``self.rates``).  Entries
    with any other suffix are ignored.

    Args:
        file_path: Directory to scan. Defaults to ``'../data'``.
    """

    def __init__(self, file_path='../data'):
        self.file_path = file_path
        # Retained for backward compatibility: this attribute has always been
        # an empty list.  It is set once here (not on every loop iteration)
        # so that it also exists when the directory contains no files.
        self.file_name = []
        self.__load_data()

    def __load_data(self):
        """Read each supported file in ``self.file_path`` into an attribute."""
        # Sorted so that the result does not depend on the arbitrary order
        # returned by os.listdir (relevant when two files share a base name).
        for file_name in sorted(os.listdir(self.file_path)):
            attribute = os.path.splitext(file_name)[0]
            full_path = os.path.join(self.file_path, file_name)
            if file_name.endswith('.csv'):
                setattr(self, attribute, pd.read_csv(full_path))
            elif file_name.endswith('.txt'):
                setattr(self, attribute, pd.read_csv(full_path, sep='\t', engine='python'))


class RandomRecommender:
    """Baseline recommender that scores every rating row at random.

    Args:
        movie_data_loader: Object exposing ``movies`` and ``rates``
            attributes; ``rates`` is used as a pandas DataFrame.
    """

    def __init__(self, movie_data_loader):
        self.loader = movie_data_loader
        self.movies = self.loader.movies
        self.rates = self.loader.rates

    def run(self):
        """Attach random predictions to the ratings and sort by them.

        Adds two columns to ``self.rates``:

        * ``rate_random`` - a float drawn from ``random.uniform(1, 10)``.
        * ``rate_random_class`` - ``rate_random`` rounded half-up to an int.

        The rows are sorted by ``rate_random`` in descending order.  The work
        is done on a new DataFrame, so the frame held by the loader is left
        untouched.

        Returns:
            This instance, to allow call chaining.
        """
        random_scores = [random.uniform(1, 10) for _ in range(len(self.rates))]
        # assign() returns a new frame instead of writing the column onto the
        # DataFrame object shared with the loader.
        scored = self.rates.assign(rate_random=random_scores)
        scored = scored.sort_values(by='rate_random', ascending=False)
        # Round half up: floor(x + 0.5) rather than the built-in round().
        scored['rate_random_class'] = scored['rate_random'].apply(lambda x: math.floor(x + 0.5))
        self.rates = scored
        return self
    
class Analyzer:
    """Compute regression and classification metrics for a recommender.

    Args:
        recommender: Object exposing a ``rates`` DataFrame with the columns
            ``rate`` (actual rating), ``rate_random`` (predicted score) and
            ``rate_random_class`` (predicted class).
    """

    def __init__(self, recommender):
        self.recommender = recommender
        self.rates = self.recommender.rates

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 when undefined."""
        return float(numerator) / float(denominator) if denominator else 0.0

    def evaluate(self, labels=range(1, 11)):
        """Compute, store and print the evaluation metrics.

        Sets ``MAE``, ``MSE``, ``RMSE``, ``MAPE``, ``ConfusionMatrix``
        (rows: actual ``rate``, columns: ``rate_random_class``), ``Accuracy``
        and the per-class lists ``Precision``, ``Recall`` and ``F1Score``
        (one entry per label, in ``labels`` order).

        Args:
            labels: Class labels to report on. Defaults to the ratings 1..10.
                A label that never occurs in the data contributes zero counts
                instead of raising ``KeyError``.

        Returns:
            This instance, to allow call chaining.
        """
        labels = list(labels)
        total = len(self.rates)
        actual = self.rates['rate']
        errors = actual - self.rates['rate_random']

        # skipna=False keeps NaN propagation identical to a plain sum().
        self.MAE = float(errors.abs().sum(skipna=False)) / total
        self.MSE = float((errors ** 2).sum(skipna=False)) / total
        self.RMSE = self.MSE ** 0.5
        self.MAPE = float((errors.abs() / actual).sum(skipna=False)) / total

        self.ConfusionMatrix = self.rates.groupby(['rate', 'rate_random_class']).size().unstack(fill_value=0)

        # Square view over exactly the requested labels, used for the
        # per-class metrics; the stored ConfusionMatrix itself is unchanged.
        square = self.ConfusionMatrix.reindex(index=labels, columns=labels, fill_value=0)
        hits = [int(square.at[label, label]) for label in labels]
        actual_totals = square.sum(axis=1)      # row sums: rows with this actual rating
        predicted_totals = square.sum(axis=0)   # column sums: rows predicted as this class

        self.Accuracy = sum(hits) / total
        # Precision divides by what was predicted as the class (column sum);
        # recall divides by what actually belongs to the class (row sum).
        self.Precision = [
            self._safe_ratio(hit, predicted_totals.loc[label])
            for hit, label in zip(hits, labels)
        ]
        self.Recall = [
            self._safe_ratio(hit, actual_totals.loc[label])
            for hit, label in zip(hits, labels)
        ]
        self.F1Score = [
            self._safe_ratio(2 * precision * recall, precision + recall)
            for precision, recall in zip(self.Precision, self.Recall)
        ]
        print(f"ConfusionMatrix: {self.ConfusionMatrix}")
        print(f"Accuracy: {self.Accuracy}")
        print(f"Precision: {self.Precision}")
        print(f"Recall: {self.Recall}")
        print(f"F1Score: {self.F1Score}")
        return self


# Pipeline: load the datasets, attach random predictions, then score them.
movie_data_loader = MovieDataLoader("../data")
# run() returns the recommender itself, so construction and run are chained.
random_recommender = RandomRecommender(movie_data_loader).run()

analyzer = Analyzer(random_recommender)
analyzer.evaluate()


ConfusionMatrix: rate_random_class    1      2      3      4      5      6      7      8   \
rate                                                                       
1                   285    604    559    601    584    563    567    578   
2                    54     93     80     97     64     83     87     78   
3                    57    100     90     95    100     78     86     74   
4                    58    137    114    114    130    124    127    115   
5                   104    225    229    222    210    209    229    247   
6                   214    426    378    394    380    410    457    353   
7                   358    763    697    673    734    780    784    691   
8                   627   1300   1259   1295   1253   1247   1301   1341   
9                   773   1617   1585   1627   1641   1668   1646   1704   
10                 5325  10391  10570  10571  10463  10486  10457  10495   

rate_random_class     9     10  
rate                            
1   

<__main__.Analyzer at 0x245a935d3a0>

# 혼동 행렬 (Confusion Matrix)

# 정확도 (Accuracy)

# 정밀도 (Precision)

# 재현율 (Recall)

# F1 점수 (F1 Score)

# + ROC 곡선 (ROC Curve)

# REF
[Simplifying The Visualization of Confusion Matrix](https://ir.cwi.nl/pub/22777/22777B.pdf)

[조화평균 이해](https://www.youtube.com/watch?v=llK4YBiv4uc)

# INFO
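Python 반올림 방법
Python 내장 `round()`는 Bankers' Rounding(round-half-to-even) 방식이다. 우리는 사사오입(round-half-up) 방식으로 반올림해야 한다(코드에서는 `math.floor(x + 0.5)` 사용). 그래야 class 분산에 미치는 영향을 최소화할 수 있다.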