# OOP Implementation for Classification and Regression Metrics Evaluation

In [689]:
# Import scikit-learn's metric functions; they are used only to cross-check the hand-written implementations below

from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [690]:
# super / parent class for both classification and regression metrics evaluation 

class BaseEvaluation():
    """Shared storage and validation for metric evaluators.

    Keeps two parallel lists -- the actual values and the predicted values --
    that subclasses read when computing their metrics.
    """

    def __init__(self):
        # Both stores start empty; they are filled through set_values().
        self._actual_values = []
        self._predicted_values = []

    def get_values(self):
        """Print the stored actual and predicted values (returns None)."""
        stored = (
            ('Actual Values:', self._actual_values),
            ('Predicted Values:', self._predicted_values),
        )
        for label, values in stored:
            print(label, values)

    def set_values(self, actual_values, predicted_values):
        """Append a batch of actual/predicted values to the stored data.

        Args:
            actual_values: list of ground-truth values.
            predicted_values: list of predictions, same length as actual_values.

        Raises:
            ValueError: if either argument is not a list, or if the two
                lists differ in length.
        """
        both_are_lists = isinstance(actual_values, list) and isinstance(predicted_values, list)
        if not both_are_lists:
            raise ValueError("Please input data with the type of list.")

        if len(actual_values) != len(predicted_values):
            raise ValueError('Dataset for actual and predicted values are not in the same length')

        # Batches accumulate: earlier data stays until reset() is called.
        self._actual_values += actual_values
        self._predicted_values += predicted_values

    def _check_values(self):
        """Raise ValueError when no data has been stored yet."""
        if not (self._actual_values and self._predicted_values):
            raise ValueError("No data for metrics evaluation. Please add some values by using the set_values function.")

    def reset(self):
        """Discard all stored values by rebinding both stores to new empty lists."""
        self._actual_values = []
        self._predicted_values = []

In [691]:
# sub / child class for classification metrics evaluation

class ClassificationEvaluation(BaseEvaluation):
    """Binary classification metrics over the stored labels.

    Labels are expected to be 0 (negative) and 1 (positive). Every metric
    raises ValueError (via _check_values) when no data has been stored.
    Precision, recall and F1 return 0.0 instead of raising
    ZeroDivisionError when their denominator is zero, which is the value
    scikit-learn reports by default for these ill-defined cases.
    """

    def _confusion_counts(self):
        """Return (true_positive, false_positive, false_negative) counts."""
        true_positive = false_positive = false_negative = 0
        for y_true, y_pred in zip(self._actual_values, self._predicted_values):
            # Compare each label explicitly: testing `y_true + y_pred == 2`
            # would wrongly count a pair such as (2, 0) as a true positive.
            if y_true == 1 and y_pred == 1:
                true_positive += 1
            elif y_true == 0 and y_pred == 1:
                false_positive += 1
            elif y_true == 1 and y_pred == 0:
                false_negative += 1
        return true_positive, false_positive, false_negative

    def accuracy(self):
        """Return the fraction of predictions equal to the actual label."""
        self._check_values()

        # Correct predictions cover both true positives and true negatives.
        correct = sum(
            1
            for y_true, y_pred in zip(self._actual_values, self._predicted_values)
            if y_true == y_pred
        )
        # _check_values() guarantees the stored list is non-empty.
        return correct / len(self._actual_values)

    def precision(self):
        """Return TP / (TP + FP), or 0.0 when nothing was predicted positive."""
        self._check_values()

        true_positive, false_positive, _ = self._confusion_counts()
        predicted_positive = true_positive + false_positive
        if predicted_positive == 0:
            return 0.0
        return true_positive / predicted_positive

    def recall(self):
        """Return TP / (TP + FN), or 0.0 when there are no actual positives."""
        self._check_values()

        true_positive, _, false_negative = self._confusion_counts()
        actual_positive = true_positive + false_negative
        if actual_positive == 0:
            return 0.0
        return true_positive / actual_positive

    def f1(self):
        """Return the harmonic mean of precision and recall.

        Returns 0.0 when precision and recall are both zero.
        """
        self._check_values()

        precision = self.precision()
        recall = self.recall()
        if precision + recall == 0:
            return 0.0

        return (2*precision*recall/(precision+recall))

    # evaluate function to evaluate all evaluation metrics
    def evaluate(self):
        """Return every metric in a dict keyed by metric name."""
        return {
            "Accuracy": self.accuracy(),
            "Precision": self.precision(),
            "Recall": self.recall(),
            "F1 Score": self.f1()
            }


In [692]:
# Dummy binary labels (0 = negative, 1 = positive) for the classification metrics:
# 30 ground-truth labels and the 30 predictions they are compared against.

classification_actual_label = [
    1, 0, 1, 1, 0, 1, 0, 1, 1, 0,
    1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
    1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
]
classification_predicted_labels = [
    1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
    1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
    1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
]

In [693]:
# Instantiate the classification evaluator; it holds no data until
# set_values() is called.

classification_evaluation = ClassificationEvaluation()

In [694]:
# Load the dummy labels into the evaluator.
# NOTE: set_values() appends to whatever is already stored, so re-running this
# cell without calling reset() first duplicates the data.

classification_evaluation.set_values(classification_actual_label, classification_predicted_labels)

In [695]:
# Compute accuracy, precision, recall and F1 in one call; the result is a dict
# keyed by metric name.

classification_evaluation.evaluate()

{'Accuracy': 0.8, 'Precision': 0.8125, 'Recall': 0.8125, 'F1 Score': 0.8125}

In [696]:
# Cross-check: compute the same four metrics with scikit-learn on the same labels.

accuracy_scikit, precision_scikit, recall_scikit, f1_scikit = (
    metric(classification_actual_label, classification_predicted_labels)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(
    f'Accuracy: {accuracy_scikit}, '
    f'Precision: {precision_scikit}, '
    f'Recall: {recall_scikit}, '
    f'F1 Score: {f1_scikit}'
)

Accuracy: 0.8, Precision: 0.8125, Recall: 0.8125, F1 Score: 0.8125


Summary: The classification metrics computed by the `ClassificationEvaluation` class match the values returned by scikit-learn.

In [697]:
# sub / child class for regression metrics evaluation


class RegressionEvaluator(BaseEvaluation):
    """Regression metrics (MAE, MSE, R2) over the stored values.

    Every metric raises ValueError (via _check_values) when no data has
    been stored.
    """

    def mean_absolute_error(self):
        """Return the mean of |actual - predicted| over all stored pairs."""
        self._check_values()

        # calculate absolute errors
        errors = [abs(y_true - y_pred) for y_true, y_pred in zip(self._actual_values, self._predicted_values)]
        return sum(errors) / len(errors)

    def mean_squared_error(self):
        """Return the mean of (actual - predicted)**2 over all stored pairs."""
        self._check_values()

        # calculate squared errors
        errors = [(y_true - y_pred)**2 for y_true, y_pred in zip(self._actual_values, self._predicted_values)]
        return sum(errors) / len(errors)

    def r2_score(self):
        """Return the coefficient of determination, 1 - SS_res / SS_tot.

        When every actual value is identical, SS_tot is zero and the ratio
        is undefined. In that case this returns 1.0 if the predictions are
        exact and 0.0 otherwise, instead of raising ZeroDivisionError. This
        follows the finite replacement values scikit-learn uses for constant
        targets.
        """
        self._check_values()

        # Residual sum of squares: the variation the predictions fail to explain.
        residual_ss = sum(
            (y_true - y_pred)**2
            for y_true, y_pred in zip(self._actual_values, self._predicted_values)
        )
        average_y_true = sum(self._actual_values)/len(self._actual_values)
        # Total sum of squares: variation of the actual values around their mean.
        total_ss = sum((y_true - average_y_true)**2 for y_true in self._actual_values)

        if total_ss == 0:
            # Constant actual values: avoid dividing by zero.
            return 1.0 if residual_ss == 0 else 0.0

        return 1 - residual_ss / total_ss

    # evaluate function to evaluate all evaluation metrics
    def evaluate(self):
        """Return every metric in a dict keyed by 'MAE', 'MSE' and 'R2'."""
        return {
            'MAE': self.mean_absolute_error(),
            'MSE': self.mean_squared_error(),
            'R2': self.r2_score(),
        }



In [698]:
# Dummy data for the regression metrics: 30 actual values running from 10 to
# 155 in steps of 5, paired with 30 predictions.

regression_actual_values = list(range(10, 160, 5))
regression_predicted_values = [
    12, 18, 22, 28, 29, 38, 42, 48, 51, 58,
    62, 68, 73, 78, 82, 88, 92, 96, 102, 110,
    112, 118, 122, 128, 133, 138, 142, 148, 152, 158,
]

In [699]:
# Instantiate the regression evaluator (class RegressionEvaluator); it holds
# no data until set_values() is called.

regression_evaluation = RegressionEvaluator()


In [700]:
# Load the dummy regression data into the evaluator.
# NOTE: set_values() appends to whatever is already stored, so re-running this
# cell without calling reset() first duplicates the data.

regression_evaluation.set_values(regression_actual_values, regression_predicted_values)

In [701]:
# Compute MAE, MSE and R2 in one call; the result is a dict keyed by
# 'MAE', 'MSE' and 'R2'.

regression_evaluation.evaluate()

{'MAE': 2.5, 'MSE': 6.9, 'R2': 0.9963159065628476}

In [702]:
# Cross-check: compute the same three metrics with scikit-learn on the same data.

mae_scikit, mse_scikit, r2_scikit = (
    metric(regression_actual_values, regression_predicted_values)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(
    'MAE: {}, MSE: {}, R2: {}'.format(
        mae_scikit,
        mse_scikit,
        r2_scikit,
    )
)


MAE: 2.5, MSE: 6.9, R2: 0.9963159065628476


Summary: The regression metrics computed by the `RegressionEvaluator` class match the values returned by scikit-learn.

## Additional checks (other methods)

In [703]:
# Show the values currently stored in regression_evaluation.
# get_values() prints both lists and returns None.

regression_evaluation.get_values()

Actual Values: [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155]
Predicted Values: [12, 18, 22, 28, 29, 38, 42, 48, 51, 58, 62, 68, 73, 78, 82, 88, 92, 96, 102, 110, 112, 118, 122, 128, 133, 138, 142, 148, 152, 158]


In [704]:
# Append one more (actual, predicted) pair; set_values() extends the stored
# lists rather than replacing them.

regression_evaluation.set_values([103], [107])

In [705]:
# Print the stored values again to confirm the new pair was appended at the
# end of each list.

regression_evaluation.get_values()

Actual Values: [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 103]
Predicted Values: [12, 18, 22, 28, 29, 38, 42, 48, 51, 58, 62, 68, 73, 78, 82, 88, 92, 96, 102, 110, 112, 118, 122, 128, 133, 138, 142, 148, 152, 158, 107]


In [706]:
# Re-compute MAE, MSE and R2 on the stored data, which now includes the
# appended pair.

regression_evaluation.evaluate()

{'MAE': 2.5483870967741935, 'MSE': 7.193548387096774, 'R2': 0.996059666442471}

In [707]:
# Cross-check the extended data with scikit-learn. The evaluator has no method
# that returns its stored lists, so the private attributes are read directly.

mae_scikit, mse_scikit, r2_scikit = (
    metric(regression_evaluation._actual_values, regression_evaluation._predicted_values)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'MAE: {mae_scikit}, MSE: {mse_scikit}, R2: {r2_scikit}')

MAE: 2.5483870967741935, MSE: 7.193548387096774, R2: 0.996059666442471


In [708]:
# Check reset(): it replaces both stored lists with empty ones, so the
# following get_values() call should print two empty lists.

regression_evaluation.reset()
regression_evaluation.get_values()

Actual Values: []
Predicted Values: []
