# Evaluation

In [None]:
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from utils import eda_utils, evaluation_utils
import numpy as np

## Evaluation of the top 5 AutoML models

- Box plots: error distribution per model, and per `dis_bin` for each model
- Output: per-model metrics (MAE, MSE, RMSE, R2, adjusted R2) written to a CSV file

In [None]:
# Read the test-set CSV into a DataFrame (the file name marks it as the
# pre-augmentation split).
test_df = pd.read_csv(
    filepath_or_buffer='data/241005_Rockhead_Seoul_test(before_aumentation).csv',
)

In [None]:
# Store the two label-like columns as pandas categoricals.
test_df = test_df.astype({'ground_name': 'category', 'rock_type': 'category'})

# Model input: every column except 'depth_start' (used as y_test below) and
# the 'dis_bin' bin label.
test = TabularDataset(test_df.drop(columns=['depth_start', 'dis_bin']))
# Ground-truth values as a plain Python list.
y_test = test_df['depth_start'].values.tolist()

In [None]:
# Restore the trained predictor from disk.
predictor = TabularPredictor.load(path='path_to_predictor')
# Build the model leaderboard against `test`; silent=True returns the frame
# without printing it.
# NOTE(review): leaderboard(data=...) normally needs the label column in
# `data`, but `test` was built without 'depth_start' — confirm the predictor's
# label and pass a frame that still contains it if required.
ld_board = predictor.leaderboard(data=test, silent=True)

In [None]:
# Empty result tables, created with their final column layout.
# The pandas class is `DataFrame` (capital F); `pd.Dataframe` raises
# AttributeError.

# Per-model metrics: one row per evaluated model.
evaluation_df = pd.DataFrame(columns=['model', 'mae', 'mse', 'rmse', 'r2', 'adjusted_r2'])
# Per-prediction errors: bin label, model name, true value, prediction, error.
error_df = pd.DataFrame(columns=['bin', 'model', 'y_true', 'y_pred', 'error'])

In [None]:
# Evaluate the first five leaderboard models on every dis_bin subset of the
# test set.
#
# Results (both tables are rebuilt from scratch here):
#   error_df      - one row per (model, test sample) with the columns
#                   bin, model, y_true, y_pred, error (absolute error)
#   evaluation_df - one row per model with the five metrics returned by
#                   evaluation_utils.evaluation
#
# Rows are collected in plain lists and turned into DataFrames once at the
# end: DataFrame.append returned a new frame instead of mutating in place and
# was removed in pandas 2.0.
error_columns = ['bin', 'model', 'y_true', 'y_pred', 'error']
metric_columns = ['model', 'mae', 'mse', 'rmse', 'r2', 'adjusted_r2']

# The bin partition does not depend on the model, so build it once.
# groupby(sort=False) yields bins in order of first appearance; rows whose
# dis_bin is NaN are skipped (they could never match an equality mask either).
bin_groups = [
    (
        bin_label,
        TabularDataset(bin_df.drop(columns=['depth_start', 'dis_bin'])),
        bin_df['depth_start'].to_numpy(),
    )
    for bin_label, bin_df in test_df.groupby('dis_bin', sort=False)
]

error_frames = []  # one frame per model, covering all bins
metric_rows = []   # one metrics dict per model

# head(5) rather than range(5): does not fail when the leaderboard lists
# fewer than five models.
# NOTE(review): assumes ld_board is already ordered best-first — confirm.
for model_name in ld_board['model'].head(5):
    model_frames = []
    for bin_label, bin_features, y_true in bin_groups:
        # Plain ndarray so the subtraction below is positional, not
        # index-aligned.
        y_pred = np.asarray(predictor.predict(bin_features, model=model_name))
        # Scalar 'bin' / 'model' values are broadcast to every row.
        model_frames.append(pd.DataFrame({
            'bin': bin_label,
            'model': model_name,
            'y_true': y_true,
            'y_pred': y_pred,
            'error': np.abs(y_true - y_pred),
        }))

    if not model_frames:
        # Empty test set: nothing to score for this model.
        continue

    model_errors = pd.concat(model_frames, ignore_index=True)
    error_frames.append(model_errors)

    # Score only this model's predictions; using the accumulated table would
    # mix in the predictions of previously evaluated models.
    # NOTE(review): assumes evaluation() accepts array-likes as
    # (y_true, y_pred) — confirm against utils/evaluation_utils.
    mae, mse, rmse, r2, adjusted_r2 = evaluation_utils.evaluation(
        model_errors['y_true'].to_numpy(),
        model_errors['y_pred'].to_numpy(),
    )
    metric_rows.append({
        'model': model_name,
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'r2': r2,
        'adjusted_r2': adjusted_r2,
    })

if error_frames:
    error_df = pd.concat(error_frames, ignore_index=True)
else:
    error_df = pd.DataFrame(columns=error_columns)
evaluation_df = pd.DataFrame(metric_rows, columns=metric_columns)

In [None]:
# Write the per-model metrics table to CSV; index=False leaves out the row index.
evaluation_df.to_csv(
    path_or_buf='data/241025_AutoML_top5_Evaluation(before_aumentation, parameter tuning).csv',
    index=False,
)

In [None]:
# Model-wise error distribution: one box plot call over all rows of error_df.
# NOTE(review): the arguments look like (frame, value column, grouping column,
# title) — confirm against eda_utils.box_plot.
eda_utils.box_plot(error_df, 'error', 'model', 'Error Distribution')

In [None]:
# dis_bin-wise error distribution, drawn separately for each of the first
# five leaderboard models.
for rank in range(5):
    current_model = ld_board.model[rank]
    # Keep only the error rows recorded for this model.
    subset = error_df[error_df['model'] == current_model]
    eda_utils.box_plot(subset, 'error', 'bin', 'Error Distribution')