# Evaluation

In [1]:
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from utils import eda_utils, evaluation_utils
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Column layouts shared by the AutoGluon (ml_*) and spatial-interpolation (si_*) results.
METRIC_COLUMNS = ['method', 'mae', 'mse', 'rmse', 'r2', 'adjusted_r2']
ERROR_COLUMNS = ['bin', 'method', 'y_true', 'y_pred', 'error']

# Kept from the original cell; no cell visible in this notebook reads them.
ml_df = pd.DataFrame(columns=METRIC_COLUMNS)
si_df = pd.DataFrame(columns=METRIC_COLUMNS)

# Metric tables: the evaluation loops add one row of
# (method, mae, mse, rmse, r2, adjusted_r2) per evaluated method, so these
# frames use the metric layout rather than the per-sample error layout.
ml_eval_df = pd.DataFrame(columns=METRIC_COLUMNS)
si_eval_df = pd.DataFrame(columns=METRIC_COLUMNS)

# Per-sample error tables: the evaluation loops and the box plots reference
# these names, which were previously never created.
ml_error_df = pd.DataFrame(columns=ERROR_COLUMNS)
si_error_df = pd.DataFrame(columns=ERROR_COLUMNS)

## AutoGluon Top-5 Model Evaluation

- boxplot: dis_bin-wise distribution, error distribution
- barplot: dis_bin-wise rmse, adjusted_r2
- output: csv

In [2]:
# Test split used by every evaluation cell below; later cells read its
# 'dis_bin' and 'depth_start' columns.
test_df = pd.read_csv(
    filepath_or_buffer='data/241026_Rockhead_Seoul_test(before_aumentation).csv',
)

In [3]:
# Leaderboard input: the test frame with only the 'dis_bin' column removed.
test = TabularDataset(test_df.drop(columns=['dis_bin']))

In [4]:
# Load the trained predictor. The path uses '/' like the data paths elsewhere in
# this notebook; the previous backslash-separated literal only resolved on
# Windows, whereas '/' resolves on both Windows and POSIX systems.
predictor = TabularPredictor.load('AutogluonModels/ag-20241026_094403')

# Leaderboard computed against the held-out test frame; its 'model' column
# supplies the model names evaluated further down.
ld_board = predictor.leaderboard(test, silent=True)

In [13]:
# Evaluate the leading leaderboard models on the held-out test set.
#
# Produces two frames, rebuilt from scratch on every run so the cell is idempotent:
#   ml_error_df - one row per (model, test sample): bin, method, y_true, y_pred, error
#   ml_eval_df  - one row per model: method, mae, mse, rmse, r2, adjusted_r2
TOP_K = 5
# Fourth argument of evaluation_utils.evaluation; presumably the number of
# predictors used for adjusted R^2 -- TODO confirm against evaluation_utils.
N_PREDICTORS = 7

# head() is positional, so this still works if fewer than TOP_K models exist.
top_models = ld_board['model'].head(TOP_K).tolist()

ml_error_frames = []   # per-sample error frames, one per model
ml_metric_rows = []    # one metrics dict per model

for model_name in top_models:
    model_frames = []

    # Predict bin by bin so every sample is tagged with its dis_bin.
    for dis_bin in test_df['dis_bin'].unique():
        bin_df = test_df[test_df['dis_bin'] == dis_bin]
        if bin_df.empty:
            # e.g. a NaN bin label never matches the equality filter
            continue

        X_test_bin = TabularDataset(bin_df.drop(columns=['depth_start', 'dis_bin']))
        y_true = bin_df['depth_start'].to_numpy()
        y_pred = np.asarray(predictor.predict(X_test_bin, model=model_name))

        # Scalars ('bin', 'method') are broadcast to the length of the arrays.
        model_frames.append(pd.DataFrame({
            'bin': dis_bin,
            'method': model_name,
            'y_true': y_true,
            'y_pred': y_pred,
            'error': np.abs(y_true - y_pred),
        }))

    if not model_frames:
        continue

    # Score the model on ALL bins; the previous version reset the error frame
    # inside the bin loop and therefore scored only the last bin.
    model_errors = pd.concat(model_frames, ignore_index=True)
    ml_error_frames.append(model_errors)

    mae, mse, rmse, r2, adjusted_r2 = evaluation_utils.evaluation(
        model_errors['y_true'], model_errors['y_pred'], len(model_errors), N_PREDICTORS
    )
    ml_metric_rows.append({
        'method': model_name,
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'r2': r2,
        'adjusted_r2': adjusted_r2,
    })

if ml_error_frames:
    ml_error_df = pd.concat(ml_error_frames, ignore_index=True)
else:
    ml_error_df = pd.DataFrame(columns=['bin', 'method', 'y_true', 'y_pred', 'error'])

# Built from a list of dicts: pd.DataFrame() on a single all-scalar dict raises ValueError.
ml_eval_df = pd.DataFrame(
    ml_metric_rows,
    columns=['method', 'mae', 'mse', 'rmse', 'r2', 'adjusted_r2'],
)

TypeError: 'numpy.float64' object is not callable

In [8]:
# Write the AutoGluon metrics table to disk, omitting the index column.
ml_eval_df.to_csv(
    path_or_buf='data/241026_AutoML_top5_Evaluation(before_aumentation, parameter tuning).csv',
    index=False,
)

In [None]:
# Method-wise error distribution for the AutoGluon models, drawn from the
# 'error' and 'method' columns of ml_error_df.
eda_utils.box_plot(
    ml_error_df,
    'error',
    'method',
    'Error Distribution',
)

In [None]:
# dis_bin-wise RMSE and adjusted R^2 per AutoGluon model.
#
# The metric rows stored in ml_eval_df carry no bin value and name the column
# 'rmse' (not 'RMSE'), so they cannot feed a per-bin plot. The per-bin metrics
# are derived here from the per-sample errors in ml_error_df instead.
ml_bin_rows = []
for (dis_bin, method), group in ml_error_df.groupby(['bin', 'method'], sort=False):
    # Same call shape as the overall evaluation: (y_true, y_pred, n_samples, 7).
    # NOTE(review): 7 is presumably the predictor count for adjusted R^2; bins with
    # very few samples may give unstable adjusted R^2 -- confirm in evaluation_utils.
    bin_mae, bin_mse, bin_rmse, bin_r2, bin_adjusted_r2 = evaluation_utils.evaluation(
        group['y_true'].astype(float), group['y_pred'].astype(float), len(group), 7
    )
    ml_bin_rows.append({
        'bin': dis_bin,
        'method': method,
        'RMSE': bin_rmse,
        'adjusted_r2': bin_adjusted_r2,
    })

ml_bin_eval_df = pd.DataFrame(ml_bin_rows, columns=['bin', 'method', 'RMSE', 'adjusted_r2'])

evaluation_utils.barplot(ml_bin_eval_df, 'bin', 'RMSE', 'distribution', hue='method')
evaluation_utils.barplot(ml_bin_eval_df, 'bin', 'adjusted_r2', 'distribution', hue='method')

## Spatial Interpolation Method Evaluation

- boxplot: dis_bin-wise distribution, error distribution
- output: csv

GNNWR [Geographically neural network weighted regression for the accurate estimation of spatial non-stationarity](https://doi.org/10.1080/13658816.2019.1707834)

KCN [Kriging Convolutional Networks](https://doi.org/10.48550/arXiv.2306.09463)

GSARNN [A generalized spatial autoregressive neural network method for three-dimensional spatial interpolation](https://doi.org/10.5194/gmd-16-2777-2023)

DKNN [DKNN: deep kriging neural network for interpretable geospatial interpolation](https://doi.org/10.1080/13658816.2024.2347316)

Kriging [PyKrige](https://geostat-framework.readthedocs.io/projects/pykrige/en/stable/)



KNN [scikit-learn-Nearest Neighbors regression](https://scikit-learn.org/stable/auto_examples/neighbors/plot_regression.html)

IDW (inverse distance weighting; reference link not provided)

RBF [SciPy-Rbf](https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.Rbf.html#scipy.interpolate.Rbf)

In [None]:
# Reload the test split so the spatial-interpolation section starts from the
# same CSV as the AutoGluon section.
test_df = pd.read_csv(
    filepath_or_buffer='data/241026_Rockhead_Seoul_test(before_aumentation).csv',
)

In [None]:
# Evaluate the spatial-interpolation methods on the held-out test set.
#
# Produces two frames, rebuilt from scratch on every run:
#   si_error_df - one row per (method, test sample): bin, method, y_true, y_pred, error
#   si_eval_df  - one row per method: method, mae, mse, rmse, r2, adjusted_r2

# The eight methods listed in this section, in the order they are introduced.
SI_METHODS = ['GNNWR', 'KCN', 'GSARNN', 'DKNN', 'Kriging', 'KNN', 'IDW', 'RBF']

# Fourth argument of evaluation_utils.evaluation; presumably the number of
# predictors used for adjusted R^2 -- TODO confirm against evaluation_utils.
N_PREDICTORS = 7

# TODO: no visible cell fits these methods or produces their predictions.
# Register one callable per method that maps the feature frame (test columns
# without 'depth_start' and 'dis_bin') to predicted 'depth_start' values, e.g.
#     'Kriging': kriging_predict,
# Methods without an entry are skipped and reported below, rather than being
# scored with leftover AutoGluon predictions as the previous draft did.
SI_PREDICT_FNS = {}

si_error_frames = []   # per-sample error frames, one per method
si_metric_rows = []    # one metrics dict per method
si_skipped = []        # methods with no registered prediction callable

for method_name in SI_METHODS:
    predict_fn = SI_PREDICT_FNS.get(method_name)
    if predict_fn is None:
        si_skipped.append(method_name)
        continue

    method_frames = []

    # Predict bin by bin so every sample is tagged with its dis_bin.
    for dis_bin in test_df['dis_bin'].unique():
        bin_df = test_df[test_df['dis_bin'] == dis_bin]
        if bin_df.empty:
            # e.g. a NaN bin label never matches the equality filter
            continue

        X_test_bin = bin_df.drop(columns=['depth_start', 'dis_bin'])
        y_true = bin_df['depth_start'].to_numpy()
        # ravel() flattens column-vector outputs; it is a no-op for 1-D results.
        y_pred = np.asarray(predict_fn(X_test_bin)).ravel()

        method_frames.append(pd.DataFrame({
            'bin': dis_bin,
            'method': method_name,
            'y_true': y_true,
            'y_pred': y_pred,
            'error': np.abs(y_true - y_pred),
        }))

    if not method_frames:
        continue

    # Score the method on all bins together, not only the last one.
    method_errors = pd.concat(method_frames, ignore_index=True)
    si_error_frames.append(method_errors)

    mae, mse, rmse, r2, adjusted_r2 = evaluation_utils.evaluation(
        method_errors['y_true'], method_errors['y_pred'], len(method_errors), N_PREDICTORS
    )
    si_metric_rows.append({
        'method': method_name,
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'r2': r2,
        'adjusted_r2': adjusted_r2,
    })

if si_error_frames:
    si_error_df = pd.concat(si_error_frames, ignore_index=True)
else:
    si_error_df = pd.DataFrame(columns=['bin', 'method', 'y_true', 'y_pred', 'error'])

# Built from a list of dicts: pd.DataFrame() on a single all-scalar dict raises ValueError.
si_eval_df = pd.DataFrame(
    si_metric_rows,
    columns=['method', 'mae', 'mse', 'rmse', 'r2', 'adjusted_r2'],
)

if si_skipped:
    print('Skipped (no prediction callable registered):', ', '.join(si_skipped))

In [None]:
# Write the spatial-interpolation metrics table to disk, omitting the index column.
si_eval_df.to_csv(
    path_or_buf='data/241026_Spatial_Interpolation_Evaluation(before_aumentation, parameter tuning).csv',
    index=False,
)

In [None]:
# Method-wise error distribution for the spatial-interpolation methods, drawn
# from the 'error' and 'method' columns of si_error_df.
eda_utils.box_plot(
    si_error_df,
    'error',
    'method',
    'Error Distribution',
)

In [None]:
# dis_bin-wise RMSE and adjusted R^2 per spatial-interpolation method.
#
# The metric rows stored in si_eval_df carry no bin value and name the column
# 'rmse' (not 'RMSE'), so they cannot feed a per-bin plot. The per-bin metrics
# are derived here from the per-sample errors in si_error_df instead.
si_bin_rows = []
for (dis_bin, method), group in si_error_df.groupby(['bin', 'method'], sort=False):
    # Same call shape as the overall evaluation: (y_true, y_pred, n_samples, 7).
    # NOTE(review): 7 is presumably the predictor count for adjusted R^2; bins with
    # very few samples may give unstable adjusted R^2 -- confirm in evaluation_utils.
    bin_mae, bin_mse, bin_rmse, bin_r2, bin_adjusted_r2 = evaluation_utils.evaluation(
        group['y_true'].astype(float), group['y_pred'].astype(float), len(group), 7
    )
    si_bin_rows.append({
        'bin': dis_bin,
        'method': method,
        'RMSE': bin_rmse,
        'adjusted_r2': bin_adjusted_r2,
    })

si_bin_eval_df = pd.DataFrame(si_bin_rows, columns=['bin', 'method', 'RMSE', 'adjusted_r2'])

evaluation_utils.barplot(si_bin_eval_df, 'bin', 'RMSE', 'distribution', hue='method')
evaluation_utils.barplot(si_bin_eval_df, 'bin', 'adjusted_r2', 'distribution', hue='method')

## Comparison: AutoGluon Models vs. Spatial Interpolation Methods

- multi-barplot: dis_bin

In [None]:
# Stack the AutoGluon and spatial-interpolation tables (errors first, then
# metrics), renumbering rows so the combined frames have a fresh index.
error_df, evaluation_df = (
    pd.concat(list(pair), ignore_index=True)
    for pair in ((ml_error_df, si_error_df), (ml_eval_df, si_eval_df))
)

In [None]:
# Method-wise error distribution across both model families.
# Uses the combined error_df (AutoGluon + spatial interpolation); the previous
# call passed si_error_df, which left the AutoGluon models out of the comparison.
eda_utils.box_plot(error_df, 'error', 'method', 'Error Distribution')

In [None]:
# dis_bin-wise RMSE and adjusted R^2 across both model families.
#
# The previous calls plotted si_eval_df only, so the AutoGluon models were missing
# from the comparison. The stored metric rows also carry no bin value and name the
# column 'rmse' (not 'RMSE'). Per-bin metrics are therefore derived from the
# combined per-sample errors in error_df.
comparison_bin_rows = []
for (dis_bin, method), group in error_df.groupby(['bin', 'method'], sort=False):
    # Same call shape as the overall evaluation: (y_true, y_pred, n_samples, 7).
    # NOTE(review): 7 is presumably the predictor count for adjusted R^2; bins with
    # very few samples may give unstable adjusted R^2 -- confirm in evaluation_utils.
    bin_mae, bin_mse, bin_rmse, bin_r2, bin_adjusted_r2 = evaluation_utils.evaluation(
        group['y_true'].astype(float), group['y_pred'].astype(float), len(group), 7
    )
    comparison_bin_rows.append({
        'bin': dis_bin,
        'method': method,
        'RMSE': bin_rmse,
        'adjusted_r2': bin_adjusted_r2,
    })

comparison_bin_eval_df = pd.DataFrame(
    comparison_bin_rows,
    columns=['bin', 'method', 'RMSE', 'adjusted_r2'],
)

evaluation_utils.barplot(comparison_bin_eval_df, 'bin', 'RMSE', 'distribution', hue='method')
evaluation_utils.barplot(comparison_bin_eval_df, 'bin', 'adjusted_r2', 'distribution', hue='method')

## Surface Visualization