In [19]:
import os
import sys

sys.path.append('..')

import pandas as pd

from custom_metrics import *

In [18]:
# Utility function to apply a metric by fold and then get avg/standard dev over folds
def get_metric_CV(raw_preds, metric, submodel = None, **kwargs):
    if submodel == 'nonzero':
        raw_preds = raw_preds[raw_preds['actual'] != 0].copy(deep = True) # tossing zero entries
    raw_preds = raw_preds.groupby('fold')

    if submodel is None:
        metric_by_group = raw_preds.apply(lambda x: metric(x['actual'], x['predicted'], **kwargs), include_groups = False)
    elif submodel == 'zero':
        metric_by_group = raw_preds.apply(lambda x: metric(x['actual_zero'], x['predicted_zero'], **kwargs), include_groups = False)
    elif submodel == 'nonzero':
        metric_by_group = raw_preds.apply(lambda x: metric(x['actual'], x['predicted_nonzero'], **kwargs), include_groups = False)
    
    metric_mean = metric_by_group.mean()
    metric_std = metric_by_group.std()

    return metric_mean, metric_std

In [46]:
# Reading raw predictions from the three models
base_path = '/Users/emiliolr/Desktop/life-hunting/final_results'
model_names = ['pymer_hurdle_wo_rfx_tune-thresh', 'rf_hurdle_2.0mins_rebalance-classes', 
               'xgboost_hurdle_2.0mins_rebalance-classes']
cv_type = 'spatial'

if cv_type == 'random':
    fp_template = 'raw_predictions/%s_mammals_recreated_5-fold_random-blocking.csv'
elif cv_type == 'group':
    fp_template = 'raw_predictions/%s_mammals_recreated_5-fold_group-blocking_species.csv'
elif cv_type == 'spatial':
    fp_template = 'raw_predictions/%s_mammals_recreated_5-fold_spatial-blocking_5-degree.csv'

df_pymer = pd.read_csv(os.path.join(base_path, fp_template % model_names[0]))
df_rf = pd.read_csv(os.path.join(base_path, fp_template % model_names[1]))
df_xgboost = pd.read_csv(os.path.join(base_path, fp_template % model_names[2]))

In [47]:
# Assembling a dataframe w/all predictions for simulating ensembling
df_ensemble = df_pymer[['fold', 'actual']].copy(deep = True)

df_ensemble['linear_hurdle_pred'] = df_pymer['predicted']
df_ensemble['rf_hurdle_pred'] = df_rf['predicted']
df_ensemble['xgboost_hurdle_pred'] = df_xgboost['predicted']

df_ensemble['predicted'] = (0.33 * df_ensemble['linear_hurdle_pred']) + (0.33 * df_ensemble['rf_hurdle_pred']) + (0.33 * df_ensemble['xgboost_hurdle_pred'])
df_ensemble

Unnamed: 0,fold,actual,linear_hurdle_pred,rf_hurdle_pred,xgboost_hurdle_pred,predicted
0,4,0.377193,0.701325,0.786044,0.755944,0.740293
1,4,0.865690,-0.000000,0.829614,0.755944,0.523234
2,4,0.833333,0.701325,0.786044,0.755944,0.740293
3,4,0.900862,-0.000000,0.829614,0.755944,0.523234
4,4,0.956140,0.701325,0.786044,0.755944,0.740293
...,...,...,...,...,...,...
3276,0,1.000000,0.692275,0.911110,0.792354,0.790594
3277,0,1.000000,0.560590,-0.000000,-0.000000,0.184995
3278,0,1.000000,0.616085,-0.000000,-0.000000,0.203308
3279,0,1.000000,0.611999,-0.000000,-0.000000,0.201960


In [48]:
# MedAE in the 0-1 range
upper_bound = 1

medAE_mean, medAE_std = get_metric_CV(df_ensemble, median_absolute_error_range, **{'upper_bound' : upper_bound, 'return_pct_kept' : False})
print('MedAE mean (0-1):', medAE_mean)
print('MedAE std dev (0-1):', medAE_std)
print()

# MedAE overall
medAE_mean, medAE_std = get_metric_CV(df_ensemble, median_absolute_error, **{})
print('MedAE mean (overall):', medAE_mean)
print('MedAE std dev (overall):', medAE_std)

MedAE mean (0-1): 0.2977859104711136
MedAE std dev (0-1): 0.019916849603291688

MedAE mean (overall): 0.3794916042827926
MedAE std dev (overall): 0.039923254196097394


In [49]:
# Comparing w/the cross-val results for individual models
cv_all_metrics = pd.read_csv(os.path.join(base_path, 'cross_val_results_all_metrics.csv'))
cv_all_metrics[(cv_all_metrics['block_type'] == cv_type) & ((cv_all_metrics['metric'] == 'median_absolute_error-1') | (cv_all_metrics['metric'] == 'median_absolute_error-inf'))]

Unnamed: 0,metric,mean,standard_deviation,model_name,dataset,date,num_folds,block_type,spatial_spacing,group_col
19,median_absolute_error-inf,0.400595,0.127273,dummy_regressor_median,mammals_recreated,2025-04-09 17:45:21,5,spatial,5.0,
20,median_absolute_error-1,0.271204,0.142025,dummy_regressor_median,mammals_recreated,2025-04-09 17:45:21,5,spatial,5.0,
47,median_absolute_error-inf,0.37034,0.077454,pymer_hurdle_wo_rfx_tune-thresh,mammals_recreated,2025-04-09 17:52:40,5,spatial,5.0,
48,median_absolute_error-1,0.297292,0.060994,pymer_hurdle_wo_rfx_tune-thresh,mammals_recreated,2025-04-09 17:52:40,5,spatial,5.0,
77,median_absolute_error-inf,0.39781,0.038317,rf_hurdle_2.0mins_rebalance-classes,mammals_recreated,2025-04-09 17:31:51,5,spatial,5.0,
78,median_absolute_error-1,0.32084,0.026139,rf_hurdle_2.0mins_rebalance-classes,mammals_recreated,2025-04-09 17:31:51,5,spatial,5.0,
107,median_absolute_error-inf,0.363033,0.094408,xgboost_hurdle_2.0mins_rebalance-classes,mammals_recreated,2025-04-09 18:56:38,5,spatial,5.0,
108,median_absolute_error-1,0.278975,0.034485,xgboost_hurdle_2.0mins_rebalance-classes,mammals_recreated,2025-04-09 18:56:38,5,spatial,5.0,
