# License 
***
Copyright (C) 2021 -- 2022 J. Patrick Hall, jphall@gwu.edu

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Model Evaluation Notebook

#### Imports and inits

In [1]:
import os              # for directory and file manipulation
import numpy as np     # for basic array manipulation
import pandas as pd    # for dataframe manipulation
import datetime        # for timestamp

# for model eval
from sklearn.metrics import accuracy_score, f1_score, log_loss, mean_squared_error, roc_auc_score

# global constants 
ROUND = 3              # generally, insane precision is not needed 
SEED = 12345           # seed for better reproducibility

# to upload local files
import io
from google.colab import files 

# set global random seed for better reproducibility
np.random.seed(SEED)

#### Read in score files 

In [2]:
# special Google Colab command to upload a file from the local computer
uploaded = files.upload()

Saving example_preds.csv to example_preds.csv


In [3]:
uploaded.keys() # what is stored in that Python object?

dict_keys(['example_preds.csv'])

In [4]:
# decode the uploaded content for 'example_preds.csv' as UTF-8 text and parse it as CSV
scores_frame = pd.read_csv(io.StringIO(uploaded['example_preds.csv'].decode('utf-8')))

In [5]:
# sanity check 
scores_frame

Unnamed: 0,high_priced,fold,group3_randomforest,group4_mxgb,group2_mxgb,group5_ebm,group4_ebm_3,group6_mxgb,ph_mxgb,group6_ebm_2,group2_ebm,group7_xgb_2,ph_glm,group6_ebm,group1_mxgb,group3_decisiontree,group3_glm_2,group3_ebm,group8_gbm_2,group1_ebm,group3_elasticnet,group5_ensemble,group7_ebm,group1_glm,group4_glm,group8_ebmoversample,group2_ebm_3,group7_EBM_2,group8_gbmgrid,group6_mxgb_2,group5_mgbm,group3_ebm_3,group8_ebm,group2_glm,group3_monotonicgbm,group7_xgb,group6_glm,group1_rf,group5_ebm_2,group2_ebm_2,group8_monogbm,group8_ebm_3,ph_ebm,group3_ebm_2,group8_ebm_2,group3_mxgb_2,group4_ebm_2
0,0.0,2,0.063028,0.058037,0.065526,0.079366,0.005820,0.062775,0.059522,0.078887,0.065526,0.071805,0.142090,0.079334,0.066214,0.032648,0.158336,0.086361,0.095617,0.082841,0.142090,0.084539,0.083497,0.142090,0.121367,0.452653,0.068860,0.086287,0.109255,0.068056,0.076263,0.084322,0.082841,0.142090,0.086361,0.083497,0.136975,0.100704,0.078818,0.066693,0.169457,0.079292,0.082841,0.171009,0.074440,0.254148,0.063845
1,0.0,1,0.030348,0.032129,0.032689,0.027144,0.254738,0.035047,0.036210,0.027960,0.032689,0.029102,0.081674,0.028650,0.034362,0.032648,0.159289,0.033920,0.017854,0.027079,0.081674,0.029120,0.033635,0.081674,0.093075,0.213467,0.031600,0.029360,0.021129,0.034898,0.036957,0.029677,0.027079,0.081674,0.033920,0.033635,0.084681,0.098442,0.029009,0.032970,0.016476,0.028798,0.027079,0.323614,0.026770,0.249009,0.028332
2,1.0,4,0.174309,0.161683,0.167186,0.182317,0.075005,0.161925,0.180734,0.169545,0.167186,0.163567,0.125823,0.173083,0.171752,0.166571,0.070368,0.183323,0.178216,0.190718,0.125823,0.171555,0.172822,0.125823,0.114441,0.672604,0.178979,0.195971,0.181202,0.161931,0.188531,0.189871,0.190718,0.125823,0.183323,0.172822,0.123435,0.119991,0.191593,0.177787,0.178777,0.178244,0.190718,0.023242,0.190482,0.033878,0.174635
3,0.0,1,0.019114,0.023556,0.033490,0.029269,0.559781,0.026129,0.027677,0.032026,0.033490,0.026050,0.006973,0.025840,0.031721,0.032648,0.124670,0.030934,0.018390,0.031069,0.006973,0.011600,0.017356,0.006973,0.030920,0.203551,0.026199,0.029024,0.015636,0.024443,0.022215,0.016929,0.031069,0.006973,0.030934,0.017356,0.010716,0.065078,0.019538,0.024862,0.009344,0.028750,0.031069,0.015636,0.048819,0.021316,0.022828
4,1.0,2,0.207948,0.180101,0.173838,0.202628,0.066230,0.176872,0.177813,0.193220,0.173838,0.170863,0.130426,0.206450,0.176927,0.166571,0.045170,0.178491,0.207122,0.210361,0.130426,0.205943,0.162035,0.130426,0.119331,0.689614,0.198793,0.194710,0.213761,0.179637,0.177027,0.208004,0.210361,0.130426,0.178491,0.162035,0.128327,0.117926,0.203666,0.198582,0.179083,0.203476,0.210361,0.004191,0.211336,0.008621,0.212485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19826,0.0,3,0.245902,0.253574,0.266065,0.225395,0.020803,0.250139,0.274767,0.220331,0.266065,0.236728,0.160032,0.225026,0.245008,0.258898,0.122798,0.255826,0.238839,0.231624,0.160032,0.266139,0.267906,0.160032,0.128945,0.735663,0.208617,0.195787,0.243438,0.243937,0.230648,0.241439,0.231624,0.160032,0.255826,0.267906,0.152276,0.124180,0.229297,0.234666,0.247609,0.230922,0.231624,0.035635,0.242678,0.037913,0.222671
19827,0.0,1,0.177097,0.173608,0.184479,0.252359,0.011832,0.181217,0.182039,0.235473,0.184479,0.188487,0.123836,0.242065,0.175831,0.187772,0.069357,0.176984,0.240268,0.254823,0.123836,0.220119,0.198807,0.123836,0.115822,0.741521,0.264889,0.229849,0.249503,0.193111,0.184512,0.244708,0.254823,0.123836,0.176984,0.198807,0.122418,0.105582,0.249218,0.259163,0.234628,0.241858,0.254823,0.016384,0.244461,0.023938,0.229508
19828,1.0,3,0.174827,0.229630,0.217184,0.226476,0.328958,0.221794,0.212740,0.198205,0.217184,0.223914,0.169604,0.218404,0.225311,0.187772,0.166674,0.236894,0.194530,0.220400,0.169604,0.202448,0.207814,0.169604,0.139953,0.723375,0.221318,0.226977,0.182491,0.217587,0.219277,0.212593,0.220400,0.169604,0.236894,0.207814,0.162843,0.127146,0.224792,0.217782,0.192819,0.211413,0.220400,0.043647,0.213149,0.051774,0.232992
19829,0.0,1,0.035276,0.001175,0.001412,0.001222,0.145872,0.001602,0.001323,0.001106,0.001412,0.001483,0.002538,0.002327,0.000893,0.017041,0.052115,0.001113,0.005797,0.000993,0.002538,0.007740,0.001113,0.002538,0.023380,0.005516,0.003004,0.001422,0.002802,0.001303,0.010324,0.003073,0.000993,0.002538,0.001113,0.001113,0.004429,0.056343,0.002591,0.006079,0.003607,0.002935,0.000993,0.027301,0.001347,0.030560,0.008614


#### Set basic metadata

In [6]:
# name of the column in scores_frame that holds the known outcome values
y_name = 'high_priced'

#### Utility function for max. accuracy

In [7]:
def max_acc(y, phat, res=0.01):

    """ Utility function for finding max. accuracy at some cutoff.

        Scores strictly greater than a cutoff are treated as a decision of 1,
        all other scores as a decision of 0. Cutoffs are taken from
        np.arange(0, 1 + res, res).

        :param y: Known y values.
        :param phat: Model scores.
        :param res: Resolution over which to search for max. accuracy, default 0.01.
        :return: Max. accuracy for model scores.

    """

    # copy known y and score values into a temporary frame;
    # concat aligns the two inputs on their index
    temp_df = pd.concat([y, phat], axis=1)
    known = temp_df.iloc[:, 0]
    scores = temp_df.iloc[:, 1]

    # find accuracy at each cutoff; results are collected in a plain list
    # because DataFrame.append was removed in pandas 2.0 and copied the
    # whole frame on every call
    accs = [accuracy_score(known, np.where(scores > cut, 1, 0))
            for cut in np.arange(0, 1 + res, res)]

    # find max accuracy across all cutoffs (nan if there were no cutoffs)
    return max(accs, default=float('nan'))

####  Utility function for max. F1

In [8]:
def max_f1(y, phat, res=0.01):

    """ Utility function for finding max. F1 at some cutoff.

        Scores strictly greater than a cutoff are treated as a decision of 1,
        all other scores as a decision of 0. Cutoffs are taken from
        np.arange(0, 1 + res, res).

        :param y: Known y values.
        :param phat: Model scores.
        :param res: Resolution over which to search for max. F1, default 0.01.
        :return: Max. F1 for model scores.

    """

    # copy known y and score values into a temporary frame;
    # concat aligns the two inputs on their index
    temp_df = pd.concat([y, phat], axis=1)
    known = temp_df.iloc[:, 0]
    scores = temp_df.iloc[:, 1]

    # find f1 at each cutoff; results are collected in a plain list
    # because DataFrame.append was removed in pandas 2.0 and copied the
    # whole frame on every call
    f1s = [f1_score(known, np.where(scores > cut, 1, 0))
           for cut in np.arange(0, 1 + res, res)]

    # find max f1 across all cutoffs (nan if there were no cutoffs)
    return max(f1s, default=float('nan'))

#### Rank all submitted scores 

In [9]:
metric_list = ['acc', 'auc', 'f1', 'logloss', 'mse'] # metrics to use for evaluation

# map each metric name to the function that computes it from (known y, scores)
metric_funcs = {'acc': max_acc,
                'auc': roc_auc_score,
                'f1': max_f1,
                'logloss': log_loss,
                'mse': mean_squared_error}

# metrics where a smaller value is better; every other metric ranks descending
lower_is_better = ['logloss', 'mse']

# columns holding submitted scores, selected by position
# NOTE(review): assumes the first two columns of scores_frame are the
# known y values and the fold id -- confirm against the uploaded file
score_names = list(scores_frame.columns[2:])

# evaluation/rank columns are kept in sorted order; rank names are derived
# from that same order so each rank label always matches its score column
sorted_names = sorted(score_names)
rank_names = [name + '_rank' for name in sorted_names]

# collect one dict per (fold, metric) row and build the frame once;
# DataFrame.append was removed in pandas 2.0 and copied the frame per call
eval_rows = []
for fold in sorted(scores_frame['fold'].unique()): # loop through folds

    # cache row selector and known y values for fold
    fold_mask = scores_frame['fold'] == fold
    fold_y = scores_frame.loc[fold_mask, y_name]

    for metric_name in metric_list: # loop through metrics

        metric_func = metric_funcs[metric_name]

        # init row dict to hold each row's values
        row_dict = {'fold': fold,
                    'metric': metric_name}

        # calculate evaluation metric for fold with reasonable precision
        for col_name in score_names:
            fold_scores = scores_frame.loc[fold_mask, col_name]
            row_dict[col_name] = np.round(metric_func(fold_y, fold_scores), ROUND)

        eval_rows.append(row_dict)

# create eval frame with columns in necessary order
eval_frame = pd.DataFrame(eval_rows, columns=['fold', 'metric'] + sorted_names)

# determine score ranks within each row: rank 1 is the best score,
# i.e. the largest value, except for lower-is-better metrics
only_scores = eval_frame[sorted_names]
rank_frame = only_scores.rank(axis=1, ascending=False)
lower_rows = eval_frame['metric'].isin(lower_is_better)
rank_frame.loc[lower_rows] = only_scores.loc[lower_rows].rank(axis=1)
rank_frame.columns = rank_names

# merge ranks onto eval_frame
eval_frame = pd.concat([eval_frame, rank_frame], axis=1)

# house keeping
del rank_frame, only_scores

eval_frame

Unnamed: 0,fold,metric,group1_ebm,group1_glm,group1_mxgb,group1_rf,group2_ebm,group2_ebm_2,group2_ebm_3,group2_glm,group2_mxgb,group3_decisiontree,group3_ebm,group3_ebm_2,group3_ebm_3,group3_elasticnet,group3_glm_2,group3_monotonicgbm,group3_mxgb_2,group3_randomforest,group4_ebm_2,group4_ebm_3,group4_glm,group4_mxgb,group5_ebm,group5_ebm_2,group5_ensemble,group5_mgbm,group6_ebm,group6_ebm_2,group6_glm,group6_mxgb,group6_mxgb_2,group7_EBM_2,group7_ebm,group7_xgb,group7_xgb_2,group8_ebm,group8_ebm_2,group8_ebm_3,...,group2_ebm_2_rank,group2_ebm_3_rank,group2_glm_rank,group2_mxgb_rank,group3_decisiontree_rank,group3_ebm_rank,group3_ebm_2_rank,group3_ebm_3_rank,group3_elasticnet_rank,group3_glm_2_rank,group3_monotonicgbm_rank,group3_mxgb_2_rank,group3_randomforest_rank,group4_ebm_2_rank,group4_ebm_3_rank,group4_glm_rank,group4_mxgb_rank,group5_ebm_rank,group5_ebm_2_rank,group5_ensemble_rank,group5_mgbm_rank,group6_ebm_rank,group6_ebm_2_rank,group6_glm_rank,group6_mxgb_rank,group6_mxgb_2_rank,group7_EBM_2_rank,group7_ebm_rank,group7_xgb_rank,group7_xgb_2_rank,group8_ebm_rank,group8_ebm_2_rank,group8_ebm_3_rank,group8_ebmoversample_rank,group8_gbm_2_rank,group8_gbmgrid_rank,group8_monogbm_rank,ph_ebm_rank,ph_glm_rank,ph_mxgb_rank
0,0.0,acc,0.901,0.9,0.901,0.9,0.901,0.901,0.9,0.9,0.901,0.9,0.902,0.9,0.901,0.9,0.9,0.902,0.9,0.901,0.901,0.9,0.9,0.901,0.901,0.901,0.901,0.902,0.901,0.901,0.9,0.902,0.901,0.901,0.901,0.901,0.902,0.901,0.901,0.901,...,18.5,38.0,38.0,18.5,38.0,3.5,38.0,18.5,38.0,38.0,3.5,38.0,18.5,18.5,38.0,38.0,18.5,18.5,18.5,18.5,3.5,18.5,18.5,38.0,3.5,18.5,18.5,18.5,18.5,3.5,18.5,18.5,18.5,38.0,18.5,38.0,18.5,18.5,38.0,3.5
1,0.0,auc,0.839,0.775,0.814,0.803,0.815,0.838,0.837,0.775,0.815,0.818,0.813,0.465,0.837,0.775,0.479,0.813,0.472,0.816,0.838,0.51,0.778,0.815,0.837,0.839,0.836,0.834,0.836,0.834,0.776,0.814,0.813,0.839,0.813,0.813,0.814,0.839,0.838,0.838,...,10.0,14.0,39.5,24.0,21.0,32.0,45.0,14.0,39.5,43.0,32.0,44.0,22.0,10.0,42.0,36.0,24.0,14.0,4.5,17.0,19.5,17.0,19.5,37.0,27.5,32.0,4.5,32.0,32.0,27.5,4.5,10.0,10.0,4.5,1.0,10.0,17.0,4.5,39.5,27.5
2,0.0,f1,0.408,0.335,0.377,0.368,0.379,0.403,0.401,0.335,0.379,0.37,0.374,0.182,0.402,0.335,0.186,0.374,0.182,0.37,0.403,0.182,0.327,0.376,0.404,0.401,0.396,0.394,0.4,0.397,0.336,0.377,0.372,0.407,0.375,0.375,0.376,0.408,0.405,0.402,...,7.5,13.5,38.5,22.0,33.5,30.5,44.0,10.0,38.5,42.0,30.5,44.0,33.5,7.5,44.0,41.0,26.5,6.0,13.5,19.0,20.0,16.0,18.0,36.0,24.5,32.0,4.0,28.5,28.5,26.5,2.0,5.0,10.0,13.5,13.5,17.0,10.0,2.0,38.5,22.0
3,0.0,logloss,0.251,0.291,0.263,0.305,0.263,0.252,0.252,0.291,0.263,0.259,0.263,0.453,0.252,0.291,0.354,0.263,0.418,0.264,0.252,0.402,0.302,0.263,0.252,0.251,0.253,0.257,0.253,0.253,0.293,0.263,0.263,0.251,0.263,0.263,0.262,0.251,0.251,0.252,...,12.0,12.0,35.5,27.0,20.0,27.0,44.0,12.0,35.5,41.0,27.0,43.0,33.0,12.0,42.0,39.0,27.0,12.0,4.5,17.0,19.0,17.0,17.0,38.0,27.0,27.0,4.5,27.0,27.0,21.0,4.5,4.5,12.0,45.0,4.5,4.5,12.0,4.5,35.5,27.0
4,0.0,mse,0.077,0.084,0.079,0.086,0.078,0.077,0.077,0.084,0.078,0.079,0.078,0.104,0.077,0.084,0.094,0.078,0.102,0.079,0.077,0.099,0.086,0.079,0.077,0.077,0.077,0.077,0.077,0.077,0.084,0.079,0.079,0.077,0.078,0.078,0.078,0.077,0.077,0.077,...,10.0,10.0,36.0,23.5,30.5,23.5,44.0,10.0,36.0,41.0,23.5,43.0,30.5,10.0,42.0,39.5,30.5,10.0,10.0,10.0,10.0,10.0,10.0,36.0,30.5,30.5,10.0,23.5,23.5,23.5,10.0,10.0,10.0,45.0,10.0,10.0,10.0,10.0,36.0,23.5
5,1.0,acc,0.906,0.906,0.906,0.906,0.906,0.906,0.907,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.906,0.907,0.906,0.906,0.906,0.906,0.906,0.906,0.907,0.906,0.906,0.906,0.906,0.906,0.906,...,25.0,2.5,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,2.5,25.0,25.0,25.0,25.0,25.0,25.0,2.5,25.0,25.0,25.0,25.0,25.0,25.0,25.0,2.5,25.0,25.0,25.0,25.0,25.0
6,1.0,auc,0.828,0.757,0.791,0.792,0.793,0.827,0.829,0.757,0.793,0.798,0.792,0.499,0.828,0.757,0.514,0.792,0.503,0.798,0.826,0.539,0.757,0.791,0.829,0.827,0.821,0.824,0.826,0.826,0.758,0.791,0.79,0.827,0.795,0.795,0.79,0.828,0.827,0.826,...,11.0,2.5,39.0,25.5,21.5,28.5,45.0,6.5,39.0,43.0,28.5,44.0,21.5,15.5,42.0,39.0,32.0,2.5,11.0,20.0,19.0,15.5,15.5,36.0,32.0,34.5,11.0,23.5,23.5,34.5,6.5,11.0,15.5,2.5,18.0,2.5,11.0,6.5,39.0,28.5
7,1.0,f1,0.369,0.302,0.339,0.336,0.339,0.373,0.378,0.302,0.339,0.335,0.342,0.172,0.37,0.302,0.177,0.342,0.172,0.336,0.366,0.181,0.302,0.339,0.369,0.372,0.36,0.365,0.368,0.365,0.304,0.341,0.339,0.369,0.341,0.341,0.339,0.369,0.368,0.369,...,2.0,1.0,39.0,28.5,35.0,21.5,44.5,6.0,39.0,43.0,21.5,44.5,33.5,15.5,42.0,39.0,28.5,9.5,3.0,20.0,18.0,13.5,18.0,36.0,24.0,28.5,9.5,24.0,24.0,28.5,9.5,13.5,9.5,4.5,15.5,4.5,18.0,9.5,39.0,32.0
8,1.0,logloss,0.246,0.281,0.263,0.294,0.263,0.247,0.246,0.281,0.263,0.257,0.264,0.417,0.247,0.281,0.331,0.264,0.388,0.26,0.248,0.373,0.292,0.265,0.246,0.247,0.25,0.252,0.248,0.247,0.283,0.264,0.264,0.247,0.262,0.262,0.263,0.246,0.247,0.248,...,10.0,3.5,35.5,25.5,20.0,30.0,44.0,10.0,35.5,41.0,30.0,43.0,21.0,15.5,42.0,39.0,33.0,3.5,10.0,18.0,19.0,15.5,10.0,38.0,30.0,30.0,10.0,22.5,22.5,25.5,3.5,10.0,15.5,45.0,10.0,3.5,15.5,3.5,35.5,30.0
9,1.0,mse,0.074,0.08,0.078,0.082,0.078,0.074,0.074,0.08,0.078,0.077,0.078,0.098,0.074,0.08,0.088,0.078,0.096,0.077,0.075,0.093,0.082,0.079,0.074,0.075,0.075,0.075,0.075,0.075,0.081,0.078,0.078,0.075,0.078,0.078,0.078,0.074,0.075,0.075,...,4.0,4.0,35.5,27.0,20.5,27.0,44.0,4.0,35.5,41.0,27.0,43.0,20.5,13.5,42.0,39.5,33.0,4.0,13.5,13.5,13.5,13.5,13.5,38.0,27.0,27.0,13.5,27.0,27.0,27.0,4.0,13.5,13.5,45.0,13.5,13.5,13.5,4.0,35.5,27.0


#### Display simple ranked score list 

In [10]:
# average every '*rank' column over all rows of eval_frame and
# list the results from smallest to largest mean rank
eval_frame[[name for name in eval_frame.columns if name.endswith('rank')]].mean().sort_values()

group7_EBM_2_rank             8.92
group1_ebm_rank               9.04
ph_ebm_rank                   9.04
group8_ebm_rank               9.04
group2_ebm_3_rank             9.24
group8_gbm_2_rank             9.60
group5_ebm_2_rank            10.94
group8_gbmgrid_rank          10.96
group2_ebm_2_rank            11.88
group5_ebm_rank              11.94
group3_ebm_3_rank            12.16
group4_ebm_2_rank            12.16
group6_ebm_2_rank            12.36
group8_ebm_2_rank            12.58
group8_ebm_3_rank            12.72
group8_monogbm_rank          13.56
group5_ensemble_rank         13.56
group6_ebm_rank              13.84
group5_mgbm_rank             17.04
group7_xgb_2_rank            25.20
group8_ebmoversample_rank    25.32
group3_decisiontree_rank     25.58
group7_xgb_rank              26.30
group7_ebm_rank              26.30
group1_mxgb_rank             26.38
group2_ebm_rank              26.46
group2_mxgb_rank             26.46
group6_mxgb_rank             26.60
group3_randomforest_