In [1]:
import pandas as pd
import numpy as np
import os
import sys
sys.path.append(os.environ['CMS_ROOT'])
from cms_modules.utils import dict_from_hdf5
pd.set_option('display.max_columns', 150)
pd.set_option('display.max_rows', 150)

In [15]:
%ls ../../combined-experiments/baselines/validation/results

ls: ../../combined-experiments/baselines/validation/results: No such file or directory


In [16]:
# get list of paths to methods to calculat stats on
# data_path = '../../partD-experiments/baselines/validation/'
data_path = '../../combined-experiments/baselines/validation/'
method_dirs = [os.path.join(data_path, x) for x in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, x)) and '.ipynb' not in x]
thresholds_file = 'thresholds.csv'
valid_metrics = 'valid_metrics.hdf5'

# define output path/metrics to score/epoch to use for roc_auc
output_file = 'best_threshold_stats.csv'
epoch_to_eval = 99

### Helper Functions to Create CSV of Stats

In [17]:
metrics = ['threshold', 'roc_auc', 'tpr', 'tnr', 'geometric_mean', 'arithmetic_mean']
measures = ['mean', 'std', 'min', '25%', '75%', 'max']
columns = [(metric + "_" + measure) for metric in metrics for measure in measures]

# create a dataframe containing all runs for a given method
# calculate the statistics for that method
# return results in comma separated list
def get_stats_for_method_runs(method_path):
    data = { key: [] for key in metrics }
    method_results_path = os.path.join(method_path, 'results')
    runs = os.listdir(method_results_path)
    if len(runs) < 10:
        print('method_results_path', method_results_path, len(runs))
#         return ""
    for run in runs:
        thresholds = pd.read_csv(os.path.join(method_results_path, run, thresholds_file))
        subset = thresholds.loc[thresholds['tpr'] > thresholds['tnr']]
        max_gmean_row = subset.iloc[subset['geometric_mean'].argmax()]
        for key, value in max_gmean_row.items():
            data[key].append(value)
        valid_results = dict_from_hdf5(os.path.join(method_results_path, run, valid_metrics))
        data['roc_auc'].append(valid_results['roc_auc'][epoch_to_eval])
    stats = pd.DataFrame(data).describe()
    print(method_results_path, '\n', stats)
    result = [method_path]
    for metric in metrics:
        for measure in measures:
            result.append(str(round(stats[metric][measure], 4)))
    return ','.join(result) + '\n'

# return comma separated list of column titles
def get_stats_header():
    return ",".join(columns) + "\n"

### Calculate Stats

In [18]:
# create header columns
csv_out = "method," + get_stats_header()

# get statistics for each method
for method in method_dirs:
    csv_out += get_stats_for_method_runs(method)

# write to csv file
with open(output_file, 'w') as f:
    f.write(csv_out)

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.


../../combined-experiments/baselines/validation/d4_w32/results 
        threshold    roc_auc        tpr        tnr  geometric_mean  \
count       10.0  10.000000  10.000000  10.000000       10.000000   
mean         0.0   0.987365   0.993608   0.031263        0.163348   
std          0.0   0.004149   0.003675   0.022517        0.069553   
min          0.0   0.978127   0.988720   0.000260        0.016000   
25%          0.0   0.986855   0.990600   0.020728        0.143052   
50%          0.0   0.987742   0.993420   0.029195        0.170000   
75%          0.0   0.988878   0.995770   0.035228        0.187170   
max          0.0   0.994346   1.000000   0.085880        0.291940   

       arithmetic_mean  
count        10.000000  
mean          0.512437  
std           0.010756  
min           0.500130  
25%           0.506315  
50%           0.510840  
75%           0.515030  
max           0.539180  
../../combined-experiments/baselines/validation/d2_w32/results 
        threshold    roc

### View Results

In [19]:
pd.read_csv(output_file)

Unnamed: 0,method,threshold_mean,threshold_std,threshold_min,threshold_25%,threshold_75%,threshold_max,roc_auc_mean,roc_auc_std,roc_auc_min,roc_auc_25%,roc_auc_75%,roc_auc_max,tpr_mean,tpr_std,tpr_min,tpr_25%,tpr_75%,tpr_max,tnr_mean,tnr_std,tnr_min,tnr_25%,tnr_75%,tnr_max,geometric_mean_mean,geometric_mean_std,geometric_mean_min,geometric_mean_25%,geometric_mean_75%,geometric_mean_max,arithmetic_mean_mean,arithmetic_mean_std,arithmetic_mean_min,arithmetic_mean_25%,arithmetic_mean_75%,arithmetic_mean_max
0,../../combined-experiments/baselines/validatio...,0.0,0.0,0.0,0.0,0.0,0.0,0.9874,0.0041,0.9781,0.9869,0.9889,0.9943,0.9936,0.0037,0.9887,0.9906,0.9958,1.0,0.0313,0.0225,0.0003,0.0207,0.0352,0.0859,0.1633,0.0696,0.016,0.1431,0.1872,0.2919,0.5124,0.0108,0.5001,0.5063,0.515,0.5392
1,../../combined-experiments/baselines/validatio...,0.0,0.0,0.0,0.0,0.0,0.0,0.9958,0.004,0.9869,0.9937,0.9978,0.9995,0.9961,0.0084,0.9774,1.0,1.0,1.0,0.1877,0.3602,0.0005,0.0024,0.0878,0.8765,0.2578,0.3618,0.0231,0.0489,0.2815,0.9256,0.5919,0.1759,0.5003,0.5012,0.5439,0.927
