In [2]:
import numpy as np
import pandas as pd
import pickle
import yaml
import os
import random
import seaborn as sns
# Load the BAF dataset and the table of deployment scores stored next to it.
baf = pd.read_parquet('./data/BAF.parquet')
baf_model_score = pd.read_parquet('./data/BAF_deployment_score.parquet')

# NOTE: unpickling executes arbitrary code -- only load trusted model artefacts.
with open('./ml_model/model/model_properties.pickle', 'rb') as fh:
    model_properties = pickle.load(fh)

# `l` is the odds of the model's decision threshold, t / (1 - t). The results
# cells further down use it as the weight of a false positive relative to a
# false negative when computing the `loss_0057` column (l * fp + fn).
threshold = model_properties['threshold']
l = threshold / (1 - threshold)

In [3]:
# Split by month: month 6 is the validation fold, month 7 is the test fold and
# every remaining row of the score table is training data.
val_index = baf.index[(baf['month'] == 6).to_numpy()]
test_index = baf.index[(baf['month'] == 7).to_numpy()]

train = baf_model_score.drop(val_index).drop(test_index)
val = baf_model_score.loc[val_index]
test = baf_model_score.loc[test_index]

# Dataset configuration; the notebook reads `categorical_dict` (allowed
# categories per feature) and `data_cols` from it.
data_cfg_path = './data/dataset_cfg.yaml'
with open(data_cfg_path, 'r') as cfg_file:
    data_cfg = yaml.safe_load(cfg_file)

cat_dict = data_cfg['categorical_dict']

def cat_checker(data, features, cat_dict):
    """Return a copy of ``data`` whose categorical features use the expected categories.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is never modified.
    features : iterable of str
        Names of the columns to check.
    cat_dict : mapping of str -> list
        Expected, ordered list of categories for every name in ``features``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data``. Every listed column that is not categorical, or
        whose categories differ from ``cat_dict[feature]`` (order included),
        is rebuilt as a ``pd.Categorical`` with exactly those categories.
        Values that are not among the expected categories become NaN.
    """
    new_data = data.copy()
    for feature in features:
        expected = cat_dict[feature]
        dtype = new_data[feature].dtype
        # Plain object/string/numeric columns have no `.categories`; treat them
        # as "needs conversion" instead of failing with an AttributeError.
        is_categorical = isinstance(dtype, pd.CategoricalDtype)
        if not is_categorical or dtype.categories.to_list() != expected:
            new_data[feature] = pd.Categorical(new_data[feature].values, categories=expected)

    return new_data

# Names of the categorical feature columns, as listed under
# `data_cols.categorical` in the dataset configuration.
CATEGORICAL_COLS = data_cfg['data_cols']['categorical']


# Results

In [8]:
from sklearn.metrics import confusion_matrix

# Catalogue the test environments: every sub-directory of ./testbed/test/ is
# one environment and contributes its batches.csv and capacity.csv tables.
a = dict()
for direc in os.listdir('./testbed/test/'):
    if os.path.isfile(f'./testbed/test/{direc}'):
        continue
    a[direc] = dict()
    a[direc]['bat'] = pd.read_csv('./testbed/test/' + direc + '/batches.csv')
    a[direc]['cap'] = pd.read_csv('./testbed/test/' + direc + '/capacity.csv')

# Column layout of the per-(training seed, model, environment) result table.
RESULT_COLUMNS = ['training_seed',
                  'model',
                  'batch_size',
                  'batch_seed',
                  'absence_rate',
                  'absence_seed',
                  'distribution',
                  'distribution_std',
                  'distribution_seed',
                  'deferral_rate',
                  'pool',
                  'n_errors',
                  'tp',
                  'fp',
                  'fn',
                  'tn',
                  'tpr',
                  'fpr',
                  'fpr_disp']

# Lookup tables used to decode the environment directory names.
BATCH_SIZES = {'large': 5000, 'small': 1000}
POOL_NAMES = {'sp': 'sparse', 'ma': 'agreeing', 'un': 'unfair', 'st': 'standard'}

test_env_df = pd.DataFrame(columns=RESULT_COLUMNS)
seeds = os.listdir('./deferral_results')

if not os.path.isfile('test_results.parquet'):
    # Decode every environment name once. Names look like
    # '<size>_...-<batch seed>#<distribution>_<absence>_<deferral>_..._<pool>',
    # where the distribution and absence fields carry '-<seed>' suffixes unless
    # they are 'homogenous' / 'fullteam'.
    env_settings = dict()
    for direc in a:
        batch_part, team_part = direc.split('#')[:2]
        team_fields = team_part.split('_')

        size_tag = batch_part.split('_')[0]
        if size_tag not in BATCH_SIZES:
            # Previously an unknown prefix silently reused the batch size of
            # the preceding environment.
            raise ValueError(f'Cannot infer batch size from test environment {direc!r}')

        is_homogenous = team_fields[0] == 'homogenous'
        is_full_team = team_fields[1] == 'fullteam'
        env_settings[direc] = {
            'batch_size': BATCH_SIZES[size_tag],
            'batch_seed': batch_part.split('-')[1],
            'absence_rate': 0 if is_full_team else 0.5,
            'absence_seed': 'NA' if is_full_team else team_fields[1].split('-')[1],
            'distribution': 'homogenous' if is_homogenous else 'variable',
            'distribution_std': 'NA' if is_homogenous else '0.2',
            'distribution_seed': 'NA' if is_homogenous else team_fields[0].split('-')[1],
            'deferral_rate': 0.2 if team_fields[2] == 'def20' else 0.5,
            'pool': POOL_NAMES.get(team_fields[-1], 'all'),
        }

    # Ground truth and age-group masks do not depend on the run being scored.
    label = test['fraud_bool']
    is_negative = label == 0
    is_old = test['customer_age'] >= 50
    is_yng = test['customer_age'] < 50

    # Rows are collected as plain dicts and turned into a frame once at the
    # end; DataFrame.append was quadratic and no longer exists in pandas 2.
    records = []
    for seed in seeds:
        # The 'random' baseline has its own layout and is handled separately;
        # stray files (e.g. .DS_Store) are not training-seed directories.
        if seed == 'random' or not os.path.isdir(f'./deferral_results/{seed}'):
            continue
        models = os.listdir(f'./deferral_results/{seed}')
        for model in models:
            for direc, settings in env_settings.items():
                # Instances without a stored review keep the default prediction 0.
                prediction = pd.Series(0, index=test.index)
                reviews = pd.read_parquet(f'./deferral_results/{seed}/{model}/{direc}/results.parquet')
                prediction.loc[reviews.index] = reviews['prediction']

                # labels=[0, 1] keeps the matrix 2x2 even if one class is absent.
                tn, fp, fn, tp = confusion_matrix(y_true=label, y_pred=prediction, labels=[0, 1]).ravel()

                # False-positive rate within each age group (split at 50).
                flagged = prediction == 1
                cleared = prediction == 0
                fp_old = (flagged & is_negative & is_old).sum()
                tn_old = (cleared & is_negative & is_old).sum()
                fp_yng = (flagged & is_negative & is_yng).sum()
                tn_yng = (cleared & is_negative & is_yng).sum()
                fpr_yng = fp_yng / (fp_yng + tn_yng)
                fpr_old = fp_old / (fp_old + tn_old)

                records.append({
                    'training_seed': seed,
                    'model': model,
                    **settings,
                    # Despite the name this is the misclassification *rate*.
                    'n_errors': (prediction != label).astype(int).mean(),
                    'tp': tp,
                    'fp': fp,
                    'fn': fn,
                    'tn': tn,
                    'tpr': tp / (tp + fn),
                    'fpr': fp / (fp + tn),
                    # Ratio of the younger group's FPR to the older group's FPR.
                    'fpr_disp': fpr_yng / fpr_old,
                })

    # Finalise once, after *all* seeds were processed. Doing this inside the
    # seed loop added `loss_0057` to the frame the next iteration appended to.
    test_env_df = pd.DataFrame(records, columns=RESULT_COLUMNS)
    test_results = test_env_df.assign(
        loss_0057=(l * test_env_df['fp'] + test_env_df['fn']).astype('float')
    )
    test_results.to_parquet('test_results.parquet')
else:
    test_results = pd.read_parquet('test_results.parquet')



In [9]:
from sklearn.metrics import confusion_matrix

# Catalogue the test environments: every sub-directory of ./testbed/test/ is
# one environment and contributes its batches.csv and capacity.csv tables.
a = dict()
for direc in os.listdir('./testbed/test/'):
    if os.path.isfile(f'./testbed/test/{direc}'):
        continue
    a[direc] = dict()
    a[direc]['bat'] = pd.read_csv('./testbed/test/' + direc + '/batches.csv')
    a[direc]['cap'] = pd.read_csv('./testbed/test/' + direc + '/capacity.csv')

# Column layout of the random-baseline result table. There is no training
# seed here, and the expert-pool column is called 'exp_pool'.
RANDOM_RESULT_COLUMNS = ['model',
                         'batch_size',
                         'batch_seed',
                         'absence_rate',
                         'absence_seed',
                         'distribution',
                         'distribution_std',
                         'distribution_seed',
                         'deferral_rate',
                         'exp_pool',
                         'n_errors',
                         'tp',
                         'fp',
                         'fn',
                         'tn',
                         'tpr',
                         'fpr',
                         'fpr_disp']

# Lookup tables used to decode the environment directory names.
BATCH_SIZES = {'large': 5000, 'small': 1000}
POOL_NAMES = {'sp': 'sparse', 'ma': 'agreeing', 'un': 'unfair', 'st': 'standard'}

test_env_df = pd.DataFrame(columns=RANDOM_RESULT_COLUMNS)
models = ['random']

if not os.path.isfile('test_results_random.parquet'):
    # Decode every environment name once. Names look like
    # '<size>_...-<batch seed>#<distribution>_<absence>_<deferral>_..._<pool>',
    # where the distribution and absence fields carry '-<seed>' suffixes unless
    # they are 'homogenous' / 'fullteam'.
    env_settings = dict()
    for direc in a:
        batch_part, team_part = direc.split('#')[:2]
        team_fields = team_part.split('_')

        size_tag = batch_part.split('_')[0]
        if size_tag not in BATCH_SIZES:
            # Previously an unknown prefix silently reused the batch size of
            # the preceding environment.
            raise ValueError(f'Cannot infer batch size from test environment {direc!r}')

        is_homogenous = team_fields[0] == 'homogenous'
        is_full_team = team_fields[1] == 'fullteam'
        env_settings[direc] = {
            'batch_size': BATCH_SIZES[size_tag],
            'batch_seed': batch_part.split('-')[1],
            'absence_rate': 0 if is_full_team else 0.5,
            'absence_seed': 'NA' if is_full_team else team_fields[1].split('-')[1],
            'distribution': 'homogenous' if is_homogenous else 'variable',
            'distribution_std': 'NA' if is_homogenous else '0.2',
            'distribution_seed': 'NA' if is_homogenous else team_fields[0].split('-')[1],
            'deferral_rate': 0.2 if team_fields[2] == 'def20' else 0.5,
            'exp_pool': POOL_NAMES.get(team_fields[-1], 'all'),
        }

    # Ground truth and age-group masks do not depend on the run being scored.
    label = test['fraud_bool']
    is_negative = label == 0
    is_old = test['customer_age'] >= 50
    is_yng = test['customer_age'] < 50

    # Rows are collected as plain dicts and turned into a frame once at the
    # end; DataFrame.append was quadratic and no longer exists in pandas 2.
    records = []
    for model in models:
        for direc, settings in env_settings.items():
            # Instances without a stored review keep the default prediction 0.
            prediction = pd.Series(0, index=test.index)
            reviews = pd.read_parquet(f'./deferral_results/{model}/{direc}/results.parquet')
            prediction.loc[reviews.index] = reviews['prediction']

            # labels=[0, 1] keeps the matrix 2x2 even if one class is absent.
            tn, fp, fn, tp = confusion_matrix(y_true=label, y_pred=prediction, labels=[0, 1]).ravel()

            # False-positive rate within each age group (split at 50).
            flagged = prediction == 1
            cleared = prediction == 0
            fp_old = (flagged & is_negative & is_old).sum()
            tn_old = (cleared & is_negative & is_old).sum()
            fp_yng = (flagged & is_negative & is_yng).sum()
            tn_yng = (cleared & is_negative & is_yng).sum()
            fpr_yng = fp_yng / (fp_yng + tn_yng)
            fpr_old = fp_old / (fp_old + tn_old)

            records.append({
                'model': model,
                **settings,
                # Despite the name this is the misclassification *rate*.
                'n_errors': (prediction != label).astype(int).mean(),
                'tp': tp,
                'fp': fp,
                'fn': fn,
                'tn': tn,
                'tpr': tp / (tp + fn),
                'fpr': fp / (fp + tn),
                # Ratio of the younger group's FPR to the older group's FPR.
                'fpr_disp': fpr_yng / fpr_old,
            })

    test_env_df = pd.DataFrame(records, columns=RANDOM_RESULT_COLUMNS)
    test_results_ran = test_env_df.assign(
        loss_0057=(l * test_env_df['fp'] + test_env_df['fn']).astype('float')
    )
    test_results_ran.to_parquet('test_results_random.parquet')
else:
    test_results_ran = pd.read_parquet('test_results_random.parquet')
        
            


In [10]:
# Display the per-environment results for the trained assignment models
# (computed, or loaded from test_results.parquet, by the results cell above).
test_results

NameError: name 'test_results' is not defined

In [6]:
# Short aliases for the two result tables used by the analysis cells below.
tests, tests_ran = test_results, test_results_ran

# Models compared in the summary table.
models = ['OvA', 'DeCCaF_greedy', 'DeCCaF_linear']

# 'small-1_regular' ... 'small-5_regular'
# NOTE(review): presumably the training-seed directory names -- confirm.
seeds = [f'small-{k}_regular' for k in range(1, 6)]

NameError: name 'test_results' is not defined

In [None]:
# Keys that identify one evaluated test environment in both result tables
# (the expert-pool key is added separately: 'pool' vs. 'exp_pool').
ENV_KEYS = ['batch_size', 'batch_seed', 'absence_rate', 'absence_seed',
            'distribution', 'distribution_std', 'distribution_seed', 'deferral_rate']

# The random baseline has no training seed, so its losses are repeated this
# many times to line up with the per-seed rows of the trained models.
# NOTE(review): 5 is assumed to equal the number of training seeds -- confirm.
N_TRAINING_SEEDS = 5

# One column of mean losses per method, one row per (training seed,
# environment). Rows follow the sorted group order produced by groupby, so
# columns are aligned by position. Add e.g. 'ova_ncs' to the tuple to include
# further models.
ranks = pd.DataFrame()
for model_name in ('OvA', 'DeCCaF_greedy', 'DeCCaF_linear'):
    ranks[model_name] = (
        tests.loc[tests['model'] == model_name]
        # Select the loss before aggregating: averaging the whole frame fails
        # on the string `model` column in pandas >= 2.0.
        .groupby(by=['training_seed'] + ENV_KEYS + ['pool'])['loss_0057']
        .mean()
        .to_numpy()
    )

# Mean loss of the random baseline per environment, computed once and tiled.
random_loss = (
    tests_ran.loc[tests_ran['model'] == 'random']
    .groupby(by=ENV_KEYS + ['exp_pool'])['loss_0057']
    .mean()
    .to_numpy()
)
a = np.tile(random_loss, N_TRAINING_SEEDS)

ranks['ReL'] = a

In [None]:
choices = ['OvA', 'DeCCaF_greedy', 'DeCCaF_linear', 'ReL']

# comps.loc[first, second] is the fraction of rows of `ranks` in which method
# `first` has a strictly lower loss than method `second`.
n_rows = len(ranks)
win_rates = {
    second: {
        first: sum((ranks[first] < ranks[second]).astype(int)) / n_rows
        for first in choices
    }
    for second in choices
}

comps = pd.DataFrame(win_rates, index=choices, columns=choices).astype(float).round(2)
comps

NameError: name 'pd' is not defined

In [None]:
# Keys that define one experimental scenario; batch/absence/distribution seeds
# are averaged out. The expert-pool key is added per table ('exp_pool' for the
# random baseline, 'pool' for the trained models).
SCENARIO_KEYS = ['batch_size', 'absence_rate', 'distribution', 'deferral_rate']
SUMMARY_METRICS = ['tpr', 'fpr', 'loss_0057', 'fpr_disp']


def _scenario_stats(frame, pool_col):
    """Return ``(mean, std)`` of the summary metrics per scenario.

    Both frames have the scenario keys plus ``pool_col`` as ordinary columns
    and a fresh 0..n-1 index, in the sorted group order produced by groupby.
    The metrics are selected before aggregating so that string columns such
    as ``model`` never reach ``mean``/``std``.
    """
    grouped = frame.groupby(by=SCENARIO_KEYS + [pool_col])[SUMMARY_METRICS]
    return grouped.mean().reset_index(), grouped.std().reset_index()


def _format_loss(mean_loss, std_loss):
    """Render the loss as LaTeX: integer mean with a small ``± 1.96 * std``."""
    centre = mean_loss.round(0).astype(int).astype(str)
    spread = (1.96 * std_loss).round(0).astype(int).astype(str)
    return '$' + centre + r'\mbox{\scriptsize{$\pm ' + spread + r'$} }$'


def _format_pe(mean_disp):
    """Render the FPR disparity as LaTeX math with exactly two decimals."""
    # Round first (as before), then format: this pads '0.3' to '0.30' without
    # the fragile string-length test, which also mangled 'nan' and 'inf'.
    return '$' + mean_disp.round(2).map('{:.2f}'.format) + '$'


# Random-assignment baseline ("ReL") provides the scenario rows of the table.
ran, ran_std = _scenario_stats(tests_ran.loc[tests_ran['model'] == 'random'], 'exp_pool')
results = ran[SCENARIO_KEYS + ['exp_pool']].copy()
results['loss_ReL'] = _format_loss(ran['loss_0057'], ran_std['loss_0057'])
results['PE_ReL'] = _format_pe(ran['fpr_disp'])

# Trained models. Their rows are matched to the baseline rows by position, so
# this relies on every model covering the same scenarios in the same order.
for model in models:
    trained = tests.loc[(tests['model'] == model) & (tests['training_seed'] != 'no_restrictions')]
    ran, ran_std = _scenario_stats(trained, 'pool')
    results[f'loss_{model}'] = _format_loss(ran['loss_0057'], ran_std['loss_0057'])
    results[f'PE_{model}'] = _format_pe(ran['fpr_disp'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [None]:
# Display the LaTeX-formatted summary table assembled in the previous cell.
results

Unnamed: 0,batch_size,absence_rate,distribution,deferral_rate,exp_pool,loss_ReL,PE_ReL,loss_OvA,PE_OvA,loss_DeCCaF_greedy,PE_DeCCaF_greedy,loss_DeCCaF_linear,PE_DeCCaF_linear
0,1000,0.0,homogenous,0.2,agreeing,$787\mbox{\scriptsize{$\pm 24$} }$,$0.37$,$754\mbox{\scriptsize{$\pm 40$} }$,$0.36$,$747\mbox{\scriptsize{$\pm 21$} }$,$0.34$,$759\mbox{\scriptsize{$\pm 23$} }$,$0.31$
1,1000,0.0,homogenous,0.2,all,$806\mbox{\scriptsize{$\pm 19$} }$,$0.19$,$800\mbox{\scriptsize{$\pm 35$} }$,$0.17$,$733\mbox{\scriptsize{$\pm 39$} }$,$0.22$,$725\mbox{\scriptsize{$\pm 50$} }$,$0.21$
2,1000,0.0,homogenous,0.2,sparse,$826\mbox{\scriptsize{$\pm 41$} }$,$0.14$,$822\mbox{\scriptsize{$\pm 55$} }$,$0.15$,$782\mbox{\scriptsize{$\pm 48$} }$,$0.17$,$786\mbox{\scriptsize{$\pm 47$} }$,$0.18$
3,1000,0.0,homogenous,0.2,standard,$791\mbox{\scriptsize{$\pm 25$} }$,$0.17$,$772\mbox{\scriptsize{$\pm 37$} }$,$0.19$,$759\mbox{\scriptsize{$\pm 37$} }$,$0.20$,$745\mbox{\scriptsize{$\pm 23$} }$,$0.20$
4,1000,0.0,homogenous,0.2,unfair,$790\mbox{\scriptsize{$\pm 17$} }$,$0.04$,$782\mbox{\scriptsize{$\pm 64$} }$,$0.04$,$733\mbox{\scriptsize{$\pm 67$} }$,$0.05$,$742\mbox{\scriptsize{$\pm 56$} }$,$0.05$
5,1000,0.0,homogenous,0.5,agreeing,$795\mbox{\scriptsize{$\pm 11$} }$,$0.41$,$724\mbox{\scriptsize{$\pm 52$} }$,$0.39$,$704\mbox{\scriptsize{$\pm 59$} }$,$0.40$,$713\mbox{\scriptsize{$\pm 30$} }$,$0.37$
6,1000,0.0,homogenous,0.5,all,$784\mbox{\scriptsize{$\pm 18$} }$,$0.21$,$773\mbox{\scriptsize{$\pm 63$} }$,$0.20$,$673\mbox{\scriptsize{$\pm 41$} }$,$0.27$,$671\mbox{\scriptsize{$\pm 41$} }$,$0.27$
7,1000,0.0,homogenous,0.5,sparse,$815\mbox{\scriptsize{$\pm 17$} }$,$0.18$,$796\mbox{\scriptsize{$\pm 104$} }$,$0.20$,$728\mbox{\scriptsize{$\pm 88$} }$,$0.22$,$730\mbox{\scriptsize{$\pm 71$} }$,$0.24$
8,1000,0.0,homogenous,0.5,standard,$764\mbox{\scriptsize{$\pm 30$} }$,$0.23$,$746\mbox{\scriptsize{$\pm 44$} }$,$0.24$,$730\mbox{\scriptsize{$\pm 39$} }$,$0.27$,$724\mbox{\scriptsize{$\pm 47$} }$,$0.27$
9,1000,0.0,homogenous,0.5,unfair,$758\mbox{\scriptsize{$\pm 33$} }$,$0.04$,$747\mbox{\scriptsize{$\pm 83$} }$,$0.06$,$697\mbox{\scriptsize{$\pm 77$} }$,$0.06$,$699\mbox{\scriptsize{$\pm 63$} }$,$0.07$
