In [34]:
##############################################
# Import Library
##############################################

import importlib
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
from scipy.stats import bernoulli, norm, zscore
from sklearn.linear_model import LogisticRegression

import calibration_metric
importlib.reload(calibration_metric)
from calibration_metric import ece, ace, tce, tce_ttest

In [19]:
##############################################
# Prepare Data
##############################################

def toy_data(p_train, p_test, num_train=14000, num_test=6000, seed=0):
    """Sample a one-feature binary classification toy problem.

    Labels are Bernoulli draws (probability ``p_train`` for the training
    split, ``p_test`` for the test split). Each feature is a normal draw with
    standard deviation 2 centred at +0.5 for label 1 and -0.5 for label 0.

    Note: this reseeds NumPy's *global* random generator with ``seed`` on
    every call, so any later ``np.random`` draw continues from that stream.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` where the ``X`` arrays are
        column vectors of shape ``(n, 1)`` and the ``y`` arrays have shape
        ``(n,)``.
    """
    np.random.seed(seed)

    # Draw order (train labels, test labels, train features, test features)
    # determines which random numbers each array receives; keep it fixed.
    labels_train = bernoulli.rvs(p_train, size=num_train)
    labels_test = bernoulli.rvs(p_test, size=num_test)

    def _draw_features(labels):
        # Map labels {0, 1} to class centres {-0.5, +0.5}.
        centres = 0.5 * (2 * labels - 1)
        return norm.rvs(loc=centres, scale=2, size=len(labels)).reshape(-1, 1)

    features_train = _draw_features(labels_train)
    features_test = _draw_features(labels_test)
    return features_train, labels_train, features_test, labels_test


In [20]:
##############################################
# Fit Algorithms and Measure Errors: minimum bin size
##############################################

def do_experiment_minbinsize(expid, p_train, p_test):
    """Compare calibration metrics while sweeping the minimum bin size.

    Draws a toy dataset with ``toy_data(p_train, p_test)``, fits a logistic
    regression on the training split and scores the test-set probabilities
    under seven settings. In setting ``i`` the ``pavabc`` variant of ``tce``
    receives ``n_min=n_mins[i]`` and ``n_max=n_maxs[i]``; the quantile ``tce``,
    ``ece`` and ``ace`` receive ``n_bin=n_bins[i]``; the ``pava`` variant is
    called with identical arguments in every row.

    Side effects: prints one progress line per setting, writes
    ``./Result/Supplement/minbinsize_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/minbinsize_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Dataset
    df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

    #Fit LR
    alg = LogisticRegression(max_iter=10000, random_state=0)
    alg.fit(df_train_X, df_train_y)
    preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

    #Setting
    n_mins = [ 1, 6, 30, 60, 300, 600, 3000 ]
    # 6000 matches toy_data's default num_test, i.e. the size of the test split.
    n_maxs = [ 6000 ] * len(n_mins)
    n_bins = [ 1000, 500, 100, 50, 10, 5, 1 ]
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(n_mins), len(columns)))

    for ith, (n_min, n_max, n_bin) in enumerate(zip(n_mins, n_maxs, n_bins)):
        print("Processing: " + str(expid) + " - " + str(ith))

        v1 = tce(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds_test, df_test_y, strategy="pava")
        v4 = ece(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        # The 'MCE' / 'MCE(Q)' columns are ece / ace evaluated with mode='inf'.
        v6 = ece(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    table.index = [ str(elem) for elem in n_mins ]  # rows labelled by n_min
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_minbinsize(expid="A", p_train=0.50, p_test=0.50)
do_experiment_minbinsize(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4
Processing: A - 5
Processing: A - 6


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
1,3.45,5.1,3.45,0.1143,0.1651,0.8767,0.6392
6,3.3833,4.2,3.45,0.0839,0.1142,0.8767,0.5016
30,2.35,4.3,3.45,0.0382,0.0457,0.8767,0.1705
60,2.6333,3.5667,3.45,0.0271,0.037,0.2533,0.1189
300,7.2833,10.8833,3.45,0.0138,0.015,0.102,0.0528
600,13.5667,38.75,3.45,0.0116,0.0086,0.102,0.0236
3000,92.2,92.2,3.45,0.0021,0.0021,0.0021,0.0021


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4
Processing: B - 5
Processing: B - 6


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
1,88.0667,6.6667,88.0667,0.1417,0.1847,0.8767,0.6111
6,88.0667,8.7,88.0667,0.1179,0.1389,0.8767,0.4811
30,88.3333,32.2833,88.0667,0.0993,0.0992,0.8767,0.2264
60,87.8667,56.7667,88.0667,0.0971,0.0964,0.2426,0.1827
300,96.1,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
600,96.6,96.7833,88.0667,0.0963,0.0951,0.1099,0.1092
3000,93.95,93.95,88.0667,0.0951,0.0951,0.0951,0.0951


In [46]:
##############################################
# Fit Algorithms and Measure Errors: maximum bin size
##############################################

def do_experiment_maxbinsize(expid, p_train, p_test):
    """Compare calibration metrics while sweeping the maximum bin size.

    Draws a toy dataset with ``toy_data(p_train, p_test)``, fits a logistic
    regression on the training split and scores the test-set probabilities
    under seven settings. In setting ``i`` the ``pavabc`` variant of ``tce``
    receives ``n_min=1`` and ``n_max=n_maxs[i]``; the quantile ``tce``,
    ``ece`` and ``ace`` receive ``n_bin=n_bins[i]``; the ``pava`` variant is
    called with identical arguments in every row.

    Side effects: prints one progress line per setting, writes
    ``./Result/Supplement/maxbinsize_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/maxbinsize_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Dataset
    df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

    #Fit LR
    alg = LogisticRegression(max_iter=10000, random_state=0)
    alg.fit(df_train_X, df_train_y)
    preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

    #Setting
    n_maxs = [ 6, 30, 60, 300, 600, 3000, 6000 ]
    n_mins = [ 1 ] * len(n_maxs)  # lower bound held fixed while n_max varies
    n_bins = [ 1000, 500, 100, 50, 10, 5, 1 ]
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(n_maxs), len(columns)))

    for ith, (n_min, n_max, n_bin) in enumerate(zip(n_mins, n_maxs, n_bins)):
        print("Processing: " + str(expid) + " - " + str(ith))

        v1 = tce(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds_test, df_test_y, strategy="pava")
        v4 = ece(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        # The 'MCE' / 'MCE(Q)' columns are ece / ace evaluated with mode='inf'.
        v6 = ece(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    table.index = [ str(elem) for elem in n_maxs ]  # rows labelled by n_max
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_maxbinsize(expid="A", p_train=0.50, p_test=0.50)
do_experiment_maxbinsize(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4
Processing: A - 5
Processing: A - 6


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
6,5.85,5.1,3.45,0.1143,0.1651,0.8767,0.6392
30,3.0,4.2,3.45,0.0839,0.1142,0.8767,0.5016
60,2.3667,4.3,3.45,0.0382,0.0457,0.8767,0.1705
300,3.7667,3.5667,3.45,0.0271,0.037,0.2533,0.1189
600,3.3833,10.8833,3.45,0.0138,0.015,0.102,0.0528
3000,3.45,38.75,3.45,0.0116,0.0086,0.102,0.0236
6000,3.45,92.2,3.45,0.0021,0.0021,0.0021,0.0021


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4
Processing: B - 5
Processing: B - 6


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
6,5.5,6.6667,88.0667,0.1417,0.1847,0.8767,0.6111
30,9.1,8.7,88.0667,0.1179,0.1389,0.8767,0.4811
60,14.3833,32.2833,88.0667,0.0993,0.0992,0.8767,0.2264
300,79.6667,56.7667,88.0667,0.0971,0.0964,0.2426,0.1827
600,85.65,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
3000,88.0667,96.7833,88.0667,0.0963,0.0951,0.1099,0.1092
6000,88.0667,93.95,88.0667,0.0951,0.0951,0.0951,0.0951


In [47]:
##############################################
# Fit Algorithms and Measure Errors: joint minimum/maximum bin size
##############################################

def do_experiment_twobinsize(expid, p_train, p_test):
    """Compare calibration metrics while sweeping both bin-size bounds jointly.

    Draws a toy dataset with ``toy_data(p_train, p_test)``, fits a logistic
    regression on the training split and scores the test-set probabilities
    under five settings. In setting ``i`` the ``pavabc`` variant of ``tce``
    receives ``n_min=n_mins[i]`` and ``n_max=n_maxs[i]`` (each ``n_max`` in the
    list is four times its ``n_min``); the quantile ``tce``, ``ece`` and
    ``ace`` receive ``n_bin=n_bins[i]``; the ``pava`` variant is called with
    identical arguments in every row.

    Side effects: prints one progress line per setting, writes
    ``./Result/Supplement/twobinsize_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/twobinsize_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Dataset
    df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

    #Fit LR
    alg = LogisticRegression(max_iter=10000, random_state=0)
    alg.fit(df_train_X, df_train_y)
    preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

    #Setting
    n_mins = [ 6, 30, 60, 300, 600 ]
    n_maxs = [ 24, 120, 240, 1200, 2400 ]
    n_bins = [ 500, 100, 50, 10, 5 ]
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(n_bins), len(columns)))

    for ith, (n_min, n_max, n_bin) in enumerate(zip(n_mins, n_maxs, n_bins)):
        print("Processing: " + str(expid) + " - " + str(ith))

        v1 = tce(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds_test, df_test_y, strategy="pava")
        v4 = ece(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        # The 'MCE' / 'MCE(Q)' columns are ece / ace evaluated with mode='inf'.
        v6 = ece(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    # Rows are labelled by n_bin here, not by the n_min / n_max pair.
    table.index = [ str(elem) for elem in n_bins ]
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_twobinsize(expid="A", p_train=0.50, p_test=0.50)
do_experiment_twobinsize(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
500,3.8,4.2,3.45,0.0839,0.1142,0.8767,0.5016
100,1.8333,4.3,3.45,0.0382,0.0457,0.8767,0.1705
50,0.2833,3.5667,3.45,0.0271,0.037,0.2533,0.1189
10,7.2833,10.8833,3.45,0.0138,0.015,0.102,0.0528
5,13.5667,38.75,3.45,0.0116,0.0086,0.102,0.0236


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
500,7.7333,8.7,88.0667,0.1179,0.1389,0.8767,0.4811
100,45.7667,32.2833,88.0667,0.0993,0.0992,0.8767,0.2264
50,66.1833,56.7667,88.0667,0.0971,0.0964,0.2426,0.1827
10,96.1,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
5,96.6,96.7833,88.0667,0.0963,0.0951,0.1099,0.1092


In [26]:
##############################################
# Fit Algorithms and Measure Errors: perturbed predictions
##############################################

def do_experiment_perturbation(expid, p_train, p_test):
    """Compare calibration metrics on increasingly perturbed predictions.

    Draws a toy dataset with ``toy_data(p_train, p_test)``, fits a logistic
    regression on the training split, and for each noise level adds
    ``noise * N(0, 1)`` to the log-odds of the test-set probabilities before
    mapping them back through the logistic function. The perturbed
    probabilities are scored with ``tce`` (``pavabc``, ``quantile``, ``pava``),
    ``ece`` and ``ace``. Bin settings are fixed: ``n_min`` is 1/20 and
    ``n_max`` 1/5 of the test size, ``n_bin`` is 10.

    Side effects: prints one progress line per noise level, draws from NumPy's
    global random generator, writes
    ``./Result/Supplement/perturbation_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/perturbation_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Dataset
    df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

    #Fit LR
    alg = LogisticRegression(max_iter=10000, random_state=0)
    alg.fit(df_train_X, df_train_y)
    preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

    #Setting
    noises = [ 0.0, 0.01, 0.05, 0.1, 0.5, 1.0 ]
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(noises), len(columns)))
    n_min = int( len(df_test_y) / 20 )
    n_max = int( len(df_test_y) / 5 )
    n_bin = 10

    # Log-odds of the unperturbed predictions; identical for every noise
    # level, so compute once. Would be +/-inf if a prediction were exactly
    # 0 or 1.
    logits_test = np.log(preds_test) - np.log(1 - preds_test)

    for ith, noise in enumerate(noises):
        print("Processing: " + str(expid) + " - " + str(ith))

        # The noise comes from NumPy's global generator. Its only seeding is
        # the np.random.seed call inside toy_data above, so the draws (made
        # for every level, including noise == 0.0) are reproducible only as
        # long as nothing in between consumes the global stream.
        preds = logits_test + noise*np.random.normal(0.0, 1.0, preds_test.shape[0])
        preds = 1.0 / ( 1.0 + np.exp( - preds ) )

        v1 = tce(preds, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds, df_test_y, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds, df_test_y, strategy="pava")
        v4 = ece(preds, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds, df_test_y, n_bin=n_bin, mode='l1')
        # The 'MCE' / 'MCE(Q)' columns are ece / ace evaluated with mode='inf'.
        v6 = ece(preds, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    table.index = [ str(elem) for elem in noises ]  # rows labelled by noise level
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_perturbation(expid="A", p_train=0.50, p_test=0.50)
do_experiment_perturbation(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4
Processing: A - 5


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
0.0,7.2833,10.8833,3.45,0.0138,0.015,0.102,0.0528
0.01,8.7167,9.6167,4.8,0.0113,0.0125,0.0923,0.0527
0.05,12.8833,11.9,7.7667,0.0136,0.0156,0.1198,0.0589
0.1,8.35,13.05,3.55,0.0109,0.0164,0.1143,0.0587
0.5,61.95,65.05,56.1,0.0615,0.0618,0.3601,0.1498
1.0,86.1833,84.1,88.3833,0.147,0.1478,0.3364,0.2621


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4
Processing: B - 5


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
0.0,96.1,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
0.01,96.4,96.4,89.6167,0.0962,0.0951,0.1511,0.1332
0.05,94.7667,95.5333,89.1667,0.0962,0.0951,0.1496,0.142
0.1,93.85,95.9667,86.5833,0.0967,0.0951,0.1852,0.1412
0.5,86.6667,83.9,81.2667,0.1071,0.1055,0.2513,0.2203
1.0,90.3167,88.85,91.2167,0.1713,0.1698,0.4577,0.3648


In [27]:
##############################################
# Fit Algorithms and Measure Errors: significance level
##############################################

def do_experiment_siglevel(expid, p_train, p_test):
    """Compare calibration metrics while sweeping the ``siglevel`` of ``tce``.

    Draws a toy dataset with ``toy_data(p_train, p_test)``, fits a logistic
    regression on the training split and scores the test-set probabilities
    once per significance level. Only the three ``tce`` calls receive
    ``siglevel``; ``ece`` and ``ace`` are called with identical arguments in
    every row. Bin settings are fixed: ``n_min`` is 1/20 and ``n_max`` 1/5 of
    the test size, ``n_bin`` is 10.

    Side effects: prints one progress line per level, writes
    ``./Result/Supplement/siglevel_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/siglevel_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Dataset
    df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

    #Fit LR
    alg = LogisticRegression(max_iter=10000, random_state=0)
    alg.fit(df_train_X, df_train_y)
    preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

    #Setting
    siglevels = [ 0.001, 0.005, 0.01, 0.05, 0.1, 0.5 ]
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(siglevels), len(columns)))
    n_min = int( len(df_test_y) / 20 )
    n_max = int( len(df_test_y) / 5 )
    n_bin = 10

    for ith, siglevel in enumerate(siglevels):
        print("Processing: " + str(expid) + " - " + str(ith))

        v1 = tce(preds_test, df_test_y, siglevel=siglevel, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds_test, df_test_y, siglevel=siglevel, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds_test, df_test_y, siglevel=siglevel, strategy="pava")
        # ece / ace take no siglevel, so these four columns repeat per row.
        v4 = ece(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v6 = ece(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    table.index = [ str(elem) for elem in siglevels ]  # rows labelled by siglevel
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_siglevel(expid="A", p_train=0.50, p_test=0.50)
do_experiment_siglevel(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4
Processing: A - 5


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
0.001,1.45,4.6833,0.1833,0.0138,0.015,0.102,0.0528
0.005,2.4667,5.5667,1.1333,0.0138,0.015,0.102,0.0528
0.01,3.05,6.2,1.65,0.0138,0.015,0.102,0.0528
0.05,7.2833,10.8833,3.45,0.0138,0.015,0.102,0.0528
0.1,12.85,15.2,6.8667,0.0138,0.015,0.102,0.0528
0.5,53.1,55.3333,46.5667,0.0138,0.015,0.102,0.0528


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4
Processing: B - 5


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
0.001,77.8,83.3833,76.1,0.0963,0.0951,0.1466,0.1314
0.005,86.3,92.8,80.0833,0.0963,0.0951,0.1466,0.1314
0.01,90.1833,95.2167,83.1167,0.0963,0.0951,0.1466,0.1314
0.05,96.1,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
0.1,97.2167,96.9167,90.1667,0.0963,0.0951,0.1466,0.1314
0.5,99.3,98.7167,97.75,0.0963,0.0951,0.1466,0.1314


In [38]:
##############################################
# Fit Algorithms and Measure Errors: test type (tce vs. tce_ttest)
##############################################

def do_experiment_testtype(expid, p_train, p_test):
    """Compare ``tce`` against ``tce_ttest`` across bin-size settings.

    Draws a toy dataset with ``toy_data(p_train, p_test)``, fits a logistic
    regression on the training split and scores the test-set probabilities
    under five settings. For each of the three strategies (``pavabc``,
    ``quantile``, ``pava``) both ``tce`` (columns suffixed ``-B``) and
    ``tce_ttest`` (columns suffixed ``-T``) are evaluated. The ``pava`` calls
    take no bin-size argument and are repeated unchanged in every row.

    Side effects: prints one progress line per setting, writes
    ``./Result/Supplement/testtype_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/testtype_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Dataset
    df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

    #Fit LR
    alg = LogisticRegression(max_iter=10000, random_state=0)
    alg.fit(df_train_X, df_train_y)
    preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

    #Setting
    n_mins = [ 6, 30, 60, 300, 600 ]
    n_maxs = [ 24, 120, 240, 1200, 2400 ]
    n_bins = [ 500, 100, 50, 10, 5 ]
    columns = ['TCE(P)-B', 'TCE(P)-T', 'TCE(Q)-B', 'TCE(Q)-T', 'TCE(V)-B', 'TCE(V)-T' ]
    results = np.zeros((len(n_mins), len(columns)))

    for ith, (n_min, n_max, n_bin) in enumerate(zip(n_mins, n_maxs, n_bins)):
        print("Processing: " + str(expid) + " - " + str(ith))

        v1_binom = tce(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v1_ttest = tce_ttest(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2_binom = tce(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v2_ttest = tce_ttest(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v3_binom = tce(preds_test, df_test_y, strategy="pava")
        v3_ttest = tce_ttest(preds_test, df_test_y, strategy="pava")
        results[ith,:] = [ v1_binom, v1_ttest, v2_binom, v2_ttest, v3_binom, v3_ttest ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    table.index = [ str(elem) for elem in n_mins ]  # rows labelled by n_min
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_testtype(expid="A", p_train=0.50, p_test=0.50)
do_experiment_testtype(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4


Unnamed: 0,TCE(P)-B,TCE(P)-T,TCE(Q)-B,TCE(Q)-T,TCE(V)-B,TCE(V)-T
6,3.8,33.6667,4.2,31.9167,3.45,34.2167
30,1.8333,36.0,4.3,31.4333,3.45,34.2167
60,0.2833,31.3667,3.5667,40.4333,3.45,34.2167
300,7.2833,37.8,10.8833,41.85,3.45,34.2167
600,13.5667,46.5,38.75,68.8167,3.45,34.2167


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4


Unnamed: 0,TCE(P)-B,TCE(P)-T,TCE(Q)-B,TCE(Q)-T,TCE(V)-B,TCE(V)-T
6,7.7333,50.2833,8.7,45.1833,88.0667,97.7333
30,45.7667,73.2667,32.2833,71.1667,88.0667,97.7333
60,66.1833,96.5333,56.7667,85.3833,88.0667,97.7333
300,96.1,99.2667,96.4667,98.4833,88.0667,97.7333
600,96.6,98.6333,96.7833,98.4833,88.0667,97.7333


In [41]:
##############################################
# Fit Algorithms and Measure Errors: dataset size
##############################################

def do_experiment_datasize(expid, p_train, p_test):
    """Compare calibration metrics across dataset sizes.

    For each (train size, test size) pair a fresh toy dataset is drawn with
    ``toy_data``, a logistic regression is fitted on the training split and
    the test-set probabilities are scored with ``tce`` (``pavabc``,
    ``quantile``, ``pava``), ``ece`` and ``ace``. Bin settings scale with the
    test size: ``n_min`` is 1/20 and ``n_max`` 1/5 of it (truncated to an
    integer); ``n_bin`` is fixed at 10.

    Side effects: prints one progress line per size, writes
    ``./Result/Supplement/datasize_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_train, p_test : float
        Forwarded to ``toy_data`` as the Bernoulli label probabilities of the
        training and test split.

    Returns
    -------
    None
    """
    #File Name
    resultname = "./Result/Supplement/datasize_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Setting
    num_trains = [ 70, 140, 700, 1400, 7000, 14000, 70000, 140000 ]
    num_tests = [ 30, 60, 300, 600, 3000, 6000, 30000, 60000 ]
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(num_tests), len(columns)))

    for ith, (num_train, num_test) in enumerate(zip(num_trains, num_tests)):
        print("Processing: " + str(expid) + " - " + str(ith))

        #Dataset
        df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test, num_train=num_train, num_test=num_test)

        #Fit LR
        alg = LogisticRegression(max_iter=10000, random_state=0)
        alg.fit(df_train_X, df_train_y)
        preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

        # For the smallest test size (30) this gives n_min = 1 and n_max = 6.
        n_min = int( len(df_test_y) / 20 )
        n_max = int( len(df_test_y) / 5 )
        n_bin = 10

        v1 = tce(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds_test, df_test_y, strategy="pava")
        v4 = ece(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        # The 'MCE' / 'MCE(Q)' columns are ece / ace evaluated with mode='inf'.
        v6 = ece(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    table.index = [ str(elem) for elem in num_tests ]  # rows labelled by test size
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A uses label probability 0.50 for both splits; scenario B lowers
# the test-split probability to 0.40.
do_experiment_datasize(expid="A", p_train=0.50, p_test=0.50)
do_experiment_datasize(expid="B", p_train=0.50, p_test=0.40)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4
Processing: A - 5
Processing: A - 6
Processing: A - 7


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
30,0.0,0.0,0.0,0.2293,0.2631,0.4164,0.566
60,0.0,3.3333,0.0,0.0923,0.2158,0.7148,0.4208
300,5.3333,11.0,6.3333,0.0774,0.0867,0.1971,0.2057
600,1.0,4.5,1.6667,0.0368,0.0445,0.3404,0.127
3000,8.0667,4.6333,4.7667,0.019,0.0182,0.1209,0.0304
6000,7.2833,10.8833,3.45,0.0138,0.015,0.102,0.0528
30000,16.1633,31.7167,0.7833,0.0036,0.0061,0.9045,0.0164
60000,19.1483,45.76,4.4417,0.0035,0.0043,0.0949,0.01


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4
Processing: B - 5
Processing: B - 6
Processing: B - 7


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
30,13.3333,6.6667,36.6667,0.3164,0.3377,0.6569,0.6338
60,0.0,3.3333,0.0,0.1072,0.1611,0.7148,0.4208
300,27.3333,37.3333,48.3333,0.124,0.1368,0.1971,0.2665
600,14.1667,8.0,26.5,0.0694,0.0685,0.5824,0.135
3000,92.2333,91.7667,76.7667,0.0964,0.0958,0.1495,0.1358
6000,96.1,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
30000,99.47,99.23,97.4433,0.0907,0.0906,0.9045,0.1064
60000,99.7783,99.66,98.9,0.0923,0.0923,0.0972,0.1065


In [45]:
##############################################
# Fit Algorithms and Measure Errors: label probability
##############################################

def do_experiment_differentp(expid, p_trains, p_tests):
    """Compare calibration metrics across pairs of label probabilities.

    For each pair ``(p_trains[i], p_tests[i])`` a toy dataset is drawn with
    ``toy_data``, a logistic regression is fitted on the training split and
    the test-set probabilities are scored with ``tce`` (``pavabc``,
    ``quantile``, ``pava``), ``ece`` and ``ace``. Bin settings are fixed:
    ``n_min`` is 1/20 and ``n_max`` 1/5 of the test size, ``n_bin`` is 10.

    Side effects: prints one progress line per pair, writes
    ``./Result/Supplement/differentp_<expid>_allmetrics.csv`` (creating the
    folder when it is missing), sets the global pandas float display format to
    four decimals and shows the table via ``display`` (expects an IPython
    session).

    Parameters
    ----------
    expid : str
        Experiment label; concatenated into the output file name and shown in
        the progress lines.
    p_trains, p_tests : sequence of float
        Paired Bernoulli label probabilities for the training and test split,
        forwarded element-wise to ``toy_data``. Must have the same length.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``p_trains`` and ``p_tests`` differ in length.
    """
    # The two lists are consumed pairwise. A shorter p_tests used to fail with
    # an IndexError part-way through the sweep and a longer one was silently
    # truncated, so reject the mismatch before doing any work.
    if len(p_trains) != len(p_tests):
        raise ValueError(
            "p_trains and p_tests must have the same length, got "
            + str(len(p_trains)) + " and " + str(len(p_tests))
        )

    #File Name
    resultname = "./Result/Supplement/differentp_" + expid
    # DataFrame.to_csv does not create missing folders; make sure the target
    # exists before running the sweep so the results cannot be lost at the end.
    os.makedirs(os.path.dirname(resultname), exist_ok=True)

    #Setting
    columns = ['TCE(P)', 'TCE(Q)', 'TCE(V)', 'ECE', 'ACE', 'MCE', 'MCE(Q)']
    results = np.zeros((len(p_trains), len(columns)))

    for ith, (p_train, p_test) in enumerate(zip(p_trains, p_tests)):
        print("Processing: " + str(expid) + " - " + str(ith))

        #Dataset
        df_train_X, df_train_y, df_test_X, df_test_y = toy_data(p_train, p_test)

        #Fit LR
        alg = LogisticRegression(max_iter=10000, random_state=0)
        alg.fit(df_train_X, df_train_y)
        preds_test = alg.predict_proba(df_test_X)[:,1]  # second predict_proba column

        n_min = int( len(df_test_y) / 20 )
        n_max = int( len(df_test_y) / 5 )
        n_bin = 10

        v1 = tce(preds_test, df_test_y, strategy="pavabc", n_min=n_min, n_max=n_max)
        v2 = tce(preds_test, df_test_y, strategy="quantile", n_bin=n_bin)
        v3 = tce(preds_test, df_test_y, strategy="pava")
        v4 = ece(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        v5 = ace(preds_test, df_test_y, n_bin=n_bin, mode='l1')
        # The 'MCE' / 'MCE(Q)' columns are ece / ace evaluated with mode='inf'.
        v6 = ece(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        v7 = ace(preds_test, df_test_y, n_bin=n_bin, mode='inf')
        results[ith,:] = [ v1, v2, v3, v4, v5, v6, v7 ]

    #Plot and Save Table
    table = pd.DataFrame(results)
    # Rows are labelled by the training probability only; the paired test
    # probability does not appear in the saved table.
    table.index = [ str(elem) for elem in p_trains ]
    table.columns = columns
    table.to_csv(resultname+"_allmetrics.csv", float_format="%.4f")
    pd.options.display.float_format = '{:.4f}'.format
    display(table)


# Scenario A: the test split uses the same label probability as the training
# split in every row.
do_experiment_differentp(
    expid="A",
    p_trains=[0.50, 0.40, 0.30, 0.20, 0.10, 0.08, 0.06, 0.04, 0.02],
    p_tests=[0.50, 0.40, 0.30, 0.20, 0.10, 0.08, 0.06, 0.04, 0.02],
)
# Scenario B: every test probability is lower than its training counterpart.
do_experiment_differentp(
    expid="B",
    p_trains=[0.50, 0.40, 0.30, 0.20, 0.10, 0.08, 0.06, 0.04, 0.02],
    p_tests=[0.40, 0.30, 0.20, 0.10, 0.08, 0.06, 0.04, 0.02, 0.00],
)



Processing: A - 0
Processing: A - 1
Processing: A - 2
Processing: A - 3
Processing: A - 4
Processing: A - 5
Processing: A - 6
Processing: A - 7
Processing: A - 8


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
0.5,7.2833,10.8833,3.45,0.0138,0.015,0.102,0.0528
0.4,7.55,16.2167,8.5667,0.0137,0.0191,0.1632,0.0365
0.3,8.1667,12.8833,2.8167,0.0125,0.0134,0.1042,0.0313
0.2,15.95,22.2167,15.9167,0.0173,0.0153,0.6238,0.037
0.1,11.9833,16.7833,15.2333,0.0096,0.0114,0.4361,0.0218
0.08,15.7,18.5167,23.15,0.0087,0.0107,0.07,0.0234
0.06,11.5333,17.55,13.9833,0.0035,0.0109,0.3064,0.0195
0.04,18.5,15.6667,20.5833,0.0046,0.0074,0.224,0.0177
0.02,13.1167,11.55,20.7667,0.0052,0.0059,0.0052,0.0131


Processing: B - 0
Processing: B - 1
Processing: B - 2
Processing: B - 3
Processing: B - 4
Processing: B - 5
Processing: B - 6
Processing: B - 7
Processing: B - 8


Unnamed: 0,TCE(P),TCE(Q),TCE(V),ECE,ACE,MCE,MCE(Q)
0.5,96.1,96.4667,88.0667,0.0963,0.0951,0.1466,0.1314
0.4,96.5667,96.1833,82.75,0.0872,0.0869,0.1485,0.1262
0.3,94.95,94.6667,88.5833,0.0846,0.0846,0.2146,0.1247
0.2,95.8833,95.5833,96.4333,0.0868,0.0868,0.6238,0.15
0.1,32.35,26.7,42.5667,0.0151,0.0173,0.4361,0.0502
0.08,42.3167,38.7833,45.8333,0.0164,0.0186,0.3259,0.0477
0.06,47.0833,39.95,65.95,0.0167,0.0188,0.3064,0.044
0.04,56.5833,42.4333,72.45,0.0142,0.0142,0.224,0.0337
0.02,99.9167,96.9,100.0,0.0181,0.0181,0.0181,0.0382
