In [1]:
clf_names = ["CART", "EBM", "GNB", "LR", "LR_l2", "DL"]
dataset_names = ['breast', 'campus', 'churn', 'climate',
            'compas', 'diabetes', 'german', 'heart',
            'adult', 'student', 'bank', 'credit']

In [2]:
import pandas as pd 
import numpy as np

In [3]:
def scale_values(worst, best, value):
    y = [0, 100]
    x = [worst, best]
    coeffs = np.polyfit(x, y, 1)
    return coeffs[0]*value + coeffs[1]

### CART GENERATOR

In [4]:
clf_list_cart = ["EBM", "GNB", "LR", "LR_l2", "DL"]
clf_n = len(clf_list_cart)
dataset_n = len(dataset_names)
cart_results = np.zeros(shape=(dataset_n, clf_n))
cart_scaled_results = np.zeros(shape=(dataset_n, clf_n))

clf_name_gen = "CART"

for data_id, dataset in enumerate(dataset_names):
    result_df = pd.read_csv(f"../worst-case_results/{dataset}_{clf_name_gen}.csv")
    result_df = result_df.drop(clf_name_gen, axis=1)

    result_df = result_df.groupby(by="Param").mean()

    for clf_id, clf_name in enumerate(clf_list_cart):
        cart_results[data_id, clf_id] = result_df[clf_name].sum()
    
    
    best = cart_results[data_id].min()
    worst = cart_results[data_id].max()

    for i in range(len(cart_results[data_id])):
        value = np.abs(np.round(scale_values(worst, best, value=cart_results[data_id][i]), decimals=2))
        cart_scaled_results[data_id, i] = value
        

In [5]:
cart_scaled_results

array([[100.  ,  57.92,  39.2 ,  74.33,   0.  ],
       [100.  ,  83.76,  88.58,  91.03,   0.  ],
       [100.  ,  45.08,   5.78,   0.  ,  49.08],
       [ 97.26, 100.  ,  90.91,  95.67,   0.  ],
       [100.  ,   0.  ,  88.9 ,  88.79,  31.9 ],
       [ 92.68,   0.  , 100.  ,  86.8 ,  17.4 ],
       [100.  ,  13.5 ,  82.91,  87.06,   0.  ],
       [100.  ,  69.23,  95.67,  97.5 ,   0.  ],
       [100.  ,   0.  ,  87.91,  88.1 ,  81.87],
       [100.  ,  64.54,  97.03,  98.67,   0.  ],
       [100.  ,   0.  ,  72.46,  72.85,  98.45],
       [100.  ,   0.  ,  50.61,  51.98,  26.76]])

### EBM GENERATOR

In [6]:
clf_list_ebm = ["CART", "GNB", "LR", "LR_l2", "DL"]
clf_n = len(clf_list_cart)
dataset_n = len(dataset_names)
ebm_results = np.zeros(shape=(dataset_n, clf_n))
ebm_scaled_results = np.zeros(shape=(dataset_n, clf_n))

clf_name_gen = "EBM"

for data_id, dataset in enumerate(dataset_names):
    result_df = pd.read_csv(f"../worst-case_results/{dataset}_{clf_name_gen}.csv")
    result_df = result_df.drop(clf_name_gen, axis=1)

    result_df = result_df.groupby(by="Param").mean()

    for clf_id, clf_name in enumerate(clf_list_ebm):
        ebm_results[data_id, clf_id] = result_df[clf_name].sum()
    
    
    best = ebm_results[data_id].min()
    worst = ebm_results[data_id].max()

    for i in range(len(ebm_results[data_id])):
        value = np.abs(np.round(scale_values(worst, best, value=ebm_results[data_id][i]), decimals=2))
        ebm_scaled_results[data_id, i] = value
        

In [7]:
ebm_scaled_results

array([[100.  ,  67.07,  47.54,  89.5 ,   0.  ],
       [ 98.56,  97.69,  97.31, 100.  ,   0.  ],
       [100.  ,  65.8 ,   2.38,   0.  ,  60.58],
       [ 98.32,  89.49, 100.  ,  94.31,   0.  ],
       [ 94.34,   0.  ,  95.49, 100.  ,  19.35],
       [100.  ,  46.  ,  80.59,  84.09,   0.  ],
       [ 92.48,   0.  , 100.  ,  94.75,  62.68],
       [ 98.05,  99.43, 100.  ,  96.82,   0.  ],
       [100.  ,   0.  ,  94.15,  92.31,  72.7 ],
       [ 97.77, 100.  ,  97.65,  98.65,   0.  ],
       [ 99.24,   0.  ,  96.12,  96.87, 100.  ],
       [100.  ,  11.46,  73.37,  68.58,   0.  ]])

### GNB GENERATOR

In [8]:
clf_list_gnb = ["CART", "EBM", "LR", "LR_l2", "DL"]
clf_n = len(clf_list_cart)
dataset_n = len(dataset_names)
gnb_results = np.zeros(shape=(dataset_n, clf_n))
gnb_scaled_results = np.zeros(shape=(dataset_n, clf_n))

clf_name_gen = "GNB"

for data_id, dataset in enumerate(dataset_names):
    result_df = pd.read_csv(f"../worst-case_results/{dataset}_{clf_name_gen}.csv")
    result_df = result_df.drop(clf_name_gen, axis=1)

    result_df = result_df.groupby(by="Param").mean()

    for clf_id, clf_name in enumerate(clf_list_gnb):
        gnb_results[data_id, clf_id] = result_df[clf_name].sum()
    
    
    best = gnb_results[data_id].min()
    worst = gnb_results[data_id].max()

    for i in range(len(gnb_results[data_id])):
        value = np.abs(np.round(scale_values(worst, best, value=gnb_results[data_id][i]), decimals=2))
        gnb_scaled_results[data_id, i] = value

In [9]:
gnb_scaled_results

array([[ 82.4 ,  79.27, 100.  ,  89.79,   0.  ],
       [  0.  ,   6.48,  10.53,   8.68, 100.  ],
       [ 55.38,  91.94, 100.  ,  98.18,   0.  ],
       [ 97.79,  99.44,  57.96, 100.  ,   0.  ],
       [ 87.43,  77.93, 100.  ,  82.62,   0.  ],
       [100.  ,  82.18,  69.89,  73.62,   0.  ],
       [ 93.56,  85.63, 100.  ,  97.76,   0.  ],
       [ 87.7 , 100.  ,  85.86,  92.11,   0.  ],
       [  0.  ,  25.54,  30.44,  30.83, 100.  ],
       [ 76.39,  74.71,  86.49, 100.  ,   0.  ],
       [100.  ,  83.19,  97.2 ,  94.32,   0.  ],
       [ 64.81,   7.08, 100.  ,  76.74,   0.  ]])

### DL GENERATOR

In [10]:
clf_list_dl = ["CART", "EBM", "LR", "LR_l2", "GNB"]
clf_n = len(clf_list_cart)
dataset_n = len(dataset_names)
dl_results = np.zeros(shape=(dataset_n, clf_n))
dl_scaled_results = np.zeros(shape=(dataset_n, clf_n))

clf_name_gen = "DL"

for data_id, dataset in enumerate(dataset_names):
    result_df = pd.read_csv(f"../worst-case_results/{dataset}_{clf_name_gen}.csv")
    result_df = result_df.drop(clf_name_gen, axis=1)

    result_df = result_df.groupby(by="Param").mean()

    for clf_id, clf_name in enumerate(clf_list_dl):
        dl_results[data_id, clf_id] = result_df[clf_name].sum()
    
    
    best = dl_results[data_id].min()
    worst = dl_results[data_id].max()

    for i in range(len(dl_results[data_id])):
        value = np.abs(np.round(scale_values(worst, best, value=dl_results[data_id][i]), decimals=2))
        dl_scaled_results[data_id, i] = value

In [11]:
dl_scaled_results

array([[  0.  , 100.  ,  44.53,  60.65,  52.56],
       [ 92.74, 100.  ,  87.61,  99.66,   0.  ],
       [ 83.72, 100.  ,   0.  ,  74.18,  72.37],
       [100.  ,   0.  ,  81.07,  79.41,  87.34],
       [  2.85, 100.  ,  45.44,  45.63,   0.  ],
       [ 62.8 ,   0.  ,  64.42, 100.  ,  54.41],
       [ 96.49, 100.  ,  92.03,  99.95,   0.  ],
       [ 75.82, 100.  ,  72.94,  76.42,   0.  ],
       [100.  ,  95.17,  68.88,  73.13,   0.  ],
       [ 93.9 ,  20.58,  88.67, 100.  ,   0.  ],
       [ 93.06,  95.53, 100.  ,  95.66,   0.  ],
       [  0.  ,  16.13,  73.76, 100.  ,   6.81]])

### LR_l2 GENERATOR

In [12]:
clf_list_lr_l2 = ["CART", "EBM", "LR", "DL", "GNB"]
clf_n = len(clf_list_cart)
dataset_n = len(dataset_names)
lr_l2_results = np.zeros(shape=(dataset_n, clf_n))
lr_l2_scaled_results = np.zeros(shape=(dataset_n, clf_n))

clf_name_gen = "LR_l2"

for data_id, dataset in enumerate(dataset_names):
    result_df = pd.read_csv(f"../worst-case_results/{dataset}_{clf_name_gen}.csv")
    result_df = result_df.drop(clf_name_gen, axis=1)

    result_df = result_df.groupby(by="Param").mean()

    for clf_id, clf_name in enumerate(clf_list_lr_l2):
        lr_l2_results[data_id, clf_id] = result_df[clf_name].sum()
    
    
    best = lr_l2_results[data_id].min()
    worst = lr_l2_results[data_id].max()

    for i in range(len(lr_l2_results[data_id])):
        value = np.abs(np.round(scale_values(worst, best, value=lr_l2_results[data_id][i]), decimals=2))
        lr_l2_scaled_results[data_id, i] = value

In [13]:
lr_l2_scaled_results

array([[ 80.06,  88.84, 100.  ,   0.  ,  72.2 ],
       [ 67.96,  85.5 , 100.  ,   0.  ,  78.04],
       [ 90.59, 100.  ,  83.09,  31.66,   0.  ],
       [ 92.51,  95.26,  94.83,   0.  , 100.  ],
       [ 91.22,  91.79, 100.  ,  69.33,   0.  ],
       [ 77.49,  81.56, 100.  ,   0.  ,  40.44],
       [ 76.92,  96.65, 100.  ,   1.58,   0.  ],
       [ 88.31,  94.88, 100.  ,   0.  ,  88.54],
       [ 91.94,  90.16, 100.  ,  48.18,   0.  ],
       [ 91.47,  98.07, 100.  ,   0.  ,  82.5 ],
       [ 89.51,  83.46, 100.  ,  63.87,   0.  ],
       [ 42.96,  43.15, 100.  ,   0.  ,  75.71]])

### LR GENERATOR

In [14]:
clf_list_lr = ["CART", "EBM", "LR_l2", "DL", "GNB"]
clf_n = len(clf_list_cart)
dataset_n = len(dataset_names)
lr_results = np.zeros(shape=(dataset_n, clf_n))
lr_scaled_results = np.zeros(shape=(dataset_n, clf_n))

clf_name_gen = "LR"

for data_id, dataset in enumerate(dataset_names):
    result_df = pd.read_csv(f"../worst-case_results/{dataset}_{clf_name_gen}.csv")
    result_df = result_df.drop(clf_name_gen, axis=1)

    result_df = result_df.groupby(by="Param").mean()

    for clf_id, clf_name in enumerate(clf_list_lr):
        lr_results[data_id, clf_id] = result_df[clf_name].sum()
    
    
    best = lr_results[data_id].min()
    worst = lr_results[data_id].max()

    for i in range(len(lr_results[data_id])):
        value = np.abs(np.round(scale_values(worst, best, value=lr_results[data_id][i]), decimals=2))
        lr_scaled_results[data_id, i] = value

In [15]:
lr_scaled_results

array([[ 74.77,  83.56, 100.  ,   0.  ,  89.22],
       [ 71.99,  83.03, 100.  ,   0.  ,  71.82],
       [ 99.91, 100.  ,  92.63,   0.  ,  26.46],
       [ 85.11,  90.01, 100.  ,   0.  ,  98.61],
       [ 93.01,  95.09, 100.  ,  71.55,   0.  ],
       [ 83.02,  84.33, 100.  ,   0.  ,  45.  ],
       [ 86.78,  99.25, 100.  ,  35.68,   0.  ],
       [ 95.7 ,  98.26,  93.79,   0.  , 100.  ],
       [ 93.06,  91.8 , 100.  ,  61.95,   0.  ],
       [ 89.8 , 100.  ,  94.  ,   0.  ,  86.96],
       [ 88.76,  97.92, 100.  ,  82.96,   0.  ],
       [ 71.5 ,  46.56, 100.  ,   0.  ,  77.67]])

### CART Results

In [16]:
cart_1 = ebm_scaled_results[:,0]
cart_2 = gnb_scaled_results[:,0]
cart_3 = dl_scaled_results[:,0]
cart_4 = lr_l2_scaled_results[:,0]
cart_5 = lr_scaled_results[:, 0]
cart_list = [cart_1, cart_2, cart_3, cart_4, cart_5]
cart_fin = pd.DataFrame(cart_list).T.mean(axis=1)

In [17]:
cart_fin

0     67.446
1     66.250
2     85.920
3     94.746
4     73.770
5     84.662
6     89.246
7     89.116
8     77.000
9     89.866
10    94.114
11    55.854
dtype: float64

In [18]:
cart_fin.mean()

80.66583333333334

In [19]:
gnb_1 = ebm_scaled_results[:,1]
gnb_2 = dl_scaled_results[:,4]
gnb_3 = lr_l2_scaled_results[:,4]
gnb_4 = lr_scaled_results[:,4]
gnb_5 = cart_scaled_results[:,1]
gnb_list = [gnb_1, gnb_2, gnb_3, gnb_4, gnb_5]
gnb_fin = pd.DataFrame(gnb_list).T.mean(axis=1)

In [20]:
gnb_fin

0     67.794
1     66.262
2     41.942
3     95.088
4      0.000
5     37.170
6      2.700
7     71.440
8      0.000
9     66.800
10     0.000
11    34.330
dtype: float64

In [21]:
gnb_fin.mean()

40.29383333333333

In [22]:
lr_1 = ebm_scaled_results[:,2]
lr_2 = gnb_scaled_results[:,2]
lr_3 = dl_scaled_results[:, 2]
lr_4 = lr_l2_scaled_results[:,2]
lr_5 = cart_scaled_results[:,2]
lr_list = [lr_1, lr_2, lr_3, lr_4, lr_5]
lr_fin = pd.DataFrame(lr_list).T.mean(axis=1)

In [23]:
lr_fin

0     66.254
1     76.806
2     38.250
3     84.954
4     85.966
5     82.980
6     94.988
7     90.894
8     76.276
9     93.968
10    93.156
11    79.548
dtype: float64

In [24]:
lr_fin.mean()

80.33666666666666

In [25]:
lr_l2_1 = ebm_scaled_results[:, 3]
lr_l2_2 = gnb_scaled_results[:, 3]
lr_l2_3 = dl_scaled_results[:, 3]
lr_l2_4 = cart_scaled_results[:,3]
lr_l2_5 = lr_scaled_results[:, 2]
lr_l2_list = [lr_l2_1, lr_l2_2, lr_l2_3, lr_l2_4, lr_l2_5]
lr_l2_fin = pd.DataFrame(lr_l2_list).T.mean(axis=1)

In [26]:
lr_l2_fin

0     82.854
1     79.874
2     52.998
3     93.878
4     83.408
5     88.902
6     95.904
7     91.328
8     76.874
9     98.264
10    91.940
11    79.460
dtype: float64

In [27]:
lr_l2_fin.mean()

84.64033333333334

In [28]:
dl_1 = ebm_scaled_results[:, 4]
dl_2 = gnb_scaled_results[:, 4]
dl_3 = lr_l2_scaled_results[:, 3]
dl_4 = cart_scaled_results[:, 4]
dl_5 = lr_scaled_results[:, 3]
dl_list = [dl_1, dl_2, dl_3, dl_4, dl_5]
dl_fin = pd.DataFrame(dl_list).T.mean(axis=1)

In [29]:
dl_fin

0      0.000
1     20.000
2     28.264
3      0.000
4     38.426
5      3.480
6     19.988
7      0.000
8     72.940
9      0.000
10    69.056
11     5.352
dtype: float64

In [30]:
dl_fin.mean()

21.45883333333333

In [31]:
ebm_1 = gnb_scaled_results[:, 1]
ebm_2 = dl_scaled_results[:, 1]
ebm_3 = lr_l2_scaled_results[:,1]
ebm_4 = cart_scaled_results[:,0]
ebm_5 = lr_scaled_results[:, 1]
ebm_list = [ebm_1, ebm_2, ebm_3, ebm_4, ebm_5]
ebm_fin = pd.DataFrame(ebm_list).T.mean(axis=1)

In [32]:
ebm_fin

0     90.334
1     75.002
2     98.388
3     76.394
4     92.962
5     68.150
6     96.306
7     98.628
8     80.534
9     78.672
10    92.020
11    42.584
dtype: float64

In [33]:
ebm_fin.mean()

82.49783333333335

In [46]:
clfs = ["CART", "EBM", "GNB","LR", "LR_l2", "DL"]

In [51]:
from scipy.stats import rankdata

def wilcoxon_test(mean_results):
    # mean_results = np.mean(results, axis=2).T
    print("\nMean results:\n", mean_results)

    ranks = []
    for mr in mean_results:
        ranks.append(rankdata(mr).tolist())
    ranks = np.array(ranks)
    ranks = len(ranks[0])+1 - ranks.astype(int)
    print("\nRanks:\n", ranks)

    mean_ranks = np.mean(ranks, axis=0)
    print(mean_ranks)

    from scipy.stats import ranksums

    alfa = .05
    w_statistic = np.zeros((len(clfs), len(clfs)))
    p_value = np.zeros((len(clfs), len(clfs)))

    for i in range(len(clfs)):
        for j in range(len(clfs)):
            w_statistic[i, j], p_value[i, j] = ranksums(ranks.T[i], ranks.T[j])
    
    from tabulate import tabulate

    headers = clfs
    names_column = np.expand_dims((np.array(clfs)), axis=1)
    w_statistic_table = np.concatenate((names_column, w_statistic), axis=1)
    w_statistic_table = tabulate(w_statistic_table, headers, floatfmt=".2f")
    p_value_table = np.concatenate((names_column, p_value), axis=1)
    p_value_table = tabulate(p_value_table, headers, floatfmt=".2f")
    print("\nw-statistic:\n", w_statistic_table, "\n\np-value:\n", p_value_table)
    
    advantage = np.zeros((len(clfs), len(clfs)))
    advantage[w_statistic > 0] = 1
    advantage_table = tabulate(np.concatenate(
        (names_column, advantage), axis=1), headers)
    print("\nAdvantage:\n", advantage_table)

    significance = np.zeros((len(clfs), len(clfs)))
    significance[p_value <= alfa] = 1
    significance_table = tabulate(np.concatenate(
        (names_column, significance), axis=1), headers)
    print("\nStatistical significance (alpha = 0.05):\n", significance_table)

    stat_better = significance * advantage
    stat_better_table = tabulate(np.concatenate(
        (names_column, stat_better), axis=1), headers)
    print("Statistically significantly better:\n", stat_better_table)

In [52]:
results_wilcox= pd.DataFrame([ebm_fin, cart_fin, lr_fin, lr_l2_fin, gnb_fin, dl_fin]).T
results_wilcox

Unnamed: 0,0,1,2,3,4,5
0,90.334,67.446,66.254,82.854,67.794,0.0
1,75.002,66.25,76.806,79.874,66.262,20.0
2,98.388,85.92,38.25,52.998,41.942,28.264
3,76.394,94.746,84.954,93.878,95.088,0.0
4,92.962,73.77,85.966,83.408,0.0,38.426
5,68.15,84.662,82.98,88.902,37.17,3.48
6,96.306,89.246,94.988,95.904,2.7,19.988
7,98.628,89.116,90.894,91.328,71.44,0.0
8,80.534,77.0,76.276,76.874,0.0,72.94
9,78.672,89.866,93.968,98.264,66.8,0.0


In [53]:
wilcoxon_test(np.array(results_wilcox))


Mean results:
 [[90.334 67.446 66.254 82.854 67.794  0.   ]
 [75.002 66.25  76.806 79.874 66.262 20.   ]
 [98.388 85.92  38.25  52.998 41.942 28.264]
 [76.394 94.746 84.954 93.878 95.088  0.   ]
 [92.962 73.77  85.966 83.408  0.    38.426]
 [68.15  84.662 82.98  88.902 37.17   3.48 ]
 [96.306 89.246 94.988 95.904  2.7   19.988]
 [98.628 89.116 90.894 91.328 71.44   0.   ]
 [80.534 77.    76.276 76.874  0.    72.94 ]
 [78.672 89.866 93.968 98.264 66.8    0.   ]
 [92.02  94.114 93.156 91.94   0.    69.056]
 [42.584 55.854 79.548 79.46  34.33   5.352]]

Ranks:
 [[1 4 5 2 3 6]
 [3 5 2 1 4 6]
 [1 2 5 3 4 6]
 [5 2 4 3 1 6]
 [1 4 2 3 6 5]
 [4 2 3 1 5 6]
 [1 4 3 2 6 5]
 [1 4 3 2 5 6]
 [1 2 4 3 6 5]
 [4 3 2 1 5 6]
 [3 1 2 4 6 5]
 [4 3 1 2 5 6]]
[2.41666667 3.         3.         2.25       4.66666667 5.66666667]

w-statistic:
          CART    EBM    GNB    LR    LR_l2     DL
-----  ------  -----  -----  ----  -------  -----
CART     0.00  -1.07  -1.04  0.09    -2.94  -4.04
EBM      1.07   0.00