In [1]:
import numpy as np

In [9]:
from sklearn.tree import DecisionTreeClassifier
from interpret.glassbox import ExplainableBoostingClassifier
from interpret.glassbox import DecisionListClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB


# Classifiers under comparison, keyed by the short labels that later serve as
# row/column headers of the result tables. Insertion order is preserved.
# NOTE(review): presumably this order has to match axis 0 of the stored
# results array — confirm against the script that produced the scores.
RANDOM_STATE = 1234  # seed shared by every estimator that accepts one

clfs = {
    "CART": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "EBM": ExplainableBoostingClassifier(),
    "LR": LogisticRegression(random_state=RANDOM_STATE),
    "GNB": GaussianNB(),
    "DL": DecisionListClassifier(random_state=RANDOM_STATE),
}

In [3]:
# Read the stored AUC scores from disk and report the array's dimensions.
auc_results_path = 'test_results/auc/auc_results.npy'
results = np.load(auc_results_path)
print("\nScores:\n", results.shape)


Scores:
 (5, 11, 10)


In [4]:
# Average `results` over its last axis (axis 2), then transpose so that the
# former axis 1 runs along the rows and the former axis 0 along the columns.
mean_results = results.mean(axis=2).T
print("\nMean results:\n", mean_results)


Mean results:
 [[0.93055088 0.95912893 0.95713197 0.92571745 0.92121602]
 [0.76560313 0.82247063 0.80306682 0.74701086 0.51868132]
 [0.68984422 0.72551118 0.58374287 0.65885828 0.67644476]
 [0.68979695 0.64459081 0.61833333 0.70959596 0.5       ]
 [0.55964083 0.54397788 0.54037836 0.57965632 0.58248116]
 [0.66897205 0.7113218  0.71082914 0.71373305 0.72512858]
 [0.61519013 0.66566033 0.67338044 0.6697739  0.50309524]
 [0.68904082 0.77068027 0.7837619  0.77835374 0.7462381 ]
 [0.54330156 0.50563318 0.50232776 0.64755341 0.50684868]
 [0.57333082 0.65364844 0.66281159 0.66257785 0.51253968]
 [0.56879518 0.59993574 0.50007108 0.5506782  0.56451635]]


In [5]:
from scipy.stats import rankdata
# Rank the values inside every row of `mean_results` independently.
# `rankdata` gives rank 1 to the smallest value, so a larger rank means a
# larger mean score within that row (presumably one row per dataset).
ranks = np.array(
    [rankdata(row_scores).tolist() for row_scores in mean_results]
)
print("\nRanks:\n", ranks)


Ranks:
 [[3. 5. 4. 2. 1.]
 [3. 5. 4. 2. 1.]
 [4. 5. 1. 2. 3.]
 [4. 3. 2. 5. 1.]
 [3. 2. 1. 4. 5.]
 [1. 3. 2. 4. 5.]
 [2. 3. 5. 4. 1.]
 [1. 3. 5. 4. 2.]
 [4. 2. 1. 5. 3.]
 [2. 3. 5. 4. 1.]
 [4. 5. 1. 2. 3.]]


In [7]:
# Average the ranks down each column, giving one mean rank per column of
# `ranks`; the bare name on the last line displays the result.
mean_ranks = ranks.mean(axis=0)
mean_ranks

array([2.81818182, 3.54545455, 2.81818182, 3.45454545, 2.36363636])

## Wilcoxon rank-sum statistical test

In [10]:
from scipy.stats import ranksums

# Significance threshold used when the p-values are turned into a 0/1 table.
alfa = .05

# Square matrices with one row and one column per entry of `clfs`.
n_clfs = len(clfs)
w_statistic = np.zeros((n_clfs, n_clfs))
p_value = np.zeros_like(w_statistic)

# One row of ranks per column of `ranks` after the transpose.
ranks_by_clf = ranks.T

# Run the rank-sum test for every ordered pair (i, j), diagonal included.
# NOTE(review): `ranksums` treats its two samples as independent and does not
# handle ties, whereas these ranks are small integers computed row by row —
# confirm this is the intended test (scipy.stats.wilcoxon is the paired one).
for i, j in np.ndindex(w_statistic.shape):
    stat, p = ranksums(ranks_by_clf[i], ranks_by_clf[j])
    w_statistic[i, j] = stat
    p_value[i, j] = p

In [12]:
from tabulate import tabulate

# Column headers are the keys of `clfs`; the same labels, shaped as a single
# column, are glued to the left of each matrix to act as row names.
headers = list(clfs)
names_column = np.array(headers).reshape(-1, 1)

# Joining the string column with the float matrix yields a string array;
# tabulate parses the numeric strings again and applies `floatfmt` to them.
w_statistic_table = tabulate(
    np.hstack((names_column, w_statistic)), headers, floatfmt=".2f"
)
p_value_table = tabulate(
    np.hstack((names_column, p_value)), headers, floatfmt=".2f"
)
print("\nw-statistic:\n", w_statistic_table, "\n\np-value:\n", p_value_table)


w-statistic:
         CART    EBM     LR    GNB    DL
----  ------  -----  -----  -----  ----
CART    0.00  -1.12  -0.03  -1.21  0.92
EBM     1.12   0.00   1.15   0.23  1.84
LR      0.03  -1.15   0.00  -0.89  0.56
GNB     1.21  -0.23   0.89   0.00  1.74
DL     -0.92  -1.84  -0.56  -1.74  0.00 

p-value:
         CART    EBM    LR    GNB    DL
----  ------  -----  ----  -----  ----
CART    1.00   0.26  0.97   0.22  0.36
EBM     0.26   1.00  0.25   0.82  0.07
LR      0.97   0.25  1.00   0.38  0.58
GNB     0.22   0.82  0.38   1.00  0.08
DL      0.36   0.07  0.58   0.08  1.00


In [13]:
# 1.0 where the row entry's statistic against the column entry is strictly
# positive, 0.0 everywhere else (including the diagonal, which is exactly 0).
advantage = (w_statistic > 0).astype(float)

labelled_advantage = np.concatenate((names_column, advantage), axis=1)
advantage_table = tabulate(labelled_advantage, headers)
print("\nAdvantage:\n", advantage_table)


Advantage:
         CART    EBM    LR    GNB    DL
----  ------  -----  ----  -----  ----
CART       0      0     0      0     1
EBM        1      0     1      1     1
LR         1      0     0      0     1
GNB        1      0     1      0     1
DL         0      0     0      0     0


In [14]:
# 1.0 where the pairwise p-value is at or below the threshold `alfa`,
# 0.0 everywhere else.
significance = (p_value <= alfa).astype(float)

labelled_significance = np.concatenate((names_column, significance), axis=1)
significance_table = tabulate(labelled_significance, headers)

# The threshold in the heading is taken from `alfa` instead of being typed in
# by hand, so the printed label cannot drift from the value actually used.
print(f"\nStatistical significance (alpha = {alfa}):\n", significance_table)


Statistical significance (alpha = 0.05):
         CART    EBM    LR    GNB    DL
----  ------  -----  ----  -----  ----
CART       0      0     0      0     0
EBM        0      0     0      0     0
LR         0      0     0      0     0
GNB        0      0     0      0     0
DL         0      0     0      0     0
