In [9]:
import numpy as np
import pandas as pd

import tqdm

from scipy.stats import wilcoxon

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.tree import DecisionTreeClassifier

from datasets import binclas_datasets

In [10]:
# Gather the dataset loader callables listed in the `data_loader_function`
# entry of `binclas_datasets`; each one is later invoked with no arguments.
data_loaders = (
    binclas_datasets['data_loader_function']
    .values
    .tolist()
)

In [11]:
# Experiment: for every dataset, compare the ROC AUC of a decision tree fitted
# on the original features with the ROC AUC of a tree fitted on the sign-flipped
# features (-X) under repeated stratified cross-validation, then compare the
# paired per-fold AUCs with a one-sided Wilcoxon signed-rank test.
#
# Produces:
#   results     -- list with one row (list) per dataset
#   results_pdf -- DataFrame view of `results` with the columns below
RESULT_COLUMNS = ['name', 'auc_orig', 'auc_flipped', 'aucs_orig', 'aucs_flipped', 'p_full']

results = []

# Created up front so that `results_pdf` exists even when `data_loaders` is empty.
results_pdf = pd.DataFrame(results, columns=RESULT_COLUMNS)

validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=400, random_state=5)

# Tree hyperparameters are the same for every fold, so build them once.
min_samples_leaf = 1
max_depth = None
params = {'min_samples_leaf': min_samples_leaf,
          'max_depth': max_depth}

# NOTE(review): the classifiers are created without `random_state`, so the two
# fits in a fold are not seeded identically and reruns are presumably not
# bit-for-bit reproducible -- confirm whether that is intended.

for data_loader in data_loaders:
    dataset = data_loader()
    X = dataset['data']
    y = dataset['target']

    aucs_orig = []
    aucs_flipped = []
    n_failed_folds = 0

    # `groups` is not passed: the splitter stratifies on `y` only.
    folds = tqdm.tqdm(validator.split(X, y), total=validator.get_n_splits(X, y))

    for train, test in folds:
        X_train = X[train]
        X_test = X[test]
        y_train = y[train]
        y_test = y[test]

        try:
            pred = DecisionTreeClassifier(**params).fit(X_train, y_train).predict_proba(X_test)[:, 1]
            auc_orig = roc_auc_score(y_test, pred)

            pred = DecisionTreeClassifier(**params).fit(-X_train, y_train).predict_proba(-X_test)[:, 1]
            auc_flipped = roc_auc_score(y_test, pred)
        except Exception:
            # Best effort: skip folds that cannot be fitted or scored
            # (presumably degenerate folds -- verify), but count them instead
            # of hiding them, and let KeyboardInterrupt propagate.
            n_failed_folds += 1
            continue

        # Append only after BOTH scores exist, so the two lists always stay
        # paired element by element for the signed-rank test.
        aucs_orig.append(auc_orig)
        aucs_flipped.append(auc_flipped)

    if n_failed_folds:
        print('{}: skipped {} fold(s) that raised during fit/score'.format(
            dataset['name'], n_failed_folds))

    if aucs_orig:
        mean_orig = np.mean(aucs_orig)
        mean_flipped = np.mean(aucs_flipped)
        # One-sided test on the paired differences (orig - flipped); zero
        # differences are split between the positive and negative ranks.
        p_full = wilcoxon(aucs_orig, aucs_flipped,
                          alternative='less', zero_method='zsplit').pvalue
    else:
        # Every fold failed: nothing to average or test.
        mean_orig = mean_flipped = p_full = np.nan

    results.append([dataset['name'], mean_orig, mean_flipped,
                    aucs_orig, aucs_flipped, p_full])

    # Rebuilt after every dataset so partial results are available (and
    # printed as progress) while the remaining datasets are still running.
    results_pdf = pd.DataFrame(results, columns=RESULT_COLUMNS)

    print(results_pdf[['name',
                        'auc_orig',
                        'auc_flipped',
                        'p_full']])
        


2000it [00:07, 261.40it/s]


       name  auc_orig  auc_flipped    p_full
0  haberman  0.559107      0.56115  0.038294


2000it [00:05, 343.70it/s]


           name  auc_orig  auc_flipped    p_full
0      haberman  0.559107     0.561150  0.038294
1  new_thyroid1  0.930523     0.932205  0.031379


2000it [00:04, 419.76it/s]


               name  auc_orig  auc_flipped    p_full
0          haberman  0.559107     0.561150  0.038294
1      new_thyroid1  0.930523     0.932205  0.031379
2  shuttle-6_vs_2-3  1.000000     1.000000  0.500000


2000it [00:07, 277.05it/s]


               name  auc_orig  auc_flipped    p_full
0          haberman  0.559107     0.561150  0.038294
1      new_thyroid1  0.930523     0.932205  0.031379
2  shuttle-6_vs_2-3  1.000000     1.000000  0.500000
3              bupa  0.625072     0.624825  0.534558


2000it [00:05, 381.16it/s]


               name  auc_orig  auc_flipped    p_full
0          haberman  0.559107     0.561150  0.038294
1      new_thyroid1  0.930523     0.932205  0.031379
2  shuttle-6_vs_2-3  1.000000     1.000000  0.500000
3              bupa  0.625072     0.624825  0.534558
4  cleveland-0_vs_4  0.672698     0.673966  0.696630


2000it [00:05, 337.81it/s]


               name  auc_orig  auc_flipped    p_full
0          haberman  0.559107     0.561150  0.038294
1      new_thyroid1  0.930523     0.932205  0.031379
2  shuttle-6_vs_2-3  1.000000     1.000000  0.500000
3              bupa  0.625072     0.624825  0.534558
4  cleveland-0_vs_4  0.672698     0.673966  0.696630
5            ecoli1  0.833195     0.830251  0.999990


2000it [00:05, 378.13it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.559107     0.561150  3.829416e-02
1      new_thyroid1  0.930523     0.932205  3.137950e-02
2  shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3              bupa  0.625072     0.624825  5.345577e-01
4  cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5            ecoli1  0.833195     0.830251  9.999896e-01
6      poker-9_vs_7  0.606970     0.607936  1.266074e-13


2000it [00:05, 395.30it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.559107     0.561150  3.829416e-02
1      new_thyroid1  0.930523     0.932205  3.137950e-02
2  shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3              bupa  0.625072     0.624825  5.345577e-01
4  cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5            ecoli1  0.833195     0.830251  9.999896e-01
6      poker-9_vs_7  0.606970     0.607936  1.266074e-13
7            monk-2  1.000000     1.000000  5.000000e-01


2000it [00:06, 289.14it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.559107     0.561150  3.829416e-02
1      new_thyroid1  0.930523     0.932205  3.137950e-02
2  shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3              bupa  0.625072     0.624825  5.345577e-01
4  cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5            ecoli1  0.833195     0.830251  9.999896e-01
6      poker-9_vs_7  0.606970     0.607936  1.266074e-13
7            monk-2  1.000000     1.000000  5.000000e-01
8         hepatitis  0.665344     0.666045  3.253611e-01


2000it [00:10, 196.86it/s]


                   name  auc_orig  auc_flipped        p_full
0              haberman  0.559107     0.561150  3.829416e-02
1          new_thyroid1  0.930523     0.932205  3.137950e-02
2      shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                  bupa  0.625072     0.624825  5.345577e-01
4      cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                ecoli1  0.833195     0.830251  9.999896e-01
6          poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                monk-2  1.000000     1.000000  5.000000e-01
8             hepatitis  0.665344     0.666045  3.253611e-01
9  yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01


2000it [00:07, 263.47it/s]


                    name  auc_orig  auc_flipped        p_full
0               haberman  0.559107     0.561150  3.829416e-02
1           new_thyroid1  0.930523     0.932205  3.137950e-02
2       shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                   bupa  0.625072     0.624825  5.345577e-01
4       cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                 ecoli1  0.833195     0.830251  9.999896e-01
6           poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                 monk-2  1.000000     1.000000  5.000000e-01
8              hepatitis  0.665344     0.666045  3.253611e-01
9   yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10          mammographic  0.788031     0.788284  1.364901e-01


2000it [00:10, 185.26it/s]


                    name  auc_orig  auc_flipped        p_full
0               haberman  0.559107     0.561150  3.829416e-02
1           new_thyroid1  0.930523     0.932205  3.137950e-02
2       shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                   bupa  0.625072     0.624825  5.345577e-01
4       cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                 ecoli1  0.833195     0.830251  9.999896e-01
6           poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                 monk-2  1.000000     1.000000  5.000000e-01
8              hepatitis  0.665344     0.666045  3.253611e-01
9   yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10          mammographic  0.788031     0.788284  1.364901e-01
11               saheart  0.584061     0.583671  6.927832e-01


2000it [00:05, 384.45it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01


2000it [00:12, 160.35it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  

2000it [00:07, 263.27it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  

2000it [00:11, 166.76it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  

2000it [00:11, 178.16it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  

2000it [00:13, 148.18it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  

2000it [00:11, 176.41it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  

2000it [00:14, 140.15it/s]

                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.559107     0.561150  3.829416e-02
1                   new_thyroid1  0.930523     0.932205  3.137950e-02
2               shuttle-6_vs_2-3  1.000000     1.000000  5.000000e-01
3                           bupa  0.625072     0.624825  5.345577e-01
4               cleveland-0_vs_4  0.672698     0.673966  6.966295e-01
5                         ecoli1  0.833195     0.830251  9.999896e-01
6                   poker-9_vs_7  0.606970     0.607936  1.266074e-13
7                         monk-2  1.000000     1.000000  5.000000e-01
8                      hepatitis  0.665344     0.666045  3.253611e-01
9           yeast-0-3-5-9_vs_7-8  0.641793     0.639321  9.800358e-01
10                  mammographic  0.788031     0.788284  1.364901e-01
11                       saheart  0.584061     0.583671  6.927832e-01
12  lymphography-normal-fibrosis  0.889456     0.887554  8.150245e-01
13                  




In [12]:
# Persist the per-dataset results table to CSV in the working directory.
# The `aucs_orig` / `aucs_flipped` columns hold Python lists of per-fold scores.
results_pdf.to_csv(
    path_or_buf='existance-classification-dt.csv',
)