In [8]:
import numpy as np
import pandas as pd

import tqdm

from scipy.stats import wilcoxon

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier

from datasets import binclas_datasets

In [9]:
# Pull the 'data_loader_function' column out of the dataset registry and turn
# it into a plain Python list; each entry is called with no arguments further
# down to obtain one dataset.
loader_column = binclas_datasets['data_loader_function']
data_loaders = loader_column.values.tolist()

In [10]:
# Experiment: for every dataset, compare the cross-validated ROC AUC of a
# random forest trained on the original features against one trained on the
# sign-flipped features (-X), and test the paired per-fold AUCs with a
# one-sided Wilcoxon signed-rank test.
results = []

validator = RepeatedStratifiedKFold(n_splits=5, n_repeats=400, random_state=5)

# One seeded generator shared by every forest: each fit draws fresh seeds from
# it, so the "original" and "flipped" forests stay independently randomised
# while a Restart & Run All reproduces the same numbers.
forest_rng = np.random.RandomState(5)

# The forest hyperparameters do not depend on the fold, so build them once.
min_samples_leaf = 1
max_depth = None

params = {'min_samples_leaf': min_samples_leaf,
          'max_depth': max_depth}

for data_loader in data_loaders:
    dataset = data_loader()
    # NOTE(review): positional indexing below assumes X and y are numpy
    # arrays (not DataFrames) -- confirm against the loaders.
    X = dataset['data']
    y = dataset['target']

    aucs_orig = []
    aucs_flipped = []
    n_skipped = 0
    last_error = None

    # `groups` is ignored by RepeatedStratifiedKFold, so it is not passed.
    # `total` gives tqdm a percentage/ETA even though split() is a generator.
    for train, test in tqdm.tqdm(validator.split(X, y),
                                 total=validator.get_n_splits()):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]

        try:
            # Column 1 of predict_proba is the second class in `classes_` order.
            pred = RandomForestClassifier(random_state=forest_rng, **params)\
                        .fit(X_train, y_train).predict_proba(X_test)[:, 1]
            auc_orig = roc_auc_score(y_test, pred)

            pred = RandomForestClassifier(random_state=forest_rng, **params)\
                        .fit(-X_train, y_train).predict_proba(-X_test)[:, 1]
            auc_flipped = roc_auc_score(y_test, pred)
        except Exception as exc:
            # Best effort: a fold that cannot be scored is skipped, but it is
            # counted and reported instead of vanishing silently.
            # `except Exception` lets KeyboardInterrupt stop the long run.
            n_skipped += 1
            last_error = repr(exc)
            continue

        # Append both scores together so the two lists always stay paired,
        # fold by fold, which the signed-rank test below relies on.
        aucs_orig.append(auc_orig)
        aucs_flipped.append(auc_flipped)

    if n_skipped:
        print('{}: skipped {} fold(s); last error: {}'.format(
            dataset['name'], n_skipped, last_error))

    if aucs_orig:
        mean_orig = np.mean(aucs_orig)
        mean_flipped = np.mean(aucs_flipped)
        # One-sided test; alternative='less' refers to the paired differences
        # (aucs_orig - aucs_flipped). Zero differences are split between the
        # positive and negative ranks ('zsplit').
        p_full = wilcoxon(aucs_orig, aucs_flipped,
                          alternative='less', zero_method='zsplit').pvalue
    else:
        # No fold could be scored for this dataset: record NaNs rather than
        # failing inside wilcoxon on empty samples.
        mean_orig = mean_flipped = p_full = np.nan

    row = [dataset['name'], mean_orig, mean_flipped,
           aucs_orig, aucs_flipped, p_full]

    results.append(row)

    # Rebuilt after every dataset so an interrupted run still leaves a usable
    # `results_pdf` holding everything finished so far.
    results_pdf = pd.DataFrame(results, columns=['name', 'auc_orig', 'auc_flipped', 'aucs_orig', 'aucs_flipped', 'p_full'])

    # Running summary, without the bulky per-fold list columns.
    print(results_pdf[['name',
                        'auc_orig',
                        'auc_flipped',
                        'p_full']])
        


2000it [09:10,  3.63it/s]


       name  auc_orig  auc_flipped        p_full
0  haberman  0.668074      0.67179  8.374357e-12


2000it [08:13,  4.05it/s]


           name  auc_orig  auc_flipped        p_full
0      haberman  0.668074     0.671790  8.374357e-12
1  new_thyroid1  0.999211     0.999018  9.977881e-01


2000it [07:29,  4.45it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01


2000it [08:16,  4.03it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3              bupa  0.763609     0.762488  9.899759e-01


2000it [08:03,  4.13it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3              bupa  0.763609     0.762488  9.899759e-01
4  cleveland-0_vs_4  0.972031     0.974623  3.934542e-13


2000it [09:32,  3.50it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3              bupa  0.763609     0.762488  9.899759e-01
4  cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5            ecoli1  0.954112     0.954450  1.572920e-01


2000it [08:41,  3.84it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3              bupa  0.763609     0.762488  9.899759e-01
4  cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5            ecoli1  0.954112     0.954450  1.572920e-01
6      poker-9_vs_7  0.984884     0.986932  8.602727e-06


2000it [08:37,  3.86it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3              bupa  0.763609     0.762488  9.899759e-01
4  cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5            ecoli1  0.954112     0.954450  1.572920e-01
6      poker-9_vs_7  0.984884     0.986932  8.602727e-06
7            monk-2  1.000000     0.999997  2.854241e-01


2000it [09:06,  3.66it/s]


               name  auc_orig  auc_flipped        p_full
0          haberman  0.668074     0.671790  8.374357e-12
1      new_thyroid1  0.999211     0.999018  9.977881e-01
2  shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3              bupa  0.763609     0.762488  9.899759e-01
4  cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5            ecoli1  0.954112     0.954450  1.572920e-01
6      poker-9_vs_7  0.984884     0.986932  8.602727e-06
7            monk-2  1.000000     0.999997  2.854241e-01
8         hepatitis  0.876341     0.876665  1.405393e-01


2000it [09:39,  3.45it/s]


                   name  auc_orig  auc_flipped        p_full
0              haberman  0.668074     0.671790  8.374357e-12
1          new_thyroid1  0.999211     0.999018  9.977881e-01
2      shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                  bupa  0.763609     0.762488  9.899759e-01
4      cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                ecoli1  0.954112     0.954450  1.572920e-01
6          poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                monk-2  1.000000     0.999997  2.854241e-01
8             hepatitis  0.876341     0.876665  1.405393e-01
9  yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01


2000it [09:14,  3.60it/s]


                    name  auc_orig  auc_flipped        p_full
0               haberman  0.668074     0.671790  8.374357e-12
1           new_thyroid1  0.999211     0.999018  9.977881e-01
2       shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                   bupa  0.763609     0.762488  9.899759e-01
4       cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                 ecoli1  0.954112     0.954450  1.572920e-01
6           poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                 monk-2  1.000000     0.999997  2.854241e-01
8              hepatitis  0.876341     0.876665  1.405393e-01
9   yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10          mammographic  0.867661     0.867841  1.127779e-01


2000it [09:27,  3.52it/s]


                    name  auc_orig  auc_flipped        p_full
0               haberman  0.668074     0.671790  8.374357e-12
1           new_thyroid1  0.999211     0.999018  9.977881e-01
2       shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                   bupa  0.763609     0.762488  9.899759e-01
4       cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                 ecoli1  0.954112     0.954450  1.572920e-01
6           poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                 monk-2  1.000000     0.999997  2.854241e-01
8              hepatitis  0.876341     0.876665  1.405393e-01
9   yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10          mammographic  0.867661     0.867841  1.127779e-01
11               saheart  0.722080     0.721963  6.955698e-01


2000it [07:03,  4.72it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04


2000it [10:06,  3.30it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  

2000it [08:42,  3.83it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  

2000it [09:36,  3.47it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  

2000it [08:27,  3.94it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  

2000it [09:00,  3.70it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  

2000it [09:18,  3.58it/s]


                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  

2000it [08:19,  4.00it/s]

                            name  auc_orig  auc_flipped        p_full
0                       haberman  0.668074     0.671790  8.374357e-12
1                   new_thyroid1  0.999211     0.999018  9.977881e-01
2               shuttle-6_vs_2-3  1.000000     1.000000  3.782855e-01
3                           bupa  0.763609     0.762488  9.899759e-01
4               cleveland-0_vs_4  0.972031     0.974623  3.934542e-13
5                         ecoli1  0.954112     0.954450  1.572920e-01
6                   poker-9_vs_7  0.984884     0.986932  8.602727e-06
7                         monk-2  1.000000     0.999997  2.854241e-01
8                      hepatitis  0.876341     0.876665  1.405393e-01
9           yeast-0-3-5-9_vs_7-8  0.794184     0.793264  9.174351e-01
10                  mammographic  0.867661     0.867841  1.127779e-01
11                       saheart  0.722080     0.721963  6.955698e-01
12  lymphography-normal-fibrosis  0.993940     0.994824  6.933358e-04
13                  




In [11]:
# Persist the per-dataset summary table to disk. The 'aucs_orig' and
# 'aucs_flipped' columns hold Python lists of per-fold scores, so they are
# written out as their string representation.
results_pdf.to_csv(path_or_buf='existance-classification-rf.csv')