In [8]:
import numpy as np
import pandas as pd

import tqdm

from scipy.stats import wilcoxon

from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor

from datasets import regr_datasets

In [9]:
# Pull the loader callables out of the dataset registry as a plain Python list;
# each entry is later called with no arguments to obtain one dataset.
loader_column = regr_datasets['data_loader_function']
data_loaders = loader_column.values.tolist()

In [10]:
# Experiment: for every regression dataset, compare the cross-validated R^2 of
# a decision tree fitted on the original features with the R^2 of a tree fitted
# on the sign-flipped features (-X), and test with a one-sided paired Wilcoxon
# signed-rank test whether the original scores are lower than the flipped ones.
results = []

validator = RepeatedKFold(n_splits=5, n_repeats=400, random_state=5)

# Tree hyper-parameters are identical for every fold, so they are built once.
min_samples_leaf = 1
max_depth = None

params = {'min_samples_leaf': min_samples_leaf,
          'max_depth': max_depth}

# NOTE(review): DecisionTreeRegressor is created without `random_state`, so
# ties between equally good splits may be broken differently from run to run
# and between the original/flipped fits -- confirm whether that is intended.

result_columns = ['name', 'r2_orig', 'r2_flipped', 'r2s_orig', 'r2s_flipped', 'p_full']

for data_loader in data_loaders:
    dataset = data_loader()
    X = dataset['data']
    y = dataset['target']

    # Paired per-fold scores: index i of both lists always refers to the same fold.
    r2s_orig = []
    r2s_flipped = []

    n_failed = 0
    last_error = None

    # RepeatedKFold ignores `groups`, so only X and y are passed; `total` lets
    # tqdm display a proper progress bar instead of a bare iteration counter.
    for train, test in tqdm.tqdm(validator.split(X, y), total=validator.get_n_splits()):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]

        try:
            pred_orig = DecisionTreeRegressor(**params).fit(X_train, y_train).predict(X_test)
            score_orig = r2_score(y_test, pred_orig)

            pred_flipped = DecisionTreeRegressor(**params).fit(-X_train, y_train).predict(-X_test)
            score_flipped = r2_score(y_test, pred_flipped)
        except Exception as exc:
            # Best effort: skip the fold, but remember that it happened.
            # `Exception` (not a bare except) keeps KeyboardInterrupt working.
            n_failed += 1
            last_error = exc
            continue

        # Append only after BOTH scores exist so the samples stay paired.
        r2s_orig.append(score_orig)
        r2s_flipped.append(score_flipped)

    if n_failed:
        print('{}: skipped {} fold(s); last error: {!r}'.format(dataset['name'], n_failed, last_error))

    if r2s_orig:
        mean_orig = np.mean(r2s_orig)
        mean_flipped = np.mean(r2s_flipped)
        p_full = wilcoxon(r2s_orig, r2s_flipped, alternative='less', zero_method='zsplit').pvalue
    else:
        # No fold succeeded: record NaNs instead of letting wilcoxon raise on
        # empty input and abort the remaining datasets.
        mean_orig = mean_flipped = p_full = np.nan

    row = [dataset['name'], mean_orig, mean_flipped,
           r2s_orig, r2s_flipped, p_full]

    results.append(row)

    # Rebuilt and printed after every dataset on purpose: it doubles as a
    # progress report and keeps `results_pdf` usable if the run is interrupted.
    results_pdf = pd.DataFrame(results, columns=result_columns)

    print(results_pdf[['name',
                        'r2_orig',
                        'r2_flipped',
                        'p_full']])
        


2000it [00:02, 923.40it/s] 


     name   r2_orig  r2_flipped        p_full
0  o-ring  0.111189    0.252839  4.755020e-28


2000it [00:03, 614.21it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01


2000it [00:04, 477.04it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01


2000it [00:03, 605.32it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04


2000it [00:03, 642.38it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04
4                        servo  0.469595    0.469226  9.999939e-01


2000it [00:08, 244.46it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04
4                        servo  0.469595    0.469226  9.999939e-01
5          yacht_hydrodynamics  0.992119    0.992220  5.855707e-02


2000it [00:08, 239.30it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04
4                        servo  0.469595    0.469226  9.999939e-01
5          yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                     autoMPG6  0.765681    0.765952  3.578707e-01


2000it [00:05, 337.06it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04
4                        servo  0.469595    0.469226  9.999939e-01
5          yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                     autoMPG6  0.765681    0.765952  3.578707e-01
7           excitation_current  0.999820    0.999821  1.581973e-01


2000it [00:08, 234.16it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04
4                        servo  0.469595    0.469226  9.999939e-01
5          yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                     autoMPG6  0.765681    0.765952  3.578707e-01
7           excitation_current  0.999820    0.999821  1.581973e-01
8        real_estate_valuation  0.459408    0.474642  5.347367e-06


2000it [00:09, 215.95it/s]


                          name   r2_orig  r2_flipped        p_full
0                       o-ring  0.111189    0.252839  4.755020e-28
1  stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                      wsn-ale -0.065874   -0.108504  8.774297e-01
3                 daily-demand  0.660793    0.679240  2.668946e-04
4                        servo  0.469595    0.469226  9.999939e-01
5          yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                     autoMPG6  0.765681    0.765952  3.578707e-01
7           excitation_current  0.999820    0.999821  1.581973e-01
8        real_estate_valuation  0.459408    0.474642  5.347367e-06
9                      wankara  0.969560    0.969572  4.522653e-01


2000it [00:09, 201.92it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00


2000it [00:11, 170.21it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04


2000it [00:10, 196.38it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03


2000it [00:08, 238.97it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3

2000it [00:06, 316.17it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3

2000it [00:10, 199.26it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3

2000it [00:13, 146.49it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3

2000it [00:21, 94.16it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3

2000it [00:13, 149.36it/s]


                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3

2000it [00:31, 63.40it/s]

                           name   r2_orig  r2_flipped        p_full
0                        o-ring  0.111189    0.252839  4.755020e-28
1   stock_portfolio_performance  0.986904    0.986716  4.985757e-01
2                       wsn-ale -0.065874   -0.108504  8.774297e-01
3                  daily-demand  0.660793    0.679240  2.668946e-04
4                         servo  0.469595    0.469226  9.999939e-01
5           yacht_hydrodynamics  0.992119    0.992220  5.855707e-02
6                      autoMPG6  0.765681    0.765952  3.578707e-01
7            excitation_current  0.999820    0.999821  1.581973e-01
8         real_estate_valuation  0.459408    0.474642  5.347367e-06
9                       wankara  0.969560    0.969572  4.522653e-01
10                      plastic  0.648509    0.631937  1.000000e+00
11                        laser  0.921516    0.923284  4.018644e-04
12        qsar-aquatic-toxicity  0.120081    0.126027  1.161612e-03
13                     baseball  0.400659    0.3




In [11]:
# Persist the accumulated results table (including the per-fold score lists)
# to CSV in the current working directory. The row index is written as well
# because `index` is left at its default.
# NOTE(review): the file name's spelling is kept unchanged in case other code
# reads this exact path -- confirm before renaming.
output_path = 'existance-regression-dt.csv'
results_pdf.to_csv(output_path)