In [1]:
import numpy as np
import pandas as pd

import tqdm

from scipy.stats import wilcoxon

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import r2_score

from flipping_random_forest import FlippingDecisionTreeRegressor

from datasets import regr_datasets

In [2]:
data_loaders = regr_datasets['data_loader_function'].values.tolist()

In [3]:
results = []

validator = RepeatedKFold(n_splits=5, n_repeats=2000, random_state=5)

for data_loader in data_loaders:
    dataset = data_loader()
    X = dataset['data']
    y = dataset['target']
    
    r2s_orig = []
    r2s_flipped = []
    r2s_baseline = []
    r2s_baseline_flipped = []
    r2s_flipping_full = []
    r2s_flipping_coord = []
    
    for train, test in tqdm.tqdm(validator.split(X, y, y)):
        X_train = X[train]
        X_test = X[test]
        y_train = y[train]
        y_test = y[test]
        
        min_samples_leaf = 1#np.random.randint(1, 21)
        max_depth = None#np.random.choice([None, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        
        params = {'min_samples_leaf': min_samples_leaf,
                    'max_depth': max_depth}
        
        #try:
        pred = DecisionTreeRegressor(**params).fit(X_train, y_train).predict(X_test)
        r2s_orig.append(r2_score(y_test, pred))
        
        #pred = RandomForestRegressor().fit(-X_train, y_train).predict(-X_test)
        #r2s_flipped.append(r2_score(y_test, pred))
        r2s_flipped.append(0)
        
        pred = FlippingDecisionTreeRegressor(**params).fit(X_train, y_train).predict(X_test)
        r2s_baseline.append(r2_score(y_test, pred))
        
        #pred = FlippingRandomForestRegressor().fit(-X_train, y_train).predict(-X_test)
        #r2s_baseline_flipped.append(r2_score(y_test, pred))
        r2s_baseline_flipped.append(0)
        
        #pred = FlippingDecisionTreeRegressor(**params).fit(X_train, y_train).predict(X_test)
        #r2s_flipping_full.append(r2_score(y_test, pred))
        r2s_flipping_full.append(0)
        
        r2s_flipping_coord.append(0)
    
    tmp = [dataset['name'], np.mean(r2s_orig), np.mean(r2s_flipped), np.mean(r2s_baseline), 
                np.mean(r2s_baseline_flipped), np.mean(r2s_flipping_full), np.mean(r2s_flipping_coord), 
                r2s_orig, r2s_flipped, r2s_baseline, r2s_baseline_flipped, 
                r2s_flipping_full, r2s_flipping_coord]
    
    tmp = tmp + [wilcoxon(r2s_orig, r2s_baseline, alternative='less', zero_method='zsplit').pvalue]
    
    results.append(tmp)
    
    results_pdf = pd.DataFrame(results, columns=['name', 'r2_orig', 'r2_flipped', 'r2_baseline', 'r2_baseline_flipped', 
                                                'r2_flipping_full', 'r2_flipping_coord', 'r2s_orig', 'r2s_flipped', 'r2s_baseline', 
                                                'r2s_baseline_flipped', 'r2s_flipping_full', 'r2s_flipping_coord', 'p_full'])
    results_pdf['r2_baseline_min'] = results_pdf[['r2_baseline', 'r2_baseline_flipped']].apply(lambda x: min(x), axis=1)
    print(results_pdf[['name', 
                        'r2_orig', 
                        #'r2_flipped', 
                        'r2_baseline', 
                        #'r2_baseline_flipped', 
                        #'r2_baseline_min', 
                        'r2_flipping_full', 
                        'r2_flipping_coord', 'p_full']])
        


10000it [00:35, 280.60it/s]


     name   r2_orig  r2_baseline  r2_flipping_full  r2_flipping_coord  \
0  o-ring  0.106878     0.185584               0.0                0.0   

         p_full  
0  9.098317e-79  


10000it [00:24, 407.72it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  


10000it [00:33, 300.06it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  


10000it [00:31, 320.18it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.568351e-01  


10000it [00:31, 313.02it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   
4                        servo  0.480690     0.481034               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.568351e-01  
4                0.0  1.502409e-04  


10000it [00:48, 208.06it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   
4                        servo  0.480690     0.481034               0.0   
5          yacht_hydrodynamics  0.992117     0.992129               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.568351e-01  
4                0.0  1.502409e-04  
5                0.0  4.343179e-01  


10000it [00:48, 207.19it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   
4                        servo  0.480690     0.481034               0.0   
5          yacht_hydrodynamics  0.992117     0.992129               0.0   
6                     autoMPG6  0.765998     0.768365               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.568351e-01  
4                0.0  1.502409e-04  
5                0.0  4.343179e-01  
6                0.0  2.545674e-26  


10000it [00:50, 196.19it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   
4                        servo  0.480690     0.481034               0.0   
5          yacht_hydrodynamics  0.992117     0.992129               0.0   
6                     autoMPG6  0.765998     0.768365               0.0   
7           excitation_current  0.999822     0.999829               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.568351e-01  
4                0.0  1.502409e-04  
5                0.0  4.343179e-01  
6                0.0  2.545674e-26  
7                0.0  1.4207

10000it [00:55, 180.38it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   
4                        servo  0.480690     0.481034               0.0   
5          yacht_hydrodynamics  0.992117     0.992129               0.0   
6                     autoMPG6  0.765998     0.768365               0.0   
7           excitation_current  0.999822     0.999829               0.0   
8        real_estate_valuation  0.459110     0.460785               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.568351e-01  
4                0.0  1.502409e-04  
5                0.0  4.343

10000it [00:53, 185.97it/s]


                          name   r2_orig  r2_baseline  r2_flipping_full  \
0                       o-ring  0.106878     0.185584               0.0   
1  stock_portfolio_performance  0.986690     0.986694               0.0   
2                      wsn-ale -0.054752    -0.034139               0.0   
3                 daily-demand  0.660337     0.659757               0.0   
4                        servo  0.480690     0.481034               0.0   
5          yacht_hydrodynamics  0.992117     0.992129               0.0   
6                     autoMPG6  0.765998     0.768365               0.0   
7           excitation_current  0.999822     0.999829               0.0   
8        real_estate_valuation  0.459110     0.460785               0.0   
9                      wankara  0.969425     0.969524               0.0   

   r2_flipping_coord        p_full  
0                0.0  9.098317e-79  
1                0.0  3.736719e-01  
2                0.0  4.435895e-18  
3                0.0  6.56

10000it [01:26, 115.56it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   

    r2_flipping_coord        p_full  
0                 0.0  9.098317e-79  
1          

10000it [01:34, 106.08it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   

    r2_flip

10000it [01:18, 127.99it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [01:05, 153.29it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [00:45, 219.51it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [01:44, 95.43it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [01:49, 91.34it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [03:32, 46.99it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [01:42, 97.47it/s]


                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs

10000it [03:15, 51.26it/s]

                           name   r2_orig  r2_baseline  r2_flipping_full  \
0                        o-ring  0.106878     0.185584               0.0   
1   stock_portfolio_performance  0.986690     0.986694               0.0   
2                       wsn-ale -0.054752    -0.034139               0.0   
3                  daily-demand  0.660337     0.659757               0.0   
4                         servo  0.480690     0.481034               0.0   
5           yacht_hydrodynamics  0.992117     0.992129               0.0   
6                      autoMPG6  0.765998     0.768365               0.0   
7            excitation_current  0.999822     0.999829               0.0   
8         real_estate_valuation  0.459110     0.460785               0.0   
9                       wankara  0.969425     0.969524               0.0   
10                      plastic  0.648355     0.646112               0.0   
11                        laser  0.922051     0.924615               0.0   
12        qs




In [4]:
results_pdf.to_csv("regression-tree.csv")

In [5]:
for idx, row in results_pdf.iterrows():
    #w0 = wilcoxon(row['r2s_baseline'], row['r2s_flipping_full'], alternative='greater').pvalue
    w1 = wilcoxon(row['r2s_baseline'], row['r2s_flipping_full'], alternative='less').pvalue
    print(row['name'], w1)

o-ring 1.0
stock_portfolio_performance 1.0
wsn-ale 1.0
daily-demand 1.0
servo 1.0
yacht_hydrodynamics 1.0
autoMPG6 1.0
excitation_current 1.0
real_estate_valuation 1.0
wankara 1.0
plastic 1.0
laser 1.0
qsar-aquatic-toxicity 1.0
baseball 1.0
maternal_health_risk 1.0
medical_cost 1.0
boom_bikes 1.0
wizmir 1.0
forestfires 0.0
winequality_red 1.0
