In [1]:
import numpy as np
import pandas as pd

import tqdm

from scipy.stats import wilcoxon

from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor

from datasets import regr_datasets

In [None]:
# Collect the dataset loader callables listed in the `data_loader_function`
# column of the regression-dataset registry into a plain Python list.
loader_column = regr_datasets['data_loader_function']
data_loaders = loader_column.values.tolist()

In [6]:
# Experiment: for every dataset, fit a random forest on the features as given
# and another one on the sign-flipped features (-X), score both with R^2 on the
# same held-out fold, and compare the two paired score samples with a
# one-sided Wilcoxon signed-rank test.
results = []

# Column layout of one result row and of the summary table.
result_columns = ['name', 'r2_orig', 'r2_flipped', 'r2s_orig', 'r2s_flipped', 'p_full']

# Defined up front so `results_pdf` exists even when `data_loaders` is empty.
results_pdf = pd.DataFrame(results, columns=result_columns)

validator = RepeatedKFold(n_splits=5, n_repeats=400, random_state=5)
n_folds = validator.get_n_splits()  # n_splits * n_repeats; gives tqdm a total/ETA

# A single seeded generator shared by all forests: every fit draws fresh
# randomness from it (the two forests of a fold stay independently seeded, as
# in an unseeded run), but the whole cell is reproducible from a fresh kernel.
forest_rng = np.random.RandomState(5)

# Forest hyper-parameters are loop-invariant, so they are built once.
min_samples_leaf = 1
max_depth = None

params = {'min_samples_leaf': min_samples_leaf,
          'max_depth': max_depth,
          'random_state': forest_rng}

for data_loader in data_loaders:
    dataset = data_loader()
    X = dataset['data']    # indexed with integer arrays below (X[train])
    y = dataset['target']

    r2s_orig = []
    r2s_flipped = []
    n_failed = 0
    first_error = None

    for train, test in tqdm.tqdm(validator.split(X, y), total=n_folds):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]

        try:
            pred_orig = RandomForestRegressor(**params).fit(X_train, y_train).predict(X_test)
            pred_flipped = RandomForestRegressor(**params).fit(-X_train, y_train).predict(-X_test)

            score_orig = r2_score(y_test, pred_orig)
            score_flipped = r2_score(y_test, pred_flipped)
        except Exception as exc:
            # Best effort: skip the fold, but remember that it happened.
            # `Exception` (not a bare except) lets KeyboardInterrupt stop the run.
            n_failed += 1
            if first_error is None:
                first_error = exc
            continue

        # Append only once BOTH scores exist, so the two lists stay paired
        # fold-by-fold, which the signed-rank test below relies on.
        r2s_orig.append(score_orig)
        r2s_flipped.append(score_flipped)

    if n_failed:
        print('{}: skipped {} of {} folds; first error: {!r}'.format(
            dataset['name'], n_failed, n_folds, first_error))

    if r2s_orig:
        mean_orig = np.mean(r2s_orig)
        mean_flipped = np.mean(r2s_flipped)
        # alternative='less': small p-values indicate the original orientation
        # scores lower than the flipped one. Zero differences are split
        # between the positive and negative ranks ('zsplit').
        p_full = wilcoxon(r2s_orig, r2s_flipped,
                          alternative='less', zero_method='zsplit').pvalue
    else:
        # Every fold failed: nothing to average or test.
        mean_orig = mean_flipped = p_full = np.nan

    results.append([dataset['name'], mean_orig, mean_flipped,
                    r2s_orig, r2s_flipped, p_full])

    # Rebuilt after every dataset so partial results are visible (and
    # available in `results_pdf`) while the long run is still in progress.
    results_pdf = pd.DataFrame(results, columns=result_columns)

    print(results_pdf[['name',
                       'r2_orig',
                       'r2_flipped',
                       'p_full']])
        


2000it [12:36,  2.64it/s]


       name   r2_orig  r2_flipped    p_full
0  autoMPG6  0.872129    0.871681  0.999077


2000it [15:03,  2.21it/s]


       name   r2_orig  r2_flipped    p_full
0  autoMPG6  0.872129    0.871681  0.999077
1  baseball  0.668188    0.666649  0.999605


2000it [17:49,  1.87it/s]


         name   r2_orig  r2_flipped    p_full
0    autoMPG6  0.872129    0.871681  0.999077
1    baseball  0.668188    0.666649  0.999605
2  boom_bikes  0.996079    0.996091  0.087284


2000it [07:41,  4.34it/s]


           name   r2_orig  r2_flipped        p_full
0      autoMPG6  0.872129    0.871681  9.990768e-01
1      baseball  0.668188    0.666649  9.996045e-01
2    boom_bikes  0.996079    0.996091  8.728442e-02
3  daily-demand  0.820918    0.825419  1.565313e-10


2000it [09:08,  3.65it/s]


                 name   r2_orig  r2_flipped        p_full
0            autoMPG6  0.872129    0.871681  9.990768e-01
1            baseball  0.668188    0.666649  9.996045e-01
2          boom_bikes  0.996079    0.996091  8.728442e-02
3        daily-demand  0.820918    0.825419  1.565313e-10
4  excitation_current  0.999906    0.999912  4.634640e-70


2000it [13:12,  2.52it/s]


                 name   r2_orig  r2_flipped        p_full
0            autoMPG6  0.872129    0.871681  9.990768e-01
1            baseball  0.668188    0.666649  9.996045e-01
2          boom_bikes  0.996079    0.996091  8.728442e-02
3        daily-demand  0.820918    0.825419  1.565313e-10
4  excitation_current  0.999906    0.999912  4.634640e-70
5               laser  0.963051    0.963348  1.390078e-02


2000it [09:50,  3.39it/s]


                   name   r2_orig  r2_flipped        p_full
0              autoMPG6  0.872129    0.871681  9.990768e-01
1              baseball  0.668188    0.666649  9.996045e-01
2            boom_bikes  0.996079    0.996091  8.728442e-02
3          daily-demand  0.820918    0.825419  1.565313e-10
4    excitation_current  0.999906    0.999912  4.634640e-70
5                 laser  0.963051    0.963348  1.390078e-02
6  maternal_health_risk  0.752387    0.753864  1.167036e-20


2000it [13:48,  2.42it/s]


                   name   r2_orig  r2_flipped        p_full
0              autoMPG6  0.872129    0.871681  9.990768e-01
1              baseball  0.668188    0.666649  9.996045e-01
2            boom_bikes  0.996079    0.996091  8.728442e-02
3          daily-demand  0.820918    0.825419  1.565313e-10
4    excitation_current  0.999906    0.999912  4.634640e-70
5                 laser  0.963051    0.963348  1.390078e-02
6  maternal_health_risk  0.752387    0.753864  1.167036e-20
7          medical_cost  0.694032    0.693415  9.999998e-01


2000it [06:01,  5.53it/s]


                   name   r2_orig  r2_flipped        p_full
0              autoMPG6  0.872129    0.871681  9.990768e-01
1              baseball  0.668188    0.666649  9.996045e-01
2            boom_bikes  0.996079    0.996091  8.728442e-02
3          daily-demand  0.820918    0.825419  1.565313e-10
4    excitation_current  0.999906    0.999912  4.634640e-70
5                 laser  0.963051    0.963348  1.390078e-02
6  maternal_health_risk  0.752387    0.753864  1.167036e-20
7          medical_cost  0.694032    0.693415  9.999998e-01
8                o-ring  0.141579    0.168934  5.519880e-06


2000it [11:00,  3.03it/s]


                   name   r2_orig  r2_flipped        p_full
0              autoMPG6  0.872129    0.871681  9.990768e-01
1              baseball  0.668188    0.666649  9.996045e-01
2            boom_bikes  0.996079    0.996091  8.728442e-02
3          daily-demand  0.820918    0.825419  1.565313e-10
4    excitation_current  0.999906    0.999912  4.634640e-70
5                 laser  0.963051    0.963348  1.390078e-02
6  maternal_health_risk  0.752387    0.753864  1.167036e-20
7          medical_cost  0.694032    0.693415  9.999998e-01
8                o-ring  0.141579    0.168934  5.519880e-06
9               plastic  0.715051    0.710116  1.000000e+00


2000it [14:00,  2.38it/s]


                     name   r2_orig  r2_flipped        p_full
0                autoMPG6  0.872129    0.871681  9.990768e-01
1                baseball  0.668188    0.666649  9.996045e-01
2              boom_bikes  0.996079    0.996091  8.728442e-02
3            daily-demand  0.820918    0.825419  1.565313e-10
4      excitation_current  0.999906    0.999912  4.634640e-70
5                   laser  0.963051    0.963348  1.390078e-02
6    maternal_health_risk  0.752387    0.753864  1.167036e-20
7            medical_cost  0.694032    0.693415  9.999998e-01
8                  o-ring  0.141579    0.168934  5.519880e-06
9                 plastic  0.715051    0.710116  1.000000e+00
10  qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01


2000it [11:19,  2.94it/s]


                     name   r2_orig  r2_flipped        p_full
0                autoMPG6  0.872129    0.871681  9.990768e-01
1                baseball  0.668188    0.666649  9.996045e-01
2              boom_bikes  0.996079    0.996091  8.728442e-02
3            daily-demand  0.820918    0.825419  1.565313e-10
4      excitation_current  0.999906    0.999912  4.634640e-70
5                   laser  0.963051    0.963348  1.390078e-02
6    maternal_health_risk  0.752387    0.753864  1.167036e-20
7            medical_cost  0.694032    0.693415  9.999998e-01
8                  o-ring  0.141579    0.168934  5.519880e-06
9                 plastic  0.715051    0.710116  1.000000e+00
10  qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11  real_estate_valuation  0.693135    0.693686  4.485718e-01


2000it [42:20,  1.27s/it]


                     name   r2_orig  r2_flipped        p_full
0                autoMPG6  0.872129    0.871681  9.990768e-01
1                baseball  0.668188    0.666649  9.996045e-01
2              boom_bikes  0.996079    0.996091  8.728442e-02
3            daily-demand  0.820918    0.825419  1.565313e-10
4      excitation_current  0.999906    0.999912  4.634640e-70
5                   laser  0.963051    0.963348  1.390078e-02
6    maternal_health_risk  0.752387    0.753864  1.167036e-20
7            medical_cost  0.694032    0.693415  9.999998e-01
8                  o-ring  0.141579    0.168934  5.519880e-06
9                 plastic  0.715051    0.710116  1.000000e+00
10  qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11  real_estate_valuation  0.693135    0.693686  4.485718e-01
12   residential_building  0.958179    0.957621  9.999996e-01


2000it [06:27,  5.16it/s]


                     name   r2_orig  r2_flipped        p_full
0                autoMPG6  0.872129    0.871681  9.990768e-01
1                baseball  0.668188    0.666649  9.996045e-01
2              boom_bikes  0.996079    0.996091  8.728442e-02
3            daily-demand  0.820918    0.825419  1.565313e-10
4      excitation_current  0.999906    0.999912  4.634640e-70
5                   laser  0.963051    0.963348  1.390078e-02
6    maternal_health_risk  0.752387    0.753864  1.167036e-20
7            medical_cost  0.694032    0.693415  9.999998e-01
8                  o-ring  0.141579    0.168934  5.519880e-06
9                 plastic  0.715051    0.710116  1.000000e+00
10  qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11  real_estate_valuation  0.693135    0.693686  4.485718e-01
12   residential_building  0.958179    0.957621  9.999996e-01
13                  servo  0.633171    0.632337  7.670612e-01


2000it [05:55,  5.62it/s]


                           name   r2_orig  r2_flipped        p_full
0                      autoMPG6  0.872129    0.871681  9.990768e-01
1                      baseball  0.668188    0.666649  9.996045e-01
2                    boom_bikes  0.996079    0.996091  8.728442e-02
3                  daily-demand  0.820918    0.825419  1.565313e-10
4            excitation_current  0.999906    0.999912  4.634640e-70
5                         laser  0.963051    0.963348  1.390078e-02
6          maternal_health_risk  0.752387    0.753864  1.167036e-20
7                  medical_cost  0.694032    0.693415  9.999998e-01
8                        o-ring  0.141579    0.168934  5.519880e-06
9                       plastic  0.715051    0.710116  1.000000e+00
10        qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11        real_estate_valuation  0.693135    0.693686  4.485718e-01
12         residential_building  0.958179    0.957621  9.999996e-01
13                        servo  0.633171    0.6

2000it [10:24,  3.20it/s]


                           name   r2_orig  r2_flipped        p_full
0                      autoMPG6  0.872129    0.871681  9.990768e-01
1                      baseball  0.668188    0.666649  9.996045e-01
2                    boom_bikes  0.996079    0.996091  8.728442e-02
3                  daily-demand  0.820918    0.825419  1.565313e-10
4            excitation_current  0.999906    0.999912  4.634640e-70
5                         laser  0.963051    0.963348  1.390078e-02
6          maternal_health_risk  0.752387    0.753864  1.167036e-20
7                  medical_cost  0.694032    0.693415  9.999998e-01
8                        o-ring  0.141579    0.168934  5.519880e-06
9                       plastic  0.715051    0.710116  1.000000e+00
10        qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11        real_estate_valuation  0.693135    0.693686  4.485718e-01
12         residential_building  0.958179    0.957621  9.999996e-01
13                        servo  0.633171    0.6

2000it [37:35,  1.13s/it]


                           name   r2_orig  r2_flipped        p_full
0                      autoMPG6  0.872129    0.871681  9.990768e-01
1                      baseball  0.668188    0.666649  9.996045e-01
2                    boom_bikes  0.996079    0.996091  8.728442e-02
3                  daily-demand  0.820918    0.825419  1.565313e-10
4            excitation_current  0.999906    0.999912  4.634640e-70
5                         laser  0.963051    0.963348  1.390078e-02
6          maternal_health_risk  0.752387    0.753864  1.167036e-20
7                  medical_cost  0.694032    0.693415  9.999998e-01
8                        o-ring  0.141579    0.168934  5.519880e-06
9                       plastic  0.715051    0.710116  1.000000e+00
10        qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11        real_estate_valuation  0.693135    0.693686  4.485718e-01
12         residential_building  0.958179    0.957621  9.999996e-01
13                        servo  0.633171    0.6

2000it [28:12,  1.18it/s]


                           name   r2_orig  r2_flipped        p_full
0                      autoMPG6  0.872129    0.871681  9.990768e-01
1                      baseball  0.668188    0.666649  9.996045e-01
2                    boom_bikes  0.996079    0.996091  8.728442e-02
3                  daily-demand  0.820918    0.825419  1.565313e-10
4            excitation_current  0.999906    0.999912  4.634640e-70
5                         laser  0.963051    0.963348  1.390078e-02
6          maternal_health_risk  0.752387    0.753864  1.167036e-20
7                  medical_cost  0.694032    0.693415  9.999998e-01
8                        o-ring  0.141579    0.168934  5.519880e-06
9                       plastic  0.715051    0.710116  1.000000e+00
10        qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11        real_estate_valuation  0.693135    0.693686  4.485718e-01
12         residential_building  0.958179    0.957621  9.999996e-01
13                        servo  0.633171    0.6

2000it [06:24,  5.21it/s]


                           name   r2_orig  r2_flipped        p_full
0                      autoMPG6  0.872129    0.871681  9.990768e-01
1                      baseball  0.668188    0.666649  9.996045e-01
2                    boom_bikes  0.996079    0.996091  8.728442e-02
3                  daily-demand  0.820918    0.825419  1.565313e-10
4            excitation_current  0.999906    0.999912  4.634640e-70
5                         laser  0.963051    0.963348  1.390078e-02
6          maternal_health_risk  0.752387    0.753864  1.167036e-20
7                  medical_cost  0.694032    0.693415  9.999998e-01
8                        o-ring  0.141579    0.168934  5.519880e-06
9                       plastic  0.715051    0.710116  1.000000e+00
10        qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11        real_estate_valuation  0.693135    0.693686  4.485718e-01
12         residential_building  0.958179    0.957621  9.999996e-01
13                        servo  0.633171    0.6

2000it [07:50,  4.25it/s]

                           name   r2_orig  r2_flipped        p_full
0                      autoMPG6  0.872129    0.871681  9.990768e-01
1                      baseball  0.668188    0.666649  9.996045e-01
2                    boom_bikes  0.996079    0.996091  8.728442e-02
3                  daily-demand  0.820918    0.825419  1.565313e-10
4            excitation_current  0.999906    0.999912  4.634640e-70
5                         laser  0.963051    0.963348  1.390078e-02
6          maternal_health_risk  0.752387    0.753864  1.167036e-20
7                  medical_cost  0.694032    0.693415  9.999998e-01
8                        o-ring  0.141579    0.168934  5.519880e-06
9                       plastic  0.715051    0.710116  1.000000e+00
10        qsar-aquatic-toxicity  0.524237    0.524178  6.749654e-01
11        real_estate_valuation  0.693135    0.693686  4.485718e-01
12         residential_building  0.958179    0.957621  9.999996e-01
13                        servo  0.633171    0.6




In [8]:
# Persist the summary table (including the per-fold score lists) as CSV in the
# current working directory.
results_pdf.to_csv(path_or_buf='existance-regression-rf.csv')