In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Short aliases for the verbose column names found in the raw CSV files.
columns = dict(
    spacegroup='sg',
    number_of_total_atoms='Natoms',
    percent_atom_al='x_Al',
    percent_atom_ga='x_Ga',
    percent_atom_in='x_In',
    lattice_vector_1_ang='a',
    lattice_vector_2_ang='b',
    lattice_vector_3_ang='c',
    lattice_angle_alpha_degree='alpha',
    lattice_angle_beta_degree='beta',
    lattice_angle_gamma_degree='gamma',
    formation_energy_ev_natom='E',
    bandgap_energy_ev='Eg',
)

# Training set: tag the rows and move both targets to log1p space
# (the models below are therefore fitted on log1p(E) and log1p(Eg)).
df_train = pd.read_csv("./input/train.csv").rename(columns=columns)
df_train["dataset"] = "train"
df_train[["E", "Eg"]] = np.log1p(df_train[["E", "Eg"]])

# Test set: same renaming, no targets available.
df_test = pd.read_csv("./input/test.csv").rename(columns=columns)
df_test["dataset"] = "test"

# One frame holding both splits so feature engineering is applied uniformly;
# the "dataset" column is used later to split them apart again.
df_total = pd.concat([df_train, df_test], ignore_index=True)

tuple(map(len, (df_train, df_test, df_total)))

(2400, 600, 3000)

In [3]:
# Preview the first rows of the combined train+test frame.
df_total.head()

Unnamed: 0,E,Eg,Natoms,a,alpha,b,beta,c,dataset,gamma,id,sg,x_Al,x_Ga,x_In
0,0.065788,1.490362,80.0,9.9523,90.0026,8.5513,90.0023,9.1775,train,90.0017,1,33,0.625,0.375,0.0
1,0.222343,1.366347,80.0,6.184,90.0186,6.1838,89.998,23.6287,train,120.0025,2,194,0.625,0.375,0.0
2,0.167293,1.320101,40.0,9.751,90.9688,5.6595,91.1228,13.963,train,30.5185,3,227,0.8125,0.1875,0.0
3,0.196553,1.469992,30.0,5.0036,89.9888,5.0034,90.0119,13.5318,train,120.0017,4,167,0.75,0.0,0.25
4,0.049266,0.866806,80.0,6.6614,89.996,6.6612,90.0006,24.5813,train,119.9893,5,194,0.0,0.625,0.375


In [4]:
# Preview the last rows of the combined frame (test rows, so E and Eg are NaN).
df_total.tail()

Unnamed: 0,E,Eg,Natoms,a,alpha,b,beta,c,dataset,gamma,id,sg,x_Al,x_Ga,x_In
2995,,,80.0,24.8145,90.0002,6.3964,104.7733,6.2933,test,90.0001,596,12,0.0,0.5938,0.4062
2996,,,40.0,5.5783,90.0008,9.4849,89.9967,10.1107,test,90.0004,597,33,0.125,0.0,0.875
2997,,,80.0,6.9377,90.0072,6.9372,89.988,25.0641,test,119.9857,598,194,0.0,0.25,0.75
2998,,,40.0,5.1841,90.0041,8.8659,90.0009,9.4956,test,90.0007,599,33,0.625,0.0,0.375
2999,,,80.0,9.4959,90.0029,9.4956,90.0031,9.4956,test,89.9969,600,206,0.375,0.3438,0.2812


In [5]:
#from https://www.kaggle.com/cbartel/random-forest-using-elemental-properties
def get_vol(a, b, c, alpha, beta, gamma):
    """
    Volume of the parallelepiped unit cell spanned by the lattice vectors.

    Works element-wise, so scalars, NumPy arrays and pandas Series are all
    accepted.

    Args:
        a (float) - length of lattice vector 1
        b (float) - length of lattice vector 2
        c (float) - length of lattice vector 3
        alpha (float) - lattice angle 1 [degrees]
        beta (float) - lattice angle 2 [degrees]
        gamma (float) - lattice angle 3 [degrees]
    Returns:
        volume (float) of the parallelepiped unit cell, in the cube of the
        length unit used for a, b and c
    """
    # The angles arrive in degrees; the trigonometric functions need radians.
    cos_alpha = np.cos(np.radians(alpha))
    cos_beta = np.cos(np.radians(beta))
    cos_gamma = np.cos(np.radians(gamma))
    # Standard triclinic cell-volume formula.
    return a*b*c*np.sqrt(1 + 2*cos_alpha*cos_beta*cos_gamma
                           - cos_alpha**2
                           - cos_beta**2
                           - cos_gamma**2)


    
# Derived geometric features: unit-cell volume and atom number densities.
df_total['vol'] = get_vol(*(df_total[name]
                            for name in ('a', 'b', 'c', 'alpha', 'beta', 'gamma')))
#df_total[['a','b','c','alpha','beta','gamma','vol']].head()

# Atoms per unit volume, then split per element by its atomic fraction.
df_total['density'] = df_total['Natoms'] / df_total['vol']
for element in ('Al', 'Ga', 'In'):
    df_total['density_' + element] = df_total['density'] * df_total['x_' + element]

# The space group is a label, not a magnitude: mark it categorical.
df_total['sg'] = df_total['sg'].astype('category')

In [6]:
# Preview df_total again, now including vol, density and the per-element densities.
df_total.head()

Unnamed: 0,E,Eg,Natoms,a,alpha,b,beta,c,dataset,gamma,id,sg,x_Al,x_Ga,x_In,vol,density,density_Al,density_Ga,density_In
0,0.065788,1.490362,80.0,9.9523,90.0026,8.5513,90.0023,9.1775,train,90.0017,1,33,0.625,0.375,0.0,781.052081,0.102426,0.064016,0.03841,0.0
1,0.222343,1.366347,80.0,6.184,90.0186,6.1838,89.998,23.6287,train,120.0025,2,194,0.625,0.375,0.0,782.50011,0.102236,0.063898,0.038339,0.0
2,0.167293,1.320101,40.0,9.751,90.9688,5.6595,91.1228,13.963,train,30.5185,3,227,0.8125,0.1875,0.0,391.227531,0.102242,0.083072,0.01917,0.0
3,0.196553,1.469992,30.0,5.0036,89.9888,5.0034,90.0119,13.5318,train,120.0017,4,167,0.75,0.0,0.25,293.377334,0.102257,0.076693,0.0,0.025564
4,0.049266,0.866806,80.0,6.6614,89.996,6.6612,90.0006,24.5813,train,119.9893,5,194,0.0,0.625,0.375,944.713843,0.084682,0.0,0.052926,0.031756


In [7]:
# Encoding of categorical features.
# The local helper package lives outside this directory, so its folder must be
# put on sys.path *before* the wildcard import below can succeed.
import sys 
sys.path.append("../kaggle_varie")
# NOTE(review): wildcard import hides which names come from `varie`; the
# commented-out `bin_enc` below presumably comes from it - confirm.
from  varie import *
# Columns of df_total to be replaced by their encoded representation.
cols_to_enc=["sg"]

# Alternative kept for reference: binary encoder
#enc=bin_enc(df_total,cols_to_enc,verbose=2,copy=True,drop_original=True,ordinal_only=False)
# Active choice: one-hot encoder (one indicator column per category of each listed column)
enc=pd.get_dummies(df_total,columns=cols_to_enc)



In [8]:
def grid_search_fct(model,params,df,y_col,n_iter=20,cv=4,drop_col=None,verbose=2,random_state=None):
    """
    Run one randomized hyper-parameter search per target column.

    Args:
        model - unfitted estimator handed to RandomizedSearchCV
        params (dict) - parameter distributions / candidate lists to sample from
        df (DataFrame) - rows to fit on; holds features, targets and any
            bookkeeping columns
        y_col (list of str, or str) - target column name(s); all of them are
            excluded from the feature matrix
        n_iter (int) - number of parameter settings sampled per target
        cv (int) - number of cross-validation folds
        drop_col (list of str, optional) - extra non-feature columns to exclude
        verbose (int) - verbosity forwarded to RandomizedSearchCV
        random_state (optional) - seed forwarded to RandomizedSearchCV so the
            sampled candidates can be reproduced; None keeps them random
    Returns:
        list with one fitted RandomizedSearchCV per entry of y_col, in order.
        Scores are negative mean squared errors.
    """
    # A None default avoids sharing one mutable list between calls.
    drop_col = [] if drop_col is None else list(drop_col)
    targets = [y_col] if isinstance(y_col, str) else list(y_col)

    # The same feature matrix is used for every target.
    X_train = df.drop(targets + drop_col, axis=1).values
    grids = []
    for y in targets:
        print(y)
        y_train = df[y].values
        print(X_train.shape, y_train.shape)

        grid = RandomizedSearchCV(model, param_distributions=params, n_iter=n_iter,
                                  cv=cv, verbose=verbose,
                                  scoring="neg_mean_squared_error",
                                  random_state=random_state)

        grid.fit(X_train, y_train)
        grids.append(grid)
    return grids

In [9]:
#grid search for random forest
import scipy
from  sklearn.model_selection import RandomizedSearchCV
from sklearn import *
from catboost import CatBoostRegressor,CatBoostClassifier
from sklearn.svm import SVR
from sklearn.linear_model import  ElasticNet
from sklearn.ensemble import  GradientBoostingRegressor, RandomForestRegressor,AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
#from sklearn.kernel_approximation import Nystroem
#from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import Lasso,Ridge,LinearRegression
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from varie import lognuniform
#from varie import loguniform2
%aimport varie
import varie
from scipy.stats import uniform, randint

# Targets to predict and bookkeeping columns that must not be used as features.
y_col = ["E", "Eg"]
drop_col = ["id", "dataset"]

# Split the encoded frame back into its two parts using the tag column of
# df_total (enc was derived from df_total, so the boolean mask aligns by index).
df_total_train_eval = enc.loc[df_total["dataset"] == 'train']
df_total_test = enc.loc[df_total["dataset"] == 'test']

# Plain feature matrices: everything except targets and bookkeeping columns.
X_train, X_test = (
    frame.drop(y_col + drop_col, axis=1).values
    for frame in (df_total_train_eval, df_total_test)
)

# Candidate models keyed by a short tag. Each value is a pair
# (unfitted estimator, search space) that is handed to RandomizedSearchCV.
# Search-space entries are either scipy.stats frozen distributions, tuples of
# discrete choices, or the result of `lognuniform` from the local `varie` module.
# NOTE(review): `lognuniform` presumably returns `size` samples of
# base**U(low, high); the recorded best parameters are consistent with that - confirm.
models={

    'knn':
           (KNeighborsRegressor(),
            {'n_neighbors':scipy.stats.randint(1,100)}),

    'svr':
           (SVR(verbose=False,kernel='linear'),
            {'C':lognuniform(low=-4,high=4,base=10,size=100),
             'epsilon':lognuniform(low=-2,high=0,base=10,size=100)}),

    'svr_rbf':
           (SVR(verbose=False,kernel='rbf'),
            {'C': lognuniform(low=-2,high=2,base=10,size=100),
             'gamma':lognuniform(low=-2,high=2,base=10,size=100)}),

    'rf':
           (ensemble.RandomForestRegressor(verbose=False),
            {"max_depth": scipy.stats.randint(1,100),
             'n_estimators': scipy.stats.randint(1,400),
             'max_features':('log2','sqrt','auto'),
             'min_samples_split':scipy.stats.randint(2,5),
             'min_samples_leaf':scipy.stats.randint(1,5)}),

    'cb':
           (CatBoostRegressor(loss_function='RMSE', eval_metric='RMSE',logging_level='Silent'),
            {"depth": scipy.stats.randint(1,6),
             'iterations': scipy.stats.randint(100,2000),
             'learning_rate':lognuniform(low=-2,high=-1,base=10,size=100),
             'l2_leaf_reg': scipy.stats.randint(2,4)}),

    'mlp':
           (MLPRegressor((80, 10), early_stopping=False),
             {'hidden_layer_sizes':scipy.stats.randint(1,100),
              'alpha':lognuniform(low=-5,high=-1,base=10,size=100)}),

     'gb':
           (GradientBoostingRegressor(n_estimators=100),
            {'learning_rate':lognuniform(low=-3,high=-1,base=10,size=100),
             'n_estimators': scipy.stats.randint(1,300),
             'max_depth':scipy.stats.randint(1,5),
             'max_features':('sqrt','log2','auto')}),

    'lasso':
            (Lasso(),
            {'alpha':lognuniform(low=-6,high=2,base=10,size=100)}),

    'ridge':
            (Ridge(),
            {'alpha':varie.lognuniform(low=-6,high=2,base=10,size=100)}),

    'eln':
            (ElasticNet(),
            {'alpha':lognuniform(low=-6,high=4,base=10,size=100),
             # l1_ratio is a mixing weight and must stay within [0, 1]; the
             # previous upper exponent of 4 allowed values far above 1.
             'l1_ratio':lognuniform(low=-6,high=0,base=10,size=100)}),

    'xgb':
        (XGBRegressor(),
         {'max_depth':scipy.stats.randint(1,100),
          'learning_rate':lognuniform(low=-4,high=-0.5,base=10,size=100),
          'n_estimators':scipy.stats.randint(1,400),
          # uniform(loc, scale) samples from [loc, loc + scale] = [0.6, 1.0].
          # An earlier duplicate of this key (uniform(0.55, 0.66)) was silently
          # overridden by this entry and has been removed.
          'colsample_bytree': uniform(0.6, 0.4),
          'min_child_weight': randint(30, 60),
          'reg_lambda': uniform(1, 2),
          'reg_alpha': uniform(1, 2),
}),

    # Requires the lightgbm package (once noted here as "does not install";
    # the recorded run of this cell did fit it).
    'gbm' :
        (LGBMRegressor(objective='regression'),
            {'num_leaves':scipy.stats.randint(1,200),
          'learning_rate':lognuniform(low=-4,high=-0.5,base=10,size=100),
          'n_estimators':scipy.stats.randint(1,400)}),
    'adb' :
        (AdaBoostRegressor(loss="square"),
            {'learning_rate':lognuniform(low=-4,high=-0.1,base=10,size=10),
             'n_estimators':scipy.stats.randint(1,400)}),


       }
 
    

# Keep searches from an earlier execution of this cell: `results` is only
# created when it does not exist yet, so re-running the cell fits just the
# tags that are still missing.
try:
    results
except NameError:
    # Only "name not defined" means a fresh start; any other error should surface.
    results={}

for (tag,model) in  models.items():
    if (tag not in results):
        print(tag)
        # model is (estimator, search space); one fitted search per target in y_col.
        results[tag]=grid_search_fct(model[0],model[1],df_total_train_eval,y_col,n_iter=10,cv=4,drop_col=drop_col,verbose=1)


    
    #grid=RandomizedSearchCV(model[0],param_distributions=params, n_iter=20,cv=4,verbose=2,scoring="neg_mean_squared_error" )

                        
    #grid.fit(X_train,y_train)
    #grids.append(grid)

ERROR:root:Line magic function `%aimport` not found.


knn
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    3.3s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    2.7s finished


svr
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed: 26.4min finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed: 37.3min finished


svr_rbf
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    7.1s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   18.2s finished


rf
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  1.3min finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   58.6s finished


cb
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  9.4min finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed: 12.9min finished


mlp
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   40.6s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   39.9s finished


gb
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   15.6s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    7.3s finished


lasso
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.8s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.9s finished


ridge
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits
Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.1s finished


eln
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    1.8s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    1.6s finished


xgb
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   11.6s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   24.8s finished


gbm
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  1.4min finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  1.1min finished


adb
E
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   35.7s finished


Eg
(2400, 21) (2400,)
Fitting 4 folds for each of 10 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  1.2min finished


In [11]:
import pickle
# Persist the fitted searches; the context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open("results_cv_10iter.pickle", "wb") as results_file:
    pickle.dump(results, results_file)
#results.pop('cb')

In [12]:
# Best hyper-parameters and cross-validated RMSE per model.
# best_score_ is a negative MSE, hence the sign flip before the square root.
# Printed per model: mean RMSE over both targets, RMSE for E, RMSE for Eg.

for tag, grids in results.items():
    print(tag)
    for grid in grids:
        print(grid.best_params_)
    rmse_E = np.sqrt(-grids[0].best_score_)
    rmse_Eg = np.sqrt(-grids[1].best_score_)
    print((rmse_E + rmse_Eg)/2, rmse_E, rmse_Eg)

knn
{'n_neighbors': 5}
{'n_neighbors': 2}
0.0851628161367 0.0576802878912 0.112645344382
svr
{'epsilon': 0.017254490142290496, 'C': 0.11206449421619946}
{'epsilon': 0.020178772192372886, 'C': 0.0033228622244295914}
0.0919093152693 0.0583481105849 0.125470519954
svr_rbf
{'gamma': 0.032664503175227073, 'C': 4.5831631568414792}
{'gamma': 0.012721154634095479, 'C': 0.6844846896861595}
0.0971582764686 0.0707211313243 0.123595421613
rf
{'max_depth': 37, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 313}
{'max_depth': 80, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 3, 'n_estimators': 308}
0.0620699163751 0.0332204733867 0.0909193593635
cb
{'depth': 4, 'iterations': 1328, 'l2_leaf_reg': 2, 'learning_rate': 0.015902606749020831}
{'depth': 4, 'iterations': 1727, 'l2_leaf_reg': 3, 'learning_rate': 0.015902606749020831}
0.0594551904799 0.0319222010461 0.0869881799138
mlp
{'alpha': 0.00052380669831485584, 'hidden_layer_sizes': 86}
{'a

In [16]:
import mlxtend
# Display the module to confirm which installation is in use
# (a trailing "." here was an incomplete attribute access and a syntax error).
mlxtend

<module 'mlxtend' from 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\mlxtend\\__init__.py'>

In [21]:
#Stacking via mlxtend
# The tuned base models are stacked per target, and the hyper-parameters of the
# meta-regressor are then searched with RandomizedSearchCV.
#cbr=CatBoostRegressor(loss_function='RMSE', eval_metric='RMSE',logging_level='Silent')

# Option A (currently not used): random forest as meta-regressor.
rf1=ensemble.RandomForestRegressor(verbose=False)
rf2=ensemble.RandomForestRegressor(verbose=False)
params_meta_rf={"meta-randomforestregressor__max_depth": scipy.stats.randint(1,100),
                'meta-randomforestregressor__n_estimators': scipy.stats.randint(1,400),
                'meta-randomforestregressor__max_features':('log2','sqrt','auto'),
                'meta-randomforestregressor__min_samples_split':scipy.stats.randint(2,5),
                'meta-randomforestregressor__min_samples_leaf':scipy.stats.randint(1,5)}

# Option B (active): CatBoost as meta-regressor.
cb1=CatBoostRegressor(loss_function='RMSE', eval_metric='RMSE',logging_level='Silent')
cb2=CatBoostRegressor(loss_function='RMSE', eval_metric='RMSE',logging_level='Silent')
params_meta_cb={"meta-catboostregressor__depth": scipy.stats.randint(1,6),
                'meta-catboostregressor__iterations': scipy.stats.randint(100,2000),
                'meta-catboostregressor__learning_rate':lognuniform(low=-2,high=-1,base=10,size=100),
                'meta-catboostregressor__l2_leaf_reg': scipy.stats.randint(2,4)}

# The search space must match the meta-regressor given to StackingRegressor
# below (cb1 / cb2). Previously both dicts were assigned to `params_meta` in
# turn, so the random-forest one was silently discarded.
params_meta=params_meta_cb

#abc = SVR(kernel='rbf')

#grid search
#params_meta = {'meta-svr__C': [0.1, 1.0, 10.0, 100.0],
#          'meta-svr__gamma': [0.1, 1.0, 10.0]}

#randomized search
#params_meta = {'meta-svr__C': lognuniform(low=-2,high=2,base=10,size=100),
#               'meta-svr__gamma': lognuniform(low=-2,high=2,base=10,size=100)}

#{'meta-cbr__depth': scipy.stats.randint(1,6)}
             #'meta-meta_learner__iterations': scipy.stats.randint(100,2000),
             #'meta-meta_learner__learning_rate':lognuniform(low=-2,high=-1,base=10,size=100),
             #'meta-meta_learner__l2_leaf_reg': scipy.stats.randint(2,4)}

from mlxtend.regressor import StackingRegressor

# Base learners: the best estimator found for each model tag,
# index 0 -> first target in y_col, index 1 -> second target.
learners1=[g[0].best_estimator_ for g in results.values()]
learners2=[g[1].best_estimator_ for g in results.values()]
learners=[learners1,learners2]

# One stacker per target, each with its own meta-regressor instance.
stregr = [StackingRegressor(regressors=learners1,meta_regressor=cb1),
          StackingRegressor(regressors=learners2,meta_regressor=cb2)]

# Randomized search over the meta-regressor's hyper-parameters, one per target.
results2=[]
for s,y in zip(stregr,y_col):
    print(y)
    y_train=df_total_train_eval[y]
    grid=RandomizedSearchCV(s,param_distributions=params_meta, n_iter=10,cv=3,verbose=10,scoring="neg_mean_squared_error" )
    #grid=GridSearchCV(s,param_grid=params_meta, cv=5,verbose=10,scoring="neg_mean_squared_error" )
    grid.fit(X_train, y_train)
    results2.append(grid)



E
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.001181, total= 2.7min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.7min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.000997, total= 3.6min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  6.3min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.001216, total= 2.7min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  9.1min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.001087, total= 2.4min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 11.5min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1240, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.001203, total= 2.4min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 13.9min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381, score=-0.001186, total= 2.6min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed: 16.6min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381, score=-0.000972, total= 3.5min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed: 20.1min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381, score=-0.001242, total= 2.6min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 22.6min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381, score=-0.001090, total= 2.4min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 25.1min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=779, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.050433883381, score=-0.001219, total= 2.4min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265, score=-0.001190, total= 2.7min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265, score=-0.000987, total= 3.5min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265, score=-0.001201, total= 2.8min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265, score=-0.001051, total= 2.5min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=657, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.014663140265, score=-0.001218, total= 2.5min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316, score=-0.001299, total= 3.1min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316, score=-0.001157, total= 4.0min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316, score=-0.001328, total= 3.1min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316, score=-0.001213, total= 3.0min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1267, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.030002942316, score=-0.001319, total= 3.0min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985, score=-0.001229, total= 2.9min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985, score=-0.000989, total= 3.6min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985, score=-0.001224, total= 2.8min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985, score=-0.001111, total= 2.5min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=1406, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0379708020985, score=-0.001240, total= 2.5min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718, score=-0.001336, total= 3.1min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718, score=-0.001214, total= 3.9min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718, score=-0.001353, total= 3.1min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718, score=-0.001197, total= 2.8min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1072, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0706308657718, score=-0.001384, total= 2.9min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.001181, total= 2.7min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.000993, total= 3.5min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.001236, total= 2.7min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.001080, total= 2.4min
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=573, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.001225, total= 2.4min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616, score=-0.001224, total= 3.0min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616, score=-0.001032, total= 3.8min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616, score=-0.001222, total= 2.7min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616, score=-0.001113, total= 2.5min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1133, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0109965250616, score=-0.001263, total= 2.5min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949, score=-0.001288, total= 2.7min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949, score=-0.001137, total= 4.5min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949, score=-0.001327, total= 3.2min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949, score=-0.001178, total= 2.7min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1317, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0627510193949, score=-0.001360, total= 2.6min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463, score=-0.001180, total= 2.3min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463, score=-0.000990, total= 3.3min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463, score=-0.001186, total= 2.7min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463, score=-0.001024, total= 2.0min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=297, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0204555524463, score=-0.001233, total= 2.0min


[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed: 144.5min finished


Eg
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726, score=-0.008504, total=  46.9s
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   47.3s remaining:    0.0s


[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726, score=-0.008579, total=  44.8s
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  1.5min remaining:    0.0s


[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726, score=-0.007810, total=  46.8s
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  2.3min remaining:    0.0s


[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726, score=-0.007742, total=  45.1s
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  3.1min remaining:    0.0s


[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=150, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.029359697726, score=-0.009203, total=  45.6s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  3.9min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.008678, total=  43.6s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  4.6min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.009412, total=  40.3s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  5.3min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.008552, total=  41.5s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:  6.0min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.007870, total=  40.3s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  6.7min remaining:    0.0s


[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=274, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0216897467291, score=-0.009501, total=  40.8s
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066, score=-0.008342, total=  51.4s
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066, score=-0.008559, total=  50.3s
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066, score=-0.007656, total=  50.8s
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066, score=-0.007484, total=  49.5s
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=576, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0150489072066, score=-0.009134, total=  48.9s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287, score=-0.008737, total=  46.4s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287, score=-0.009364, total=  46.7s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287, score=-0.008223, total=  46.6s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287, score=-0.008256, total=  54.2s
[CV] meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287 




[CV]  meta-catboostregressor__depth=1, meta-catboostregressor__iterations=912, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0845851591287, score=-0.010143, total=  59.2s
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.009803, total= 1.1min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.010328, total= 1.1min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.009276, total= 1.1min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.008302, total= 1.3min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=764, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0453912307059, score=-0.010689, total= 1.2min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338, score=-0.009902, total= 1.2min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338, score=-0.009819, total= 1.3min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338, score=-0.009044, total= 1.3min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338, score=-0.008291, total= 1.3min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1597, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0285559873338, score=-0.010341, total= 1.3min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319, score=-0.010321, total= 2.3min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319, score=-0.011438, total= 2.0min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319, score=-0.009852, total= 2.5min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319, score=-0.009299, total= 4.1min
[CV] meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319 




[CV]  meta-catboostregressor__depth=5, meta-catboostregressor__iterations=1863, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0818453605319, score=-0.011086, total= 4.1min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543, score=-0.009637, total= 1.9min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543, score=-0.010323, total= 1.3min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543, score=-0.009084, total= 1.2min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543, score=-0.008233, total= 1.3min
[CV] meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543 




[CV]  meta-catboostregressor__depth=4, meta-catboostregressor__iterations=1202, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0294887148543, score=-0.010278, total= 1.3min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702, score=-0.010090, total= 2.6min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702, score=-0.009966, total= 1.3min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702, score=-0.008935, total= 1.4min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702, score=-0.008595, total= 1.3min
[CV] meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702 




[CV]  meta-catboostregressor__depth=3, meta-catboostregressor__iterations=1351, meta-catboostregressor__l2_leaf_reg=2, meta-catboostregressor__learning_rate=0.0406950298702, score=-0.010586, total= 1.1min
[CV] meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866 




[CV]  meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866, score=-0.009553, total= 1.1min
[CV] meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866 




[CV]  meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866, score=-0.009518, total= 1.1min
[CV] meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866 




[CV]  meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866, score=-0.008671, total= 1.1min
[CV] meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866 




[CV]  meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866, score=-0.008414, total= 1.1min
[CV] meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866 




[CV]  meta-catboostregressor__depth=2, meta-catboostregressor__iterations=1820, meta-catboostregressor__l2_leaf_reg=3, meta-catboostregressor__learning_rate=0.0328639255866, score=-0.009365, total= 1.1min


[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed: 63.8min finished


In [22]:
import pickle

# Persist `results2` (indexed below as results2[0] / results2[1], one fitted
# search per target) so the long-running search does not have to be repeated.
# The context manager guarantees the file is flushed and closed even if
# pickling raises; previously the handle was left for the garbage collector.
with open("results2_meta_10iter.pickle", "wb") as pickle_file:
    pickle.dump(results2, pickle_file)

In [24]:
# Show the winning hyper-parameter set of each of the two searches.
for search_idx in (0, 1):
    print(results2[search_idx].best_params_)

# best_score_ is negated before taking the root -- presumably the searches were
# scored with a negated MSE (the per-fold scores they log are negative), which
# would make these RMSE values. Printed as: mean of both, first, second.
rmse_first = np.sqrt(-results2[0].best_score_)
rmse_second = np.sqrt(-results2[1].best_score_)
print((rmse_first + rmse_second) / 2, rmse_first, rmse_second)

{'meta-catboostregressor__depth': 5, 'meta-catboostregressor__iterations': 297, 'meta-catboostregressor__l2_leaf_reg': 3, 'meta-catboostregressor__learning_rate': 0.020455552446341726}
{'meta-catboostregressor__depth': 3, 'meta-catboostregressor__iterations': 576, 'meta-catboostregressor__l2_leaf_reg': 3, 'meta-catboostregressor__learning_rate': 0.015048907206594003}
0.0621260880417 0.0335057823692 0.0907463937142


In [25]:
#write to csv
%load_ext autoreload
%aimport varie
%autoreload 2
#I use a different model for E and Eg
varie.make_csv2(df_total_train_eval,pd.DataFrame(),df_total_test,
#         (ensemble.RandomForestRegressor(max_depth= 11, max_features='log2', n_estimators= 55),
#          ensemble.RandomForestRegressor(max_depth= 9, max_features='sqrt', n_estimators= 220)),
            (stregr[0],stregr[1]),
         y_col,'mlx.csv',drop=drop_col,columns=['id','E','Eg'],
         new_column_names=['id','formation_energy_ev_natom' ,'bandgap_energy_ev'],change_col_names=True)

E StackingRegressor(meta_regressor=<catboost.core.CatBoostRegressor object at 0x00000135CFC0D588>,
         regressors=[KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform'), SVR(C=0.11206449421619946, cache_size=200, coef0=0.0, degree=3,
  epsilon=0.017254490142290496, gamma='auto', kernel='linear',...e, learning_rate=0.090261933039442271,
         loss='square', n_estimators=313, random_state=None)],
         store_train_meta_features=False, verbose=0)
shapes: (2400, 21) (2400,)




Eg StackingRegressor(meta_regressor=<catboost.core.CatBoostRegressor object at 0x00000135D4E1B6D8>,
         regressors=[KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=2, p=2,
          weights='uniform'), SVR(C=0.0033228622244295914, cache_size=200, coef0=0.0, degree=3,
  epsilon=0.020178772192372886, gamma='auto', kernel='linear...one, learning_rate=0.48442687574110221,
         loss='square', n_estimators=21, random_state=None)],
         store_train_meta_features=False, verbose=0)
shapes: (2400, 21) (2400,)




In [27]:
from mlens.ensemble import SuperLearner
import mlens
from mlens.model_selection import Evaluator
from mlens.metrics import make_scorer
from mlens.metrics import rmse

# Negated-RMSE scorer (greater_is_better=False); not used in this cell itself.
rmse_scorer = make_scorer(rmse, greater_is_better=False)

# Build one list of tuned base learners per target.
# `results` is iterated with .values() and each value is indexed with [0] / [1];
# presumably it maps a model name to a pair of fitted searches, one per entry
# of y_col -- verify against the cell that builds `results`.
learners1 = [g[0].best_estimator_ for g in results.values()]
learners2 = [g[1].best_estimator_ for g in results.values()]
learners = [learners1, learners2]

# Fit one SuperLearner (5-fold, CatBoost meta-learner) per target column.
# Relies on names created in earlier cells: X_train, y_col,
# df_total_train_eval and CatBoostRegressor.
sls = []
for learner, y in zip(learners, y_col):
    print(y)
    y_train = df_total_train_eval[y].values
    print(X_train.shape, y_train.shape)

    sl = SuperLearner(
        folds=5,
        verbose=True,
    )

    # A fresh meta-learner is created on every iteration so the two ensembles
    # never share a fitted model.
    # NOTE: no random seed is passed (a `random_seed=SEED` argument was left
    # disabled here), so repeated runs may not reproduce the same numbers.
    meta_learner = CatBoostRegressor(
        iterations=1200,
        learning_rate=0.03,
        depth=4,
        loss_function='RMSE',
        eval_metric='RMSE',
        od_type='Iter',
        od_wait=50,
        verbose=False,
    )

    sl.add(learner)
    sl.add_meta(meta_learner)

    # Train the ensemble
    sl.fit(X_train, y_train)

    # NOTE: predictions are made on the same rows the ensemble was just fitted
    # on, so the RMSE printed below is an in-sample (optimistic) figure, not a
    # validation score.
    preds = sl.predict(X_train)
    print(rmse(y_train, preds))
    sls.append(sl)

[MLENS] backend: threading


E
(2400, 21) (2400,)

Fitting 2 layers
Fit complete                        | 00:13:57

Predicting 2 layers
Predict complete                    | 00:00:04
0.0277513309762
Eg
(2400, 21) (2400,)

Fitting 2 layers
Fit complete                        | 00:05:35

Predicting 2 layers
Predict complete                    | 00:00:04
0.0790120302652


In [36]:
#write to csv
%load_ext autoreload
%aimport varie
%autoreload 2
varie.make_csv2(df_total_train_eval,pd.DataFrame(),df_total_test,
#         (ensemble.RandomForestRegressor(max_depth= 11, max_features='log2', n_estimators= 55),
#          ensemble.RandomForestRegressor(max_depth= 9, max_features='sqrt', n_estimators= 220)),
            sls,
         y_col,'sl4.csv',drop=drop_col,columns=['id','E','Eg'],
         new_column_names=['id','formation_energy_ev_natom' ,'bandgap_energy_ev'],change_col_names=True)
#I use a different model for E and Eg
#varie.make_csv2(df_total_train_eval,pd.DataFrame(),df_total_test,
#         (ensemble.RandomForestRegressor(max_depth= 11, max_features='log2', n_estimators= 55),
#          ensemble.RandomForestRegressor(max_depth= 9, max_features='sqrt', n_estimators= 220)),
#            (sl1,sl2),
#         y_col,'sl3.csv',drop=drop_col,columns=['id','E','Eg'],
#         new_column_names=['id','formation_energy_ev_natom' ,'bandgap_energy_ev'],change_col_names=True,fit=False)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
E SuperLearner(array_check=2, backend=None, folds=5,
       layers=[Layer(backend='threading', dtype=<class 'numpy.float32'>, n_jobs=-1,
   name='layer-1', propagate_features=None, raise_on_exception=True,
   random_state=None, shuffle=False,
   stack=[Group(backend='threading', dtype=<class 'numpy.float32'>,
   indexer=FoldIndex(X=None, folds=5, raise_on_ex...rer=None)],
   n_jobs=-1, name='group-1', raise_on_exception=True, transformers=[])],
   verbose=0)],
       model_selection=False, n_jobs=None, raise_on_exception=True,
       random_state=None, sample_size=20, scorer=None, shuffle=False,
       verbose=True)
shapes: (2400, 21) (2400,)

Fitting 2 layers




Fit complete                        | 00:14:50

Predicting 2 layers
Predict complete                    | 00:00:04
Eg SuperLearner(array_check=2, backend=None, folds=5,
       layers=[Layer(backend='threading', dtype=<class 'numpy.float32'>, n_jobs=-1,
   name='layer-1', propagate_features=None, raise_on_exception=True,
   random_state=None, shuffle=False,
   stack=[Group(backend='threading', dtype=<class 'numpy.float32'>,
   indexer=FoldIndex(X=None, folds=5, raise_on_ex...rer=None)],
   n_jobs=-1, name='group-3', raise_on_exception=True, transformers=[])],
   verbose=0)],
       model_selection=False, n_jobs=None, raise_on_exception=True,
       random_state=None, sample_size=20, scorer=None, shuffle=False,
       verbose=True)
shapes: (2400, 21) (2400,)

Fitting 2 layers


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df_test[y_]=y_pred


Fit complete                        | 00:06:41

Predicting 2 layers
Predict complete                    | 00:00:04


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df_test[y_]=y_pred


NameError: name 'sl1' is not defined

In [None]:
import copy

import mlens
import scipy.stats
from mlens.ensemble import SuperLearner
from mlens.model_selection import Evaluator
from mlens.metrics import make_scorer
from mlens.metrics import rmse

# NOTE: `results`, `CatBoostRegressor`, `X_train`, `y_col` and
# `df_total_train_eval` come from earlier cells; `lognuniform` is defined in a
# later cell of this notebook and has to be executed before this one.

# Best estimator of every tuned base model: entry 0 of each result feeds the
# ensemble for the first target, entry 1 the ensemble for the second target.
learners1 = [grid[0].best_estimator_ for grid in results.values()]
learners2 = [grid[1].best_estimator_ for grid in results.values()]

# One (depth, iterations, learning_rate, [sl1, sl2]) tuple per sampled setting.
grid_sl = []

tries = 3
# Random search over the meta learner's hyper-parameters.
for depth, iterations, learning_rate in zip(
        scipy.stats.randint(1, 5).rvs(tries),
        scipy.stats.randint(1000, 2000).rvs(tries),
        lognuniform(low=-2, high=-1, base=10, size=tries)):
    print(depth, iterations, learning_rate)

    meta_learner1 = CatBoostRegressor(iterations=iterations,
                                      learning_rate=learning_rate,
                                      depth=depth,
                                      loss_function='RMSE',
                                      eval_metric='RMSE',
                                      # random_seed=SEED,
                                      od_type='Iter',
                                      od_wait=50, verbose=False)
    # Independent copy so the two ensembles never share a meta learner object.
    meta_learner2 = copy.deepcopy(meta_learner1)

    # Instantiate one 5-fold ensemble per target.
    sl1 = SuperLearner(folds=5, verbose=True, scorer=mlens.metrics.rmse)
    sl2 = SuperLearner(folds=5, verbose=True, scorer=mlens.metrics.rmse)

    # Add the base learners and the meta learner.
    sl1.add(learners1)
    sl1.add_meta(meta_learner1)
    sl2.add(learners2)
    sl2.add_meta(meta_learner2)

    sls = [sl1, sl2]

    for i, y in enumerate(y_col):
        print(y)
        y_train = df_total_train_eval[y].values

        # Train the ensemble and report its RMSE on the training data.
        sls[i].fit(X_train, y_train)
        preds = sls[i].predict(X_train)
        print(rmse(y_train, preds))

    # list.append accepts a single argument, so the setting is stored as one
    # tuple; recorded once per setting, after every target has been fitted.
    grid_sl.append((depth, iterations, learning_rate, sls))
        
        
    #    results.append(mlens.metrics.rmse(y_train, ensemble.predict(X_train)),
    #                          evl.summary['test_score_mean']['superlearner'],
    #                          evl.summary['test_score_std']['superlearner'],
    #                          mlens.metrics.rmse(y_test, ensemble.predict(X_test)))

    #    print_scores(scores_df, 'mlens')

2 1430 0.0763640155591
E

Fitting 2 layers




In [None]:
# Wrap the base learners in a SuperLearner with model selection switched on,
# so it can be handed to the evaluator as a preprocessing step.
in_layer = SuperLearner(model_selection=True)
in_layer.add(base_learners)
preprocess = [in_layer]

evl = Evaluator(scorer, cv=2, verbose=5)

# n_iter: bump this up to do a larger grid search
evl.fit(
    X_train,
    y_train,
    estimators=meta_learners,
    param_dicts=param_dicts,
    preprocessing={'meta': preprocess},
    n_iter=5,
)


In [35]:
varie.make_csv??

In [None]:
# For every target: run a randomized hyper-parameter search for `model`, keep
# the fitted search object, then refit the matching ensemble from `sls` and
# print its RMSE on the training data.
# The stray `for y in y_col:` / `return grids` fragment that used to end this
# cell was a syntax error (empty loop body, `return` outside a function) and
# has been removed.
grids_sl = []
for i, y in enumerate(y_col):
    print(y)
    y_train = df_total_train_eval[y].values
    print(X_train.shape, y_train.shape)

    grid = RandomizedSearchCV(model, param_distributions=params, n_iter=n_iter,
                              cv=cv, verbose=verbose,
                              scoring="neg_mean_squared_error")
    grid.fit(X_train, y_train)
    grids_sl.append(grid)

    sls[i].fit(X_train, y_train)
    preds = sls[i].predict(X_train)
    print(rmse(y_train, preds))

In [None]:
# NOTE(review): every other Evaluator in this notebook is constructed with a
# scorer as its first argument and fitted with estimators plus parameter
# dicts; confirm that this bare construction and three-argument fit are valid.
evaluator = Evaluator()
evaluator.fit(X_train, y_train,sl)

In [None]:
# Inspect `models`; the cells below read each value as a sequence with the
# estimator at index 0 and its parameter entry at index 1.
models

In [None]:
# Split `models` into the named estimator list and the per-tag parameter map.
ests = [(name, spec[0]) for name, spec in models.items()]
params = dict((name, spec[1]) for name, spec in models.items())

In [None]:
from mlens.model_selection import Evaluator

from scipy.stats import randint

# Here we name the estimators ourselves
#ests = [('gnb', GaussianNB()), ('knn', KNeighborsClassifier())]

# Now we map parameters to these
# The gnb doesn't have any parameters so we can skip it
#pars = {'n_neighbors': randint(2, 20)}
#params = {'knn': pars}

# Fit one Evaluator per target column and keep them all in `evaluators`.
# NOTE: `rmse_scorer` is created in the cell that follows this one, and
# `ests` / `params` in the cell before it; all must already exist.
evaluators=[]
for i,y in enumerate(y_col):
    print(y)
    y_train=df_total_train_eval[y].values
    print(X_train.shape,y_train.shape)
    # A fresh evaluator per target so each keeps its own results.
    evaluator = Evaluator(rmse_scorer, cv=10,  verbose=1)

    
    evaluator.fit(X_train,y_train, ests, params, n_iter=5)
    evaluators.append(evaluator)

In [None]:
from mlens.metrics import make_scorer
from mlens.metrics import rmse

# RMSE is an error, so lower is better (greater_is_better=False).
# Extra keyword arguments given to make_scorer are forwarded to the metric;
# `rmse` only takes the targets and the predictions, so the former
# `average='micro'` (a classification-metric option) has been dropped.
rmse_scorer = make_scorer(rmse, greater_is_better=False)

In [None]:
# Named estimators and their parameter maps, both derived from `models`;
# the trailing tuple displays the two lengths so they can be compared.
base_learners = [(name, spec[0]) for name, spec in models.items()]
param_dicts_base = dict((name, spec[1]) for name, spec in models.items())
len(param_dicts_base), len(base_learners)

In [None]:
from scipy.stats import uniform, randint
SEED=1
# We consider the following models (or base learners)
gb = XGBRegressor()
ls = Lasso(alpha=1e-6, normalize=True)
el = ElasticNet(alpha=1e-6, normalize=True)
rf = RandomForestRegressor(random_state=SEED)

base_learners = [
    ('ls', ls), ('el', el), ('rf', rf), ('gb', gb)
]

# Put their parameter dictionaries in a dictionary with the
# estimator names as keys.
# Reminder: scipy's uniform(loc, scale) samples from [loc, loc + scale] and
# randint(low, high) samples integers from [low, high).
param_dicts_base = {
    'ls':
    {'alpha': uniform(1e-6, 1e-5)},
    'el':
    {'alpha': uniform(1e-6, 1e-5),
     'l1_ratio': uniform(0, 1)
    },
    'gb':
    {'learning_rate': uniform(0.02, 0.04),
     # This key used to appear twice in the literal; the first value,
     # uniform(0.55, 0.66), was silently overwritten by this one (and could
     # sample above 1). Only the effective entry is kept, in the same position.
     'colsample_bytree': uniform(0.6, 0.4),
     'min_child_weight': randint(30, 60),
     'max_depth': randint(3, 7),
     'subsample': uniform(0.4, 0.2),
     'n_estimators': randint(150, 200),
     'reg_lambda': uniform(1, 2),
     'reg_alpha': uniform(1, 2),
    },
    'rf':
    {'max_depth': randint(2, 5),
     'min_samples_split': randint(5, 20),
     'min_samples_leaf': randint(10, 20),
     'n_estimators': randint(50, 100),
     'max_features': uniform(0.6, 0.3)
    }
}


In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from mlens.model_selection import Evaluator

# Every base learner needs a matching entry in the parameter map.
assert len(base_learners) == len(param_dicts_base)

# MAE is an error metric, hence greater_is_better=False.
scorer = make_scorer(mean_absolute_error, greater_is_better=False)

evl = Evaluator(scorer, cv=2, random_state=SEED, verbose=5)

# Evaluate each base learner under two preprocessing cases: standard scaling
# ('sc') and no preprocessing ('none').
# n_iter: bump this up to do a larger grid search
evl.fit(
    X_train,
    y_train,
    estimators=base_learners,
    param_dicts=param_dicts_base,
    preprocessing={'sc': [StandardScaler()], 'none': []},
    n_iter=2,
)




In [None]:
# Show the results of the evaluator fit above as a table.
pd.DataFrame(evl.results)

In [None]:
# Candidate meta learners and the distributions their parameters are drawn from.
meta_learners = [('gb', gb), ('el', el)]

param_dicts = dict(
    el=dict(
        alpha=uniform(1e-5, 1),
        l1_ratio=uniform(0, 1),
    ),
    gb=dict(
        learning_rate=uniform(0.01, 0.2),
        subsample=uniform(0.5, 0.5),
        reg_lambda=uniform(0.1, 1),
        n_estimators=randint(10, 100),
    ),
)


# Put the layers you don't want to tune into an ensemble with model selection turned on
# Just remember to turn it off when you're done!





In [None]:
# Display both lengths: each meta learner should have a parameter dict.
len(meta_learners), len(param_dicts)

In [None]:
# NOTE: `in_layer` / `preprocess` are built here but not handed to the
# evaluator, because the `preprocessing` argument below is commented out.
in_layer = SuperLearner(model_selection=True)
in_layer.add(base_learners)
preprocess = [in_layer]

# n_iter: bump this up to do a larger grid search
evl.fit(
    X_train,
    y_train,
    estimators=meta_learners,
    param_dicts=param_dicts,
    # preprocessing={'meta': preprocess},
    n_iter=5,
)



In [None]:
# Show the results of the meta-learner evaluation above as a table.
pd.DataFrame(evl.results)

In [None]:
def lognuniform(low=0, high=1, size=None, base=np.exp(1), random_state=None):
    """Draw samples whose logarithm (in ``base``) is uniform on [low, high).

    The exponent is sampled uniformly and ``base`` is raised to it, so the
    returned values lie between ``base**low`` and ``base**high``.

    Parameters
    ----------
    low, high : float
        Bounds of the exponent.
    size : int, tuple of int or None
        Output shape; ``None`` returns a single value.
    base : float
        Base of the exponentiation (defaults to e).
    random_state : None, int or numpy.random.RandomState
        ``None`` (default) uses NumPy's global random state, exactly as
        before. An int seeds a private generator so the draw is reproducible;
        an existing ``RandomState`` instance is used as is.

    Returns
    -------
    float or numpy.ndarray
        The sampled value(s).
    """
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)
    return np.power(base, rng.uniform(low, high, size))

In [None]:
uniform.rvs?

In [None]:
np.random.uniform?

In [None]:
scipy.stats.uniform?

In [None]:
# Scratch example: symbolic statistics on a continuous random variable.
# NOTE(review): the star import hides where names come from; `Symbol`,
# `Interval`, `cos` and `Var` are assumed to be in scope after it — TODO
# confirm, and prefer explicit imports.
from sympy.stats import *
x = Symbol('x')
# Random variable built from the expression 2*x over Interval(0, 1).
X = ContinuousRV(x, 2*x, Interval(0, 1))

P(X>.5) 

Var(X) # variance

E(2*cos(X)+X**2) # complex expressions are ok too
