In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP

import types

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
# pd.set_option('display.max_columns', 2000)
# pd.set_option('display.width', 2000)
# pd.set_option('display.expand_frame_repr', True)
pd.set_option('display.max_colwidth', -1)

In [4]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [5]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [6]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [7]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [8]:
# mytrial = []
mytrial = list(pd.read_pickle('../trial/mystacknet.pkl').T.to_dict().values())
df_trial = pd.DataFrame(mytrial)
len(mytrial)

24

In [13]:
df_trial_catboost = pd.read_pickle('../trial/catboost.pkl')
df_trial_catboost.loc[452:452][['datetime','remark','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
452,2019-05-16 06:46:11.662876,tune 437,30,1.853478,5e-06,1.900535,7.2e-05,0.047056


In [10]:
df_trial_xgbm = pd.read_pickle('../trial/xgbm.pkl')
df_trial_xgbm.loc[1172:1172][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1172,2019-05-16 09:18:54.750120,tune 1099,25,1.924779,1.6e-05,1.96556,6.6e-05,0.040782


In [11]:
df_trial_lgbm = pd.read_pickle('../trial/lgbm.pkl')
df_trial_lgbm.loc[2156:2156][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
2156,2019-05-16 22:30:28.865774,tune 2025,25,1.911171,2e-05,1.952897,0.000129,0.041726


In [12]:
df_trial_randomforest = pd.read_pickle('../trial/randomforest.pkl')
df_trial_randomforest.loc[297:297][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
297,2019-05-16 12:50:31.611238,tune 239,80,2.080347,3e-06,2.086208,6.3e-05,0.00586


In [14]:
df_trial_extratrees = pd.read_pickle('../trial/extratrees.pkl')
df_trial_extratrees.loc[459:459][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
459,2019-05-16 11:30:08.432873,tune 427,70,2.109147,1.1e-05,2.110347,2.5e-05,0.0012


In [15]:
df_trial_gradientboosting = pd.read_pickle('../trial/gradientboosting.pkl')
df_trial_gradientboosting.loc[306:306][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
306,2019-05-16 14:16:23.815368,tune 220,70,1.914761,1.9e-05,1.951837,0.000144,0.037076


In [16]:
df_trial_knn = pd.read_pickle('../trial/knn.pkl')
df_trial_knn.loc[17:17][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
17,2019-05-16 23:11:48.201772,,7,2.026411,0.002031,2.081674,0.013139,0.055264


In [19]:
df_trial_svr = pd.read_pickle('../trial/svr.pkl')
df_trial_svr.loc[7:7][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
7,2019-05-16 23:57:06.120463,,7,2.050366,0.003073,2.092532,0.01549,0.042166


In [None]:
df_trial_fm = pd.read_pickle('../trial/fm.pkl')
df_trial_fm.loc[0:0][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

In [17]:
df_trial_lasso = pd.read_pickle('../trial/lasso.pkl')
df_trial_lasso.loc[8:8][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
8,2019-05-16 11:54:52.999400,,165,2.106014,0.002088,2.156098,0.008084,0.050085


In [18]:
df_trial_ridge = pd.read_pickle('../trial/ridge.pkl')
df_trial_ridge.loc[15:15][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
15,2019-05-16 11:27:58.322760,,165,2.14516,0.002662,2.180493,0.012586,0.035332


In [21]:
columns_ = df_trial_catboost.loc[452]['param']['columns']+
df_trial_xgbm.loc[1172]['param']['columns']+
df_trial_lgbm.loc[2156]['param']['columns']+
df_trial_randomforest.loc[297]['param']['columns']+
df_trial_extratrees.loc[459]['param']['columns']+
df_trial_gradientboosting.loc[306]['param']['columns']

unique_columns = list(set(columns_))
common_columns = []
main_columns = []
for col in unique_columns:
    if columns_.count(col)==6:
        common_columns.append(col)
    if columns_.count(col)>int(6*.8):
        main_columns.append(col)
len(unique_columns), len(common_columns),len(main_columns)

(165, 7, 14)

In [22]:
columns = df_trial_catboost.loc[452]['param']['columns'].copy()

In [229]:
unique_columns

["change_quantiles{'ql': 0.4, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_65',
 'q05_roll_std_10',
 'Hilbert_mean_2',
 'min__roll_std',
 'spkt_welch_density__coeff_4',
 'abs_max_6',
 'spkt_welch_density__coeff_99',
 'abs_q01_7',
 'abs_max_8',
 'kurt_7',
 'partial_autocorrelationlag_5',
 'q05_2',
 'abs_max_roll_mean_100',
 'spkt_welch_density__coeff_50',
 'abs_q01_6',
 'ave10_6',
 "binned_entropy{'max_bins': 10}",
 'min_roll_std_100',
 'spkt_welch_density__coeff_113',
 'med_6',
 'spkt_welch_density__coeff_42',
 '4th_peak_psd',
 '5000min_quantile05',
 'fft_coefficientcoeff_36__attr_"abs"',
 'spkt_welch_density__coeff_41',
 'q01_roll_std_100',
 'max_to_min',
 'spkt_welch_density__coeff_17',
 'abs_min_5',
 'q25_roll_std_100',
 '5000skewness_mean_',
 "change_quantiles{'ql': 0.2, 'qh': 0.4, 'isabs': True, 'f_agg': 'mean'}",
 'peak_to_average_power_ratio__roll_mean',
 'fft_coefficientcoeff_20__attr_"abs"',
 'fft_coefficientcoeff_50__attr_"abs"',
 "change_quantiles

In [226]:
columns

['spkt_welch_density__coeff_3',
 'abs_q25_5',
 'q25_roll_std_100',
 'abs_q75_6',
 'abs_q75_7',
 'spkt_welch_densitycoeff_2',
 'abs_q01_4',
 'iqr_6',
 'q05_roll_std_100',
 'q05_roll_std_1000',
 'median__roll_std',
 'abs_q01_5',
 "number_peaks{'n': 10}",
 'FFT_Mag_75q0',
 "value_count{'value': 1}",
 'q01_roll_std_100',
 'abs_q95_2',
 'abs_q95_6',
 'MA_1000MA_std_mean_7',
 'q05_roll_std_10',
 'q01_roll_std_1000',
 'abs_max_roll_mean_1000',
 'abs_q75_2',
 'abs_q05_6',
 '5000std_quantile25',
 "number_crossing_m{'m': 1}",
 "autocorrelation{'lag': 5}",
 'q75_roll_std_10',
 'q05_2',
 '5000smoothness_quantile05']

In [227]:
common_columns

['q25_roll_std_100',
 'abs_q25_5',
 'iqr_6',
 'abs_q01_4',
 'abs_q75_7',
 'spkt_welch_density__coeff_3',
 'spkt_welch_densitycoeff_2']

In [228]:
main_columns

['q25_roll_std_100',
 'abs_q25_5',
 'q05_roll_std_1000',
 'abs_q95_2',
 'abs_q75_6',
 'iqr_6',
 "autocorrelation{'lag': 5}",
 'median__roll_std',
 'abs_q01_4',
 'q05_roll_std_100',
 'abs_q75_7',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_3',
 'spkt_welch_densitycoeff_2']

In [38]:
trial_lst = [
    df_trial_catboost.loc[452], df_trial_xgbm.loc[1172], df_trial_lgbm.loc[2156],df_trial_gradientboosting.loc[306],
             df_trial_randomforest.loc[297],df_trial_extratrees.loc[459],
            df_trial_knn.loc[17], df_trial_svr.loc[0],
            df_trial_lasso.loc[8], df_trial_ridge.loc[15],
    df_trial_fm.loc[4]
            ]
name_lst = [
    'catboost342', 'xgbm865', 'lgbm1398',
    'gradientboosting83','randomforest122','extratrees250',
    'knn4','svr0','lasso4','ridge3'
]

df_train_stacknet = pd.DataFrame()
df_test_stacknet = pd.DataFrame()
for df_, name_ in zip(trial_lst, name_lst):
    try:
        df_test_stacknet_i = df_['df_test_pred']
        df_test_stacknet[name_] = np.mean(df_test_stacknet_i.drop(columns=['index']).values, axis=1)
        
        df_train_stacknet_i = df_['df_valid_pred']
        df_train_stacknet[name_] = df_train_stacknet_i['predict']
        
    except Exception as e:
        print(name_, 'exception')

df_train_stacknet['index'] = df_train_stacknet_i['index']
df_train_stacknet = pd.merge(df_train_stacknet, df_train[['y','index', 'group','label']+columns], on='index')
df_test_stacknet['index'] = df_test_stacknet_i['index']
df_test_stacknet = pd.merge(df_test_stacknet, df_test[['index']+columns], on='index')

In [39]:
param = {'columns': name_lst+columns,
 'kfold': {'n_splits': 8,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},#stratified
 'scaler': {'cls': 'StandardScaler'},
 'algorithm': 
         
# {'cls': 'xgb.XGBRegressor',
#  'init': {'max_depth': 3,
#   'max_bin': 38,
#   'eta': 0.27801915385245873,
#   'colsample_bytree': 0.9416983653127328,
#   'min_child_weight': 238,
#   'n_estimators': 165,
#   'subsample': 0.7471829960670435,
#   'reg_lambda': 0.6813060508093833,
#   'reg_alpha': 0.36085980027529035,
#   'n_jobs': 16},
#  'fit': {'eval_metric': 'mae', 'verbose': False, 'early_stopping_rounds': 200}},
         
         [{'cls': 'cb.CatBoostRegressor',
  'init': {'num_trees': 589,
   'depth': 6,
   'learning_rate': 0.05293979792364842,
   'l2_leaf_reg': 78.065140245968,
   'bagging_temperature': 0.9302786271852079,
   'random_strength': 0.4247048326178351,
   'random_state': 651},
  'fit': {'verbose': False, 'early_stopping_rounds': 200}},]
         
#          {'cls': 'lgb.LGBMRegressor',
#  'init': {'learning_rate': 0.17076106120259138,
#   'feature_fraction': 0.6842101917408698,
#   'bagging_fraction': 0.8986268312800509,
#   'min_data_in_leaf': 243,
#   'lambda_l1': 4.612300279009062,
#   'lambda_l2': 97.21686371760525,
#   'max_bin': 28,
#   'num_leaves': 11,
#   'random_state': 6805,
#   'n_jobs': 32},
#  'fit': {'eval_metric': 'mae', 'verbose': False, 'early_stopping_rounds': 200}}
         
 'feature_importance': {'is_output': True,
  'permutation_feature_importance': True,
  'permutation_random_state': 1}}

In [40]:
# run one try
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train_stacknet, param, df_test = df_test_stacknet, trial=mytrial, remark='|'.join(name_lst)+'->catboost')

In [1]:
# EP.evaluate(df_feature_importances)
# df_feature_importances.sort_values(by=['average_model_weight'], ascending=False)

In [210]:
EP.select_features_(df_train_stacknet, param, mytrial, df_test=df_test_stacknet, nfeats_best=1, nfeats_removed_per_try=1, key='average_model_weight')

In [None]:
def stack_net_regressor(param_net, metric="mae", restacking_columns=[], use_retraining=True, random_state=42, n_jobs=1, verbose=1)

    df_train_currentlv = df_train.drop(columns=['1dcnnlogmel']).copy()
    df_test_currentlv = df_test.copy()
    for lvl,  (models, params) in enumerate(zip(stacknet, paramnet)):
        print('=============level:{}==========='.format(lvl+1))
        df_train_nextlv = pd.DataFrame()
        df_test_nextlv = pd.DataFrame()
        columns = df_train_currentlv.columns.drop(['y','index']).tolist()
        for m, p in zip(models, params):
            df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train_stacknet, param, df_test = df_test_stacknet, trial=mytrial, remark='|'.join(name_lst)+'->catboost')
            
            print(m.__name__, val_mae, train_mae)
            df_train_nextlv['{}'.format(m.__name__)] = df_train_pred.sort_values(by=['index'])['predict'].values
            df_test_nextlv['{}'.format(m.__name__)] = df_test_pred.sort_values(by=['index'])[np.arange(5)].mean(axis=1).values
        df_train_nextlv['index'] = df_train_currentlv['index']
        df_train_nextlv['y'] = df_train_currentlv['y']
        df_test_nextlv['index'] = df_test_currentlv['index']
        df_train_currentlv = df_train_nextlv.copy()
        df_test_currentlv = df_test_nextlv.copy()

In [None]:

def process2(df_train, df_test, columns, models, splits):
    
    trial = []
    X_train,y_train = df_train[columns].values, df_train['y'].values
    X_test = df_test[columns].values
    scaler = StandardScaler()
    scaler.fit(np.concatenate([X_train, X_test], axis=0))
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    
    for fold_n, (train_index, valid_index) in enumerate(splits):
        for m in models:
            m.fit(X_train[train_index], y_train[train_index])
            y_train_pred = m.predict(X_train[train_index])
            y_valid_pred = m.predict(X_train[valid_index])
            metric_train = mean_absolute_error(y_train[train_index], y_train_pred)
            metric_valid = mean_absolute_error(y_train[valid_index], y_valid_pred)
            trial.append({'algorithm':m.__class__.__name__, 'fold_n':fold_n, 'n_columns':len(columns), 'metric_train':metric_train, 'metric_valid':metric_valid, 'y_train_pred':y_train_pred, 'y_valid_pred':y_valid_pred, 'columns':columns})
            
    return pd.DataFrame(trial) 
    

def stacknet_fit_predict (df_train, df_test, param, trial=None, remark=None, restacking=True, verbose=1):

    previous_input_columns = []
    current_input_columns = param_net['columns'].copy()
    
    if param['kfold']['type'] == 'stratified':
        assert 'label' in df_train.columns.tolist(), 'label is not in df_train'
        folds = StratifiedKFold(n_splits=param['kfold']['n_splits'], shuffle=param['kfold']['shuffle'], random_state=param['kfold']['random_state'])
        splits = list(folds.split(df_train, df_train['label']))
    elif param['kfold']['type'] == 'group':
        assert 'group' in df_train.columns.tolist(), 'group is not in df_train'
        folds = GroupKFold(n_splits=param['kfold']['n_splits'])
        splits = list(folds.split(df_train, groups=df_train['group']))
    elif param['kfold']['type'] == 'timeseries':
        folds = TimeSeriesSplit(n_splits=param['kfold']['n_splits'])
        splits = list(folds.split(df_train))
    else:
        folds = KFold(n_splits=param['kfold']['n_splits'], shuffle=param['kfold']['shuffle'], random_state=param['kfold']['random_state'])
        splits = list(folds.split(df_train))
        
    algorithm_param = param['algorithm'].copy()

    ##start the level training 
    for level in range(algorithm_param):   
            
        this_level_algorithm_param = algorithm_param[level] 
        


        for fold_n, (train_index, valid_index) in enumerate(splits):
            
                        m.fit(X_train[train_index], y_train[train_index])
            y_train_pred = m.predict(X_train[train_index])
            y_valid_pred = m.predict(X_train[valid_index])
            metric_train = mean_absolute_error(y_train[train_index], y_train_pred)
            metric_valid = mean_absolute_error(y_train[valid_index], y_valid_pred)
            
            
            if len(previous_input_columns) > 0 and restacking:
                current_input_columns = previous_input_columns + current_input_columns
            X_train = df_train[current_input_columns]
            for param in this_level_algorithm_param:
                
                
                
                

            #print ( i, i, i)
            metrics_i=[0.0 for k in range(len(this_level_models))]

            X_train, X_cv = current_input[train_index], current_input[test_index]
            y_train, y_cv = y[train_index], y[test_index]
            w_train,w_cv=None,None


            all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)(delayed(_parallel_build_estimators)(clone(this_level_models[d]),X_train,
                    y_train,
                    w_train, d)
                for d in range(len(this_level_models)))

            # Reduce
            this_level_estimators_ = [ [t[0],t[1]] for t in all_results]

            this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False)

            if self.use_retraining==False:
                fitted_estimators=[t[0] for t in  this_level_estimators_]
                if i==0:
                    self.estimators_.append([fitted_estimators]) #add level
                else :
                    self.estimators_[level].append(fitted_estimators)

            #parallel predict
            all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)(
                delayed(_parallel_predict_proba)(
                    this_level_estimators_[d][0],
                    X_cv,d)                
                for d in range(len(this_level_models)))
            this_level_predictions_ = [ [t[0],t[1]] for t in all_results]

            this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 
            predictions_=[t[0] for t in  this_level_predictions_]

            for d in range (len(this_level_models)):
                this_model=this_level_models[d]
                if hasattr(this_model, 'predict') :
                    metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv)
                    metrics[d]+=metrics_i[d]
                    if self.verbose>0:
                        print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d]))
                elif  predictions_[d].shape==y_cv.shape  :                       
                    metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv)
                    metrics[d]+=metrics_i[d]
                    if self.verbose>0:
                        print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d]))


            #concatenate predictions  
            preds_concat_=np.column_stack( predictions_)
            #print ("preds_concat_.shape", preds_concat_.shape)
            if type(train_oof) is type(None):
                train_oof=np.zeros ( (current_input.shape[0], preds_concat_.shape[1]))
                self._level_dims.append(preds_concat_.shape[1])


            if self._level_dims[level]!=preds_concat_.shape[1]:
                raise Exception ("Output dimensionality among folds is not consistent as %d!=%d " % ( self._level_dims[level],preds_concat_.shape[1]))
            train_oof[test_index] = preds_concat_
            if self.verbose>0:
                print ("=========== end of fold %i in level %d ===========" %(i+1,level))
            i+=1

        metrics=np.array(metrics)
        metrics/=float(iter_count)

        if self.verbose>0:
            for d in range(len(this_level_models)):
                this_model=this_level_models[d]
                if hasattr(this_model, 'predict_proba') :
                     print ("Level %d, model %d , %s===%f " % (level, d, self.metric_name, metrics[d]))


        #done cv

        if self.use_retraining:

            all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)(
                delayed(_parallel_build_estimators)(
                    clone(this_level_models[d]),
                    current_input,
                    y,
                    sample_weight, d)
                for d in range(len(this_level_models)))              


            this_level_estimators_ = [ [t[0],t[1]] for t in all_results]

            this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False)

            fitted_estimators=[t[0] for t in  this_level_estimators_]

            self.estimators_.append([fitted_estimators]) #add level   


        previous_input=current_input
        current_input=train_oof
        if self.verbose>0:
            print ("Output dimensionality of level %d is %d " % ( level,current_input.shape[1] ))             



        end_of_level_time=time.time()
        if self.verbose>0:            
            print ("====================== End of Level %d ======================" % (level))  
            print (" level %d lasted %f seconds " % (level,end_of_level_time-start_level_time ))

    end_of_fit_time=time.time()        
    if self.verbose>0:          

        print ("====================== End of fit ======================")  
        print (" fit() lasted %f seconds " % (end_of_fit_time-start_time )) 

In [24]:
# df_trial.loc[23,'remark']='catboost342|xgbm865|lgbm1398|randomforest122|extratrees250|gradientboosting83->xgbm'
# mytrial = list(df_trial.T.to_dict().values())

In [225]:
# df_trial.to_pickle('../trial/mystacknet.pkl')

In [47]:
df_trial = pd.DataFrame(mytrial)
df_trial['kfold-type'] = df_trial['param'].apply(lambda x: x['kfold']['type'])
df_trial[['datetime','remark', 'kfold-type', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,kfold-type,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-13 00:25:17.222382,,stratified,3,1.856642,1e-05,1.863103,0.000308,0.00646
1,2019-05-13 00:29:56.214032,,,3,1.855961,7e-06,1.863527,0.000309,0.007566
2,2019-05-13 00:33:24.920561,,stratified,33,1.770134,5e-06,1.814401,0.000207,0.044267
3,2019-05-13 03:10:35.057918,,stratified,3,1.851728,4e-06,1.860129,0.000447,0.008401
4,2019-05-13 03:11:42.825614,,stratified,33,1.747469,6e-06,1.792307,0.000265,0.044838
5,2019-05-13 06:12:18.673488,,stratified,50,1.727325,1e-05,1.780657,0.000212,0.053333
6,2019-05-13 06:25:53.650724,,stratified,15,1.784024,5e-06,1.81603,0.000338,0.032006
7,2019-05-13 06:52:23.907165,catboost342|xgboost865|lgbm1398->catboost,stratified,33,1.744401,1.6e-05,1.784729,0.00022,0.040328
8,2019-05-13 07:16:06.946028,catboost342|xgboost865|lgbm1398->catboost,stratified,50,1.727243,2.8e-05,1.772981,0.000192,0.045737
9,2019-05-13 07:17:58.811960,catboost342|xgboost865|lgbm1398->catboost,stratified,15,1.772729,1e-05,1.803558,0.000273,0.03083


In [49]:
idx=27
df_test_pred = df_trial.loc[idx]['df_test_pred']
df_submit = pd.DataFrame()
df_submit['time_to_failure'] = np.mean(df_test_pred.drop(columns=['index']).values, axis=1)
df_submit['seg_id'] = df_test_pred['index']
df_submit.to_csv('submission_mystacknet_{}.csv'.format(idx), index=False)

In [44]:
# #  tune hypterparameters
# def objective(trial):
        
#     num_trees = trial.suggest_int('num_trees', 200, 1000)
#     depth = trial.suggest_int('depth', 2, 10)
#     learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
#     l2_leaf_reg = trial.suggest_uniform('l2_leaf_reg', 0.001, 100)
#     bagging_temperature = trial.suggest_uniform('bagging_temperature', .6, 1)
#     random_strength = trial.suggest_uniform('random_strength', .001, 1)
#     random_state = trial.suggest_int('random_state', 1, 9999)
        
#     args={
#         'columns':name_lst+columns,
#         'kfold':{
#             'n_splits': 8,
#             'random_state': 1985,
#             'shuffle': True,
#             'type': 'stratified'
#         },
#         'scaler':{
#             'cls':'StandardScaler',
#         },
#         'algorithm':{
#             'cls':'cb.CatBoostRegressor',
#             'init':{
#                 "num_trees":num_trees,
#                 "depth":depth,
#                 "learning_rate":learning_rate,
#                 "l2_leaf_reg":l2_leaf_reg,
#                 "bagging_temperature":bagging_temperature,
#                 "random_strength":random_strength,
#                 "random_state":random_state,
#             },
#             'fit':{
# #                 'eval_metric':'mae',
#                 'verbose':False,
#                 'early_stopping_rounds':200,
#             },
#         },
#         'feature_importance':{
#             'is_output':False,
#             'permutation_feature_importance':False,
#             'permutation_random_state':1,
#         },
#     }
    
#     df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train_stacknet, args, df_test = df_test_stacknet, trial=mytrial, remark='tune mystacknet12')
#     val_mae_mean = np.mean(df_his.valid)
#     val_mae_var = np.var(df_his.valid)
#     train_mae_mean = np.mean(df_his.train)
#     train_mae_var = np.var(df_his.train)
    
#     trial.set_user_attr('val_mae', val_mae_mean)
#     trial.set_user_attr('train_mae', train_mae_mean)
#     trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
#     trial.set_user_attr('val_mae_var', val_mae_var)

#     return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

# study = optuna.create_study()
# study.optimize(objective, n_trials=50)

In [48]:
df_submit = pd.DataFrame()
df_submit['time_to_failure'] = np.mean(df_test_pred.drop(columns=['index']).values, axis=1)
df_submit['seg_id'] = df_test_pred['index']
df_submit.to_csv('submission_mystacknet_2.csv', index=False)

In [13]:
catboost_param = {
    'num_trees':500,
    'learning_rate':0.05,
    'depth':6,
    'l2_leaf_reg':.01,
    'bagging_temperature':1,
    'random_strength':.1,
}

xgboost_param = {
    'eta':0.4,
    'colsample_bytree':0.8,
    'subsample':0.8,
    'min_child_weight':400,
    'lambda':0,
    'alpha':0,
    'max_bin':40,
    'max_depth':2,
    'n_estimators':200,
}

lightgbm_param = {
    'learning_rate':0.35,
    'feature_fraction':0.8,
    'bagging_fraction':0.8,
    'min_data_in_leaf':600,
    'lambda_l1':1,
    'lambda_l2':1,
    'max_bin':40,
    'max_depth':2,
}

none_param = {
}

knn_param = {
    'n_neighbors':30,
}

In [19]:
paramnet = [[none_param, catboost_param, xgboost_param, lightgbm_param],[none_param]]

In [20]:
def StackNetRegressor(models, metric="mae", folds=folds, restacking=restacking, use_retraining=use_retraining, random_state=12345, n_jobs=1, verbose=1):
    df_train_currentlv = df_train.drop(columns=['1dcnnlogmel']).copy()
    df_test_currentlv = df_test.copy()
    for lvl,  (models, params) in enumerate(zip(stacknet, paramnet)):
        print('=============level:{}==========='.format(lvl+1))
        df_train_nextlv = pd.DataFrame()
        df_test_nextlv = pd.DataFrame()
        columns = df_train_currentlv.columns.drop(['y','index']).tolist()
        for m, p in zip(models, params):
            val_mae, train_mae, _, df_train_pred, df_test_pred = process(regressor_cls=m, df_train=df_train_currentlv, columns=columns, param=p, df_test=df_test_currentlv, is_output_feature_importance=False)
            print(m.__name__, val_mae, train_mae)
            df_train_nextlv['{}'.format(m.__name__)] = df_train_pred.sort_values(by=['index'])['predict'].values
            df_test_nextlv['{}'.format(m.__name__)] = df_test_pred.sort_values(by=['index'])[np.arange(5)].mean(axis=1).values
        df_train_nextlv['index'] = df_train_currentlv['index']
        df_train_nextlv['y'] = df_train_currentlv['y']
        df_test_nextlv['index'] = df_test_currentlv['index']
        df_train_currentlv = df_train_nextlv.copy()
        df_test_currentlv = df_test_nextlv.copy()
    return 

CatBoostRegressor 1.7983494430465483 1.7602843741882388
XGBRegressor 1.8250859527951513 1.7936942696351459
LGBMRegressor 1.8315016252390983 1.81365957491913
Ridge 1.8000908886663367 1.7989752841122517


In [7]:
xgbm_columns = ['q25_roll_std_100',
 'abs_q01_4',
 'spkt_welch_density__coeff_3',
 'q05_roll_std_100',
 'abs_q25_5',
 'spkt_welch_densitycoeff_2',
 'iqr_6',
 "number_peaks{'n': 10}",
 'q05_roll_std_1000',
 'abs_q75_6',
 'abs_q95_2',
 'median__roll_std',
 'q05_5',
 'abs_q75_7',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_25',
 'spkt_welch_density__coeff_4',
 'abs_max_1',
 '5000std_quantile05',
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 'Hilbert_mean_2',
 'FFT_Mag_75q0',
 '5000smoothness_entropy_',
 'MA_1000MA_std_mean_7',
 "number_peaks{'n': 5}"]

lightgbm_columns = ['q25_roll_std_100',
 'abs_q01_4',
 'q05_roll_std_1000',
 'abs_q25_5',
 'spkt_welch_densitycoeff_2',
 "number_peaks{'n': 10}",
 'spkt_welch_density__coeff_3',
 'iqr_6',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'abs_q75_7',
 '5000clearance_factor_quantile25',
 '5000smoothness_std_',
 'q05_roll_std_100',
 'spkt_welch_density__coeff_25',
 'abs_q75_6',
 'abs_max_1',
 'median__roll_std',
 'q05_roll_std_10',
 'abs_q05_7',
 "autocorrelation{'lag': 5}",
 'abs_max_7',
 'FFT_Mag_95q0',
 'abs_max_8',
 '5000smoothness_entropy_',
 'spkt_welch_density__coeff_42']

catboost_columns=['spkt_welch_density__coeff_3',
 'abs_q25_5',
 'q25_roll_std_100',
 'abs_q75_6',
 'abs_q75_7',
 'spkt_welch_densitycoeff_2',
 'abs_q01_4',
 'iqr_6',
 'q05_roll_std_100',
 'q05_roll_std_1000',
 'median__roll_std',
 'abs_q01_5',
 "number_peaks{'n': 10}",
 'FFT_Mag_75q0',
 "value_count{'value': 1}",
 'q01_roll_std_100',
 'abs_q95_2',
 'abs_q95_6',
 'MA_1000MA_std_mean_7',
 'q05_roll_std_10',
 'q01_roll_std_1000',
 'abs_max_roll_mean_1000',
 'abs_q75_2',
 'abs_q05_6',
 '5000std_quantile25',
 "number_crossing_m{'m': 1}",
 "autocorrelation{'lag': 5}",
 'q75_roll_std_10',
 'q05_2',
 '5000smoothness_quantile05']

In [11]:
columns_ = xgbm_columns+lightgbm_columns+catboost_columns
columns = list(set(columns_))
common_columns = []
for col in columns:
    if(columns_.count(col)==3):
        common_columns.append(col)

len(set(columns_)), len(common_columns)

(54, 13)

In [24]:
hyper_parameters_range={
'learning_rate':(.01, .5),
'feature_fraction':(.6, 1),
'bagging_fraction':(0.6, 1),
'min_data_in_leaf':(200,800),
'lambda_l1':(1e-6, 1e2),
'lambda_l2':(1e-6, 1e2),
'max_bin':(10, 100),
'num_leaves':(4, 128),
'random_state':(1, 9999),
}

In [26]:
        learning_rate = trial.suggest_uniform('learning_rate', .01, .5)
        feature_fraction = trial.suggest_uniform('feature_fraction', .6, 1)
        bagging_fraction = trial.suggest_uniform('bagging_fraction', 0.6, 1)
        min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 200,800)
        lambda_l1 = trial.suggest_uniform('lambda_l1', 1e-6, 1e2)
        lambda_l2 = trial.suggest_uniform('lambda_l2', 1e-6, 1e2)
        max_bin = trial.suggest_int('max_bin', 10, 100)
    #     max_depth = trial.suggest_int('max_depth', 2, 6)
        num_leaves = trial.suggest_int('num_leaves', 4, 128)
        random_state = trial.suggest_int('random_state', 1, 9999)
        
        args={
            'columns':columns,
            'kfold':{
                'n_splits': 8,
                'random_state': 1985,
                'shuffle': True,
                'type': 'stratified'
            },
            'scaler':{
                'cls':'StandardScaler',
            },
            'algorithm':{
                'cls':'lgb.LGBMRegressor',
                'init':{
                    'learning_rate':learning_rate,
                    'feature_fraction':feature_fraction,
                    'bagging_fraction':bagging_fraction,
                    'min_data_in_leaf':min_data_in_leaf,
                    'lambda_l1':lambda_l1,
                    'lambda_l2':lambda_l2,
                    'max_bin':max_bin,
                    'num_leaves':num_leaves,
                    'random_state':random_state,
                    'n_jobs':32
                },
                'fit':{
                    'eval_metric':'mae',
                    'verbose':False,
                    'early_stopping_rounds':200,
                },
            },
            'feature_importance':{
                'is_output':False,
                'permutation_feature_importance':False,
                'permutation_random_state':1,
            },
        }

learning_rate 0.01 0.5
feature_fraction 0.6 1
bagging_fraction 0.6 1
min_data_in_leaf 200 800
lambda_l1 1e-06 100.0
lambda_l2 1e-06 100.0
max_bin 10 100
num_leaves 4 128
random_state 1 9999


In [67]:
args = param.copy()
args['columns']=[123]

In [68]:
param

{'columns': ['abs_max_8',
  'abs_max_4',
  'abs_max_3',
  'q05_5',
  'MA_1000MA_std_mean_7',
  "autocorrelation{'lag': 5}",
  'max_to_min',
  'spkt_welch_density__coeff_4',
  'Hilbert_mean_2',
  'abs_max_roll_mean_1000',
  'min_roll_mean_100',
  'spkt_welch_density__coeff_25',
  'q75_roll_mean_10',
  "number_peaks{'n': 5}",
  'abs_q05_7',
  'abs_max_5',
  'FFT_Mag_95q0',
  'abs_q75_6',
  'Hilbert_mean_6',
  'spkt_welch_density__coeff_3',
  "number_peaks{'n': 10}",
  "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
  'abs_max_1',
  'q05_2',
  'abs_q01_4',
  'abs_max_2',
  'abs_max_6',
  'q25_roll_std_100',
  'abs_q75_7',
  '5000clearance_factor_quantile25',
  'iqr_6',
  'abs_q25_5',
  '5000smoothness_entropy_',
  '5000peak_peak_amp_max_',
  'min_roll_std_100',
  'min__roll_std',
  'spkt_welch_density__coeff_42',
  '5000smoothness_std_',
  'median__roll_std',
  '5000std_quantile05',
  'abs_q95_2',
  'abs_max_7',
  "number_crossing_m{'m': 1}",
  'q05_roll_std_10',

In [79]:
# hyper_parameters_range:{
# 'learning_rate':(.01, .5)
# 'feature_fraction':(.6, 1)
# 'bagging_fraction':(0.6, 1)
# 'min_data_in_leaf':(200,800)
# 'lambda_l1':(1e-6, 1e2)
# 'lambda_l2':(1e-6, 1e2)
# 'max_bin':(10, 100)
# 'num_leaves':(4, 128)
# 'random_state':(1, 9999)
# }
    
param={
    'columns':columns,
    'kfold':{
        'n_splits': 8,
        'random_state': 1985,
        'shuffle': True,
        'type': 'stratified'
    },
    'scaler':{
        'cls':'StandardScaler',
    },
    'algorithm':{
        'cls':'lgb.LGBMRegressor',
        'init':{
            'learning_rate':(.01, .5),
            'feature_fraction':(.6, 1),
            'bagging_fraction':(0.6, 1),
            'min_data_in_leaf':(200,800),
            'lambda_l1':(1e-6, 1e2),
            'lambda_l2':(10, 100),
            'max_bin':(10, 100),
            'num_leaves':(4, 128),
            'random_state':(1, 9999),
            'n_jobs':32
        },
        'fit':{
            'eval_metric':'mae',
            'verbose':False,
            'early_stopping_rounds':200,
        },
    },
    'feature_importance':{
        'is_output':False,
        'permutation_feature_importance':False,
        'permutation_random_state':1,
    },
}

def tune_hypterparameters(df_train, param, df_test = None, mytrial=None, remark='', n_trials=200):
    
    def suggest_params(trial, hyper_parameters):
        params = hyper_parameters.copy()
        for k,v in params.items():
            if isinstance(v, tuple):
                params[k] = trial.suggest_int(k, v[0], v[1]) if (isinstance(v[0], int))&(isinstance(v[1], int)) else trial.suggest_uniform(k, v[0], v[1])
                print(params)
            elif isinstance(v, dict):
                suggest_params(trial, v)
        return params

    #  tune hypterparameters
    def objective(trial):
        
        args = suggest_params(trial, param)
        print(args)
        
        df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
    
        val_mae_mean = np.mean(df_his.valid)
        val_mae_var = np.var(df_his.valid)
        train_mae_mean = np.mean(df_his.train)
        train_mae_var = np.var(df_his.train)

        trial.set_user_attr('val_mae', val_mae_mean)
        trial.set_user_attr('train_mae', train_mae_mean)
        trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
        trial.set_user_attr('val_mae_var', val_mae_var)

        return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

    study = optuna.create_study()
    study.optimize(objective, n_trials=n_trials)
    
    return study

In [80]:
mytrial2 = []
tune_hypterparameters(df_train, param, mytrial=mytrial2, n_trials=20)

{'learning_rate': 0.3584109841611596, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.3584109841611596, 'feature_fraction': 0.6387578964405992, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.3584109841611596, 'feature_fraction': 0.6387578964405992, 'bagging_fraction': 0.7388710784733064, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.3584109841611596, 'feature_fraction': 0.6387578964405992, 'bagging_fraction': 0.7388710784733064, 'min_data_in_leaf': 510, 'lambda_l1': (1e-

[W 2019-05-10 05:34:22,279] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.07031987720530039, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.07031987720530039, 'feature_fraction': 0.6623259427375757, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.07031987720530039, 'feature_fraction': 0.6623259427375757, 'bagging_fraction': 0.6278577337642358, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.07031987720530039, 'feature_fraction': 0.6623259427375757, 'bagging_fraction': 0.6278577337642358, 'min_data_in_leaf': 434, 'lambda_l1': 

[W 2019-05-10 05:34:22,368] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.2960455969065444, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2960455969065444, 'feature_fraction': 0.9447513114012782, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2960455969065444, 'feature_fraction': 0.9447513114012782, 'bagging_fraction': 0.7785132036942328, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2960455969065444, 'feature_fraction': 0.9447513114012782, 'bagging_fraction': 0.7785132036942328, 'min_data_in_leaf': 749, 'lambda_l1': (1e-

[W 2019-05-10 05:34:22,456] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.2075119583852304, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2075119583852304, 'feature_fraction': 0.9107765612715379, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2075119583852304, 'feature_fraction': 0.9107765612715379, 'bagging_fraction': 0.6012287684442111, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2075119583852304, 'feature_fraction': 0.9107765612715379, 'bagging_fraction': 0.6012287684442111, 'min_data_in_leaf': 704, 'lambda_l1': (1e-

[W 2019-05-10 05:34:22,545] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.4521621952188789, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.4521621952188789, 'feature_fraction': 0.7805042623268381, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.4521621952188789, 'feature_fraction': 0.7805042623268381, 'bagging_fraction': 0.798731355380249, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.4521621952188789, 'feature_fraction': 0.7805042623268381, 'bagging_fraction': 0.798731355380249, 'min_data_in_leaf': 401, 'lambda_l1': (1e-06

[W 2019-05-10 05:34:22,635] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.25429272229489025, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.25429272229489025, 'feature_fraction': 0.9223046773948729, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.25429272229489025, 'feature_fraction': 0.9223046773948729, 'bagging_fraction': 0.8278233869247359, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.25429272229489025, 'feature_fraction': 0.9223046773948729, 'bagging_fraction': 0.8278233869247359, 'min_data_in_leaf': 738, 'lambda_l1': 

[W 2019-05-10 05:34:22,724] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.44734153312820224, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.44734153312820224, 'feature_fraction': 0.9050142294716783, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.44734153312820224, 'feature_fraction': 0.9050142294716783, 'bagging_fraction': 0.763479831452314, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.44734153312820224, 'feature_fraction': 0.9050142294716783, 'bagging_fraction': 0.763479831452314, 'min_data_in_leaf': 734, 'lambda_l1': (1

[W 2019-05-10 05:34:22,815] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.11378390281290626, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.11378390281290626, 'feature_fraction': 0.8910101637994026, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.11378390281290626, 'feature_fraction': 0.8910101637994026, 'bagging_fraction': 0.976353064235837, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.11378390281290626, 'feature_fraction': 0.8910101637994026, 'bagging_fraction': 0.976353064235837, 'min_data_in_leaf': 553, 'lambda_l1': (1

[W 2019-05-10 05:34:22,906] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.4592985312281811, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.4592985312281811, 'feature_fraction': 0.6922356140914232, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.4592985312281811, 'feature_fraction': 0.6922356140914232, 'bagging_fraction': 0.9772722574971712, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.4592985312281811, 'feature_fraction': 0.6922356140914232, 'bagging_fraction': 0.9772722574971712, 'min_data_in_leaf': 335, 'lambda_l1': (1e-

[W 2019-05-10 05:34:22,998] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.2846236509378044, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2846236509378044, 'feature_fraction': 0.8549520871301064, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2846236509378044, 'feature_fraction': 0.8549520871301064, 'bagging_fraction': 0.7660515567388837, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2846236509378044, 'feature_fraction': 0.8549520871301064, 'bagging_fraction': 0.7660515567388837, 'min_data_in_leaf': 739, 'lambda_l1': (1e-

[W 2019-05-10 05:34:23,089] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.2933309573832454, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2933309573832454, 'feature_fraction': 0.7473284013059696, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2933309573832454, 'feature_fraction': 0.7473284013059696, 'bagging_fraction': 0.9083078584505286, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2933309573832454, 'feature_fraction': 0.7473284013059696, 'bagging_fraction': 0.9083078584505286, 'min_data_in_leaf': 325, 'lambda_l1': (1e-

[W 2019-05-10 05:34:23,181] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.42217029510241766, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.42217029510241766, 'feature_fraction': 0.6006572411786842, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.42217029510241766, 'feature_fraction': 0.6006572411786842, 'bagging_fraction': 0.8613194132394444, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.42217029510241766, 'feature_fraction': 0.6006572411786842, 'bagging_fraction': 0.8613194132394444, 'min_data_in_leaf': 346, 'lambda_l1': 

[W 2019-05-10 05:34:23,275] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.2825102999249023, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2825102999249023, 'feature_fraction': 0.9324218805220897, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2825102999249023, 'feature_fraction': 0.9324218805220897, 'bagging_fraction': 0.7385999329908042, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.2825102999249023, 'feature_fraction': 0.9324218805220897, 'bagging_fraction': 0.7385999329908042, 'min_data_in_leaf': 439, 'lambda_l1': (1e-

[W 2019-05-10 05:34:23,367] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.3826713683372623, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.3826713683372623, 'feature_fraction': 0.8256540904079067, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.3826713683372623, 'feature_fraction': 0.8256540904079067, 'bagging_fraction': 0.7717594841376858, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.3826713683372623, 'feature_fraction': 0.8256540904079067, 'bagging_fraction': 0.7717594841376858, 'min_data_in_leaf': 747, 'lambda_l1': (1e-

[W 2019-05-10 05:34:23,461] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.09767730778396544, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.09767730778396544, 'feature_fraction': 0.7701611820467169, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.09767730778396544, 'feature_fraction': 0.7701611820467169, 'bagging_fraction': 0.6447724354449421, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.09767730778396544, 'feature_fraction': 0.7701611820467169, 'bagging_fraction': 0.6447724354449421, 'min_data_in_leaf': 643, 'lambda_l1': 

[W 2019-05-10 05:34:23,555] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.12187038546802388, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.12187038546802388, 'feature_fraction': 0.6687057396528031, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.12187038546802388, 'feature_fraction': 0.6687057396528031, 'bagging_fraction': 0.7276744107480912, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.12187038546802388, 'feature_fraction': 0.6687057396528031, 'bagging_fraction': 0.7276744107480912, 'min_data_in_leaf': 515, 'lambda_l1': 

[W 2019-05-10 05:34:23,649] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.227067271440082, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.227067271440082, 'feature_fraction': 0.8460375603980786, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.227067271440082, 'feature_fraction': 0.8460375603980786, 'bagging_fraction': 0.7912880362427912, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.227067271440082, 'feature_fraction': 0.8460375603980786, 'bagging_fraction': 0.7912880362427912, 'min_data_in_leaf': 460, 'lambda_l1': (1e-06, 

[W 2019-05-10 05:34:23,745] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.20738726053854944, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.20738726053854944, 'feature_fraction': 0.7929256024480484, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.20738726053854944, 'feature_fraction': 0.7929256024480484, 'bagging_fraction': 0.7590488511761067, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.20738726053854944, 'feature_fraction': 0.7929256024480484, 'bagging_fraction': 0.7590488511761067, 'min_data_in_leaf': 618, 'lambda_l1': 

[W 2019-05-10 05:34:23,840] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.21545050345137437, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.21545050345137437, 'feature_fraction': 0.6459745586028118, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.21545050345137437, 'feature_fraction': 0.6459745586028118, 'bagging_fraction': 0.9411749805530267, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.21545050345137437, 'feature_fraction': 0.6459745586028118, 'bagging_fraction': 0.9411749805530267, 'min_data_in_leaf': 563, 'lambda_l1': 

[W 2019-05-10 05:34:23,937] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

{'learning_rate': 0.052191289737010906, 'feature_fraction': (0.6, 1), 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.052191289737010906, 'feature_fraction': 0.8381385576138125, 'bagging_fraction': (0.6, 1), 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.052191289737010906, 'feature_fraction': 0.8381385576138125, 'bagging_fraction': 0.7693045035349892, 'min_data_in_leaf': (200, 800), 'lambda_l1': (1e-06, 100.0), 'lambda_l2': (10, 100), 'max_bin': (10, 100), 'num_leaves': (4, 128), 'random_state': (1, 9999), 'n_jobs': 32}
{'learning_rate': 0.052191289737010906, 'feature_fraction': 0.8381385576138125, 'bagging_fraction': 0.7693045035349892, 'min_data_in_leaf': 421, 'lambda_l

[W 2019-05-10 05:34:24,032] Setting trial status as TrialState.FAIL because of the following error: LightGBMError('Parameter seed should be of type int, got "1,9999"',)
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/optuna/study.py", line 407, in _run_trial
    result = func(trial)
  File "<ipython-input-79-73775c1159c4>", line 69, in objective
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark=remark)
  File "/home/ubuntu/jupyter/wangzhaoxu/ep/LANLEarthquakePrediction2019/common.py", line 134, in process
    model.fit(X_train, y_train, **fit_param)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 683, in fit
    callbacks=callbacks)
  File "/home/ubuntu/anaconda3/envs/tensorflow_gpu_p36/lib/python3.6/site-packages/lightgbm/sklearn.py", line 542, in fit
    callbacks=callb

<optuna.study.Study at 0x7f998a1fee48>

In [13]:
models=[ 
            
            [RandomForestRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
             ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
             GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
#              LogisticRegression(random_state=1),
#              PCA(n_components=4,random_state=1)
             ],
            
            [RandomForestRegressor (n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
            
            
            ]

In [9]:
from pystacknet.pystacknet import StackNetRegressor

In [18]:
model=StackNetRegressor(models, metric="mae", folds=5, restacking=True,use_retraining=True,  random_state=12345,n_jobs=8, verbose=1)

model.fit(df_train[columns].values, df_train['y'].values)

Input Dimensionality 54 at Level 0 
3 models included in Level 0 
Fold 1/5 , model 0 , mae===1.988092 
Fold 1/5 , model 1 , mae===2.041847 
Fold 1/5 , model 2 , mae===1.730922 
Fold 2/5 , model 0 , mae===1.990283 
Fold 2/5 , model 1 , mae===2.041897 
Fold 2/5 , model 2 , mae===1.724613 
Fold 3/5 , model 0 , mae===1.976886 
Fold 3/5 , model 1 , mae===2.039847 
Fold 3/5 , model 2 , mae===1.714903 
Fold 4/5 , model 0 , mae===1.957330 
Fold 4/5 , model 1 , mae===2.016984 
Fold 4/5 , model 2 , mae===1.703124 
Fold 5/5 , model 0 , mae===1.983725 
Fold 5/5 , model 1 , mae===2.052205 
Fold 5/5 , model 2 , mae===1.711925 
Output dimensionality of level 0 is 3 
 level 0 lasted 216.686837 seconds 
Input Dimensionality 57 at Level 1 
1 models included in Level 1 
Fold 1/5 , model 0 , mae===1.665880 
Fold 2/5 , model 0 , mae===1.661494 
Fold 3/5 , model 0 , mae===1.642096 
Fold 4/5 , model 0 , mae===1.634593 
Fold 5/5 , model 0 , mae===1.639487 
Output dimensionality of level 1 is 1 
 level 1 laste

In [19]:
y_preds=model.predict(df_test[columns].values)

1 estimators included in Level 0 
1 estimators included in Level 1 


In [20]:
df_submit = pd.DataFrame()
df_submit['time_to_failure'] = y_preds
df_submit['seg_id'] = df_test['index']
df_submit.to_csv('submission.csv', index=False)

array([[3.71812551],
       [5.45694043],
       [5.62894652],
       ...,
       [3.95122569],
       [1.79156266],
       [8.54803083]])

In [25]:
param={
    'algorithm': {
        'cls': 'cb.CatBoostRegressor',
        'fit': {
            'early_stopping_rounds': 200,
#             'eval_metric': 'mae',
            'verbose': False
        },
        'init': {
#             'task_type':"GPU",
        }
    },
    'columns': sorted_columns,
    'feature_importance': {
        'is_output': True,
        'permutation_feature_importance': True,
        'permutation_random_state': 1
    },
    'kfold': {
        'n_splits': 8,
        'random_state': 1985,
        'shuffle': True,
        'type': 'group'
    },
    'scaler': {
        'cls': 'StandardScaler'
    }
}

In [55]:
# run one try
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, df_trial.loc[8]['param'], df_test = df_test, trial=mytrial)

In [None]:
#  select features by permutation_weight
EP.select_features_(df_train, param, mytrial, nfeats_best=25, nfeats_removed_per_try=5, key='average_model_weight')

In [8]:
#  tune hypterparameters
def objective(trial):
        
    num_trees = trial.suggest_int('num_trees', 200, 1000)
    depth = trial.suggest_int('depth', 2, 10)
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
    l2_leaf_reg = trial.suggest_uniform('l2_leaf_reg', 0.001, 100)
    bagging_temperature = trial.suggest_uniform('bagging_temperature', .6, 1)
    random_strength = trial.suggest_uniform('random_strength', .001, 1)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':df_trial.loc[12]['param']['columns'],
        'kfold':{
            'n_splits': 8,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
        },
        'algorithm':{
            'cls':'cb.CatBoostRegressor',
            'init':{
                "num_trees":num_trees,
                "depth":depth,
                "learning_rate":learning_rate,
                "l2_leaf_reg":l2_leaf_reg,
                "bagging_temperature":bagging_temperature,
                "random_strength":random_strength,
                "random_state":random_state,
            },
            'fit':{
#                 'eval_metric':'mae',
                'verbose':False,
                'early_stopping_rounds':200,
            },
        },
        'feature_importance':{
            'is_output':False,
            'permutation_feature_importance':False,
            'permutation_random_state':1,
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune stratified feats')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-09 06:27:49,563] Finished a trial resulted in value: 0.30110057098517246. Current best value is 0.30110057098517246 with parameters: {'num_trees': 821, 'depth': 8, 'learning_rate': 0.18032354032236617, 'l2_leaf_reg': 69.51728929081189, 'bagging_temperature': 0.6021312118199852, 'random_strength': 0.8926427907436212, 'random_state': 2521}.
[I 2019-05-09 06:42:27,392] Finished a trial resulted in value: 0.32453379542705335. Current best value is 0.30110057098517246 with parameters: {'num_trees': 821, 'depth': 8, 'learning_rate': 0.18032354032236617, 'l2_leaf_reg': 69.51728929081189, 'bagging_temperature': 0.6021312118199852, 'random_strength': 0.8926427907436212, 'random_state': 2521}.
[I 2019-05-09 06:43:52,307] Finished a trial resulted in value: 0.2589842926648078. Current best value is 0.2589842926648078 with parameters: {'num_trees': 914, 'depth': 5, 'learning_rate': 0.23118731015077143, 'l2_leaf_reg': 74.44630970873064, 'bagging_temperature': 0.9507324290640863, 'random_

[I 2019-05-09 08:23:22,518] Finished a trial resulted in value: 0.0615126153385211. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'random_strength': 0.43022467946427795, 'random_state': 290}.
[I 2019-05-09 08:41:50,462] Finished a trial resulted in value: 0.2932289845737951. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'random_strength': 0.43022467946427795, 'random_state': 290}.
[I 2019-05-09 08:42:09,967] Finished a trial resulted in value: 0.03680451879509619. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'r

[I 2019-05-09 09:25:57,810] Finished a trial resulted in value: 0.1299398744619727. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'random_strength': 0.43022467946427795, 'random_state': 290}.
[I 2019-05-09 09:26:37,420] Finished a trial resulted in value: 0.197607722509585. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'random_strength': 0.43022467946427795, 'random_state': 290}.
[I 2019-05-09 09:27:23,849] Finished a trial resulted in value: 0.10397717074031415. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'ra

[I 2019-05-09 10:37:16,280] Finished a trial resulted in value: 0.08016581408065405. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'random_strength': 0.43022467946427795, 'random_state': 290}.
[I 2019-05-09 10:37:33,365] Finished a trial resulted in value: 0.01202529462281827. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 'random_strength': 0.43022467946427795, 'random_state': 290}.
[I 2019-05-09 10:38:01,397] Finished a trial resulted in value: 0.15390162154663786. Current best value is 0.0073351222707855315 with parameters: {'num_trees': 205, 'depth': 3, 'learning_rate': 0.015072839740710518, 'l2_leaf_reg': 99.1201034896227, 'bagging_temperature': 0.7284182224047564, 

[I 2019-05-09 11:05:45,923] Finished a trial resulted in value: 0.1342813821503539. Current best value is 0.0032916048186533136 with parameters: {'num_trees': 209, 'depth': 2, 'learning_rate': 0.01139270177285133, 'l2_leaf_reg': 30.40577639597169, 'bagging_temperature': 0.6004655760312939, 'random_strength': 0.47554601799733465, 'random_state': 5590}.
[I 2019-05-09 11:07:57,966] Finished a trial resulted in value: 0.10199929308453635. Current best value is 0.0032916048186533136 with parameters: {'num_trees': 209, 'depth': 2, 'learning_rate': 0.01139270177285133, 'l2_leaf_reg': 30.40577639597169, 'bagging_temperature': 0.6004655760312939, 'random_strength': 0.47554601799733465, 'random_state': 5590}.
[I 2019-05-09 11:08:16,032] Finished a trial resulted in value: 0.00782401671107166. Current best value is 0.0032916048186533136 with parameters: {'num_trees': 209, 'depth': 2, 'learning_rate': 0.01139270177285133, 'l2_leaf_reg': 30.40577639597169, 'bagging_temperature': 0.6004655760312939,

In [59]:
df_test_pred = df_trial.loc[12]['df_test_pred']

In [61]:
df_submit = pd.DataFrame()
df_submit['time_to_failure'] = np.mean(df_test_pred.drop(columns=['index']).values, axis=1)
df_submit['seg_id'] = df_test_pred['index']
df_submit.to_csv('submission.csv', index=False)

In [9]:
df_trial = pd.DataFrame(mytrial)

In [20]:
#[df_trial['mae_diff']<.05].sort_values(by=['val_mae'])
df_trial[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(20)

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-08 09:13:51.441156,1071,1.594586,2.2e-05,1.740538,0.000213,0.145952
1,2019-05-08 12:28:21.328235,200,1.576306,1.2e-05,1.704236,0.000296,0.127929
2,2019-05-08 13:27:41.334635,150,1.58093,2.1e-05,1.706429,0.000226,0.125499
3,2019-05-08 13:39:54.456132,100,1.576934,1.9e-05,1.696824,0.000187,0.11989
4,2019-05-08 13:47:35.439605,50,1.60094,3.2e-05,1.700155,0.000159,0.099215
5,2019-05-08 14:03:19.640908,50,1.601548,5.7e-05,1.701784,0.000143,0.100236
6,2019-05-08 14:10:21.323747,45,1.599223,4.4e-05,1.697374,0.000184,0.098151
7,2019-05-08 14:16:57.268365,40,1.603685,1.8e-05,1.699155,0.000205,0.09547
8,2019-05-08 14:23:25.597068,35,1.600778,5.2e-05,1.696229,0.000154,0.095451
9,2019-05-08 14:29:08.694610,30,1.616618,9.3e-05,1.708761,0.000215,0.092142


In [18]:
df_trial.loc[143:143][['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
143,2019-05-09 10:18:13.843707,35,1.790835,7e-06,1.84079,0.000248,0.049955


In [63]:
# df_trial.to_pickle('../trial/catboost.pkl')