In [30]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

In [2]:
%%HTML
<style>
   /* Set the widths of the notebook container, menubar and main toolbar elements. */
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
# Load the train and test frames from the pickles under ../feats.
# NOTE(review): unpickling can execute arbitrary code -- only load files produced by this project.
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [4]:
# Capped integer label derived from the target: int(y) when y < 15, otherwise 15.
# NOTE(review): presumably consumed by EP's 'stratified' k-fold -- confirm in EP.
df_train['label'] = df_train['y'].apply(lambda x: int(x) if x < 15 else 15)

# Grouping key for the 'group' k-fold: the season, with season 17 folded into group 1.
# Work on an explicit copy: the array returned by `.values` may alias the frame's own
# data (and is read-only under pandas Copy-on-Write), so it must not be written to.
group = df_train['season'].values.copy()
group[group == 17] = 1
df_train['group'] = group

# 'season' is now represented by 'group'; drop it so it is not used as a feature.
# Note: this makes the cell non-idempotent (a second run has no 'season' column).
df_train = df_train.drop(columns=['season'])

In [5]:
# Feature matrix and target, both keyed by the 'index' column, for tsfresh's
# relevance filter. Despite the `test_` prefix these are built from df_train.
test_X = df_train.set_index('index').drop(columns=['y', 'group', 'label'])
test_y = df_train.set_index('index')['y']

# Keep only the names of the columns that select_features retains.
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [6]:
# Every column name except the bookkeeping / target columns.
original_columns = df_train.drop(columns=['index', 'y', 'label', 'group']).columns.tolist()

In [7]:
# Trial store backed by '../trial/gradientboosting.pkl'. auto_commit is disabled, so
# inserts are presumably only persisted by an explicit db.commit() -- confirm in dfdb.
db = DFDB('../trial/gradientboosting.pkl', auto_commit=False)

In [14]:
# Baseline experiment: all tsfresh-selected columns, 3-fold 'group' CV,
# StandardScaler and a GradientBoostingRegressor.
param = {
    'columns': tsfresh_columns,
    'kfold': {
        'n_splits': 3,
        'random_state': 1985,
        'shuffle': True,
        'type': 'group',
    },
    'scaler': {'cls': 'StandardScaler'},
    'algorithm': {
        'cls': 'GradientBoostingRegressor',
        # Presumably forwarded to the estimator constructor -- verify in EP.
        'init': {
            'max_depth': 3,
            'learning_rate': 0.04018024140881379,
            'n_estimators': 253,
            'subsample': 0.6846361552509973,
            'alpha': 0.9990262087522855,
            'random_state': 2784,
        },
        # Presumably extra keyword arguments for fit(); none are used here.
        'fit': {},
    },
}

In [15]:
# Run the baseline configuration once, asking for feature importances as well.
# `mytrial` is handed to EP.process and iterated afterwards to store the trial records.
mytrial = []
df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
    is_output_feature_importance=True,
)

In [17]:
# Store each record collected in `mytrial`, then re-read the trial table from db
# and display its MAE summary columns.
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()
df_trial[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-18 00:17:38.066856,1071,1.752555,0.00113,2.116371,0.006942,0.363816


In [19]:
# Check feature importances: take the importance table stored with trial 0 (the
# 1071-feature baseline run) and let EP.evaluate order the columns by
# 'average_permutation_weight'.
# NOTE(review): the next configuration keeps sorted_columns[:200], so the list is
# presumably ordered most-important-first -- confirm in EP.evaluate.
df_feature_importances = df_trial.loc[0]['df_feature_importances']
sorted_columns = EP.evaluate(df_feature_importances, key='average_permutation_weight')

In [21]:
# Same setup as the baseline run, restricted to the first 200 ranked columns.
param = {
    'columns': sorted_columns[:200],
    'kfold': {
        'n_splits': 3,
        'random_state': 1985,
        'shuffle': True,
        'type': 'group',
    },
    'scaler': {'cls': 'StandardScaler'},
    'algorithm': {
        'cls': 'GradientBoostingRegressor',
        # Presumably forwarded to the estimator constructor -- verify in EP.
        'init': {
            'max_depth': 3,
            'learning_rate': 0.04018024140881379,
            'n_estimators': 253,
            'subsample': 0.6846361552509973,
            'alpha': 0.9990262087522855,
            'random_state': 2784,
        },
        # Presumably extra keyword arguments for fit(); none are used here.
        'fit': {},
    },
}

In [22]:
# Coarse elimination pass: 10 features removed per try, tagged 'group3 RFE'.
mytrial = []
EP.select_features_(
    df_train,
    param,
    mytrial,
    nfeats_best=10,
    nfeats_removed_per_try=10,
    key='average_permutation_weight',
    remark='group3 RFE',
)

# Store every record the pass collected.
for trial_i in mytrial:
    db.insert(trial_i)

In [24]:
# Re-read the trial table from db and display its MAE summary columns.
df_trial = db.select()
df_trial[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-18 00:17:38.066856,1071,1.752555,0.00113,2.116371,0.006942,0.363816
1,2019-05-18 01:21:53.434843,200,1.775867,0.001112,2.080737,0.005969,0.30487
2,2019-05-18 01:27:53.875842,190,1.793276,0.001526,2.065772,0.00701,0.272496
3,2019-05-18 01:33:34.231327,180,1.800125,0.001787,2.048522,0.008908,0.248397
4,2019-05-18 01:38:53.834804,170,1.81046,0.001808,2.037984,0.010806,0.227525
5,2019-05-18 01:43:51.997373,160,1.812108,0.001798,2.037456,0.011449,0.225348
6,2019-05-18 01:48:30.721767,150,1.813111,0.001823,2.033778,0.011009,0.220667
7,2019-05-18 01:52:49.011959,140,1.813804,0.001789,2.034804,0.01089,0.220999
8,2019-05-18 01:56:46.439926,130,1.815882,0.001914,2.034085,0.010805,0.218204
9,2019-05-18 02:00:28.485857,120,1.815395,0.001898,2.034355,0.010786,0.21896


In [25]:
# db was created with auto_commit=False, so commit explicitly
# (presumably writes the pending inserts to the backing pickle -- confirm in dfdb).
db.commit()

In [26]:
# Fine elimination pass: restart from the configuration stored with trial 19 and
# remove a single feature per try, tagged 'group3 RFE2'.
mytrial = []
EP.select_features_(
    df_train,
    # Deep-copy the stored parameters (as the later tuning / remodel cells do) so the
    # helper cannot modify the dict object held inside df_trial.
    copy.deepcopy(df_trial.loc[19]['param']),
    mytrial,
    nfeats_best=10,
    nfeats_removed_per_try=1,
    key='average_permutation_weight',
    remark='group3 RFE2',
)

# Store every record the pass collected.
for trial_i in mytrial:
    db.insert(trial_i)

In [27]:
# Re-read the trial table and show the MAE summary of the 'group3 RFE2' rows only.
df_trial = db.select()
df_trial.loc[
    df_trial['remark'] == 'group3 RFE2',
    ['datetime', 'nfeatures', 'train_mae', 'train_mae_var', 'val_mae', 'val_mae_var', 'mae_diff'],
]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
21,2019-05-18 02:25:42.449585,20,1.857168,0.001773,2.042745,0.011272,0.185577
22,2019-05-18 02:26:13.795449,19,1.859586,0.001854,2.036428,0.012066,0.176842
23,2019-05-18 02:26:43.940775,18,1.865293,0.001703,2.031389,0.01167,0.166095
24,2019-05-18 02:27:12.267511,17,1.876224,0.001257,2.036826,0.010318,0.160602
25,2019-05-18 02:27:39.104342,16,1.875643,0.0012,2.037613,0.010111,0.16197
26,2019-05-18 02:28:04.254919,15,1.878467,0.001303,2.038172,0.010235,0.159706
27,2019-05-18 02:28:28.291016,14,1.875871,0.001235,2.036605,0.01046,0.160734
28,2019-05-18 02:28:50.774968,13,1.882226,0.001036,2.03987,0.009919,0.157644
29,2019-05-18 02:29:11.688392,12,1.882651,0.000986,2.041126,0.009463,0.158475
30,2019-05-18 02:29:30.861696,11,1.893193,0.001271,2.038021,0.011665,0.144828


In [28]:
# db was created with auto_commit=False, so commit explicitly
# (presumably writes the pending inserts to the backing pickle -- confirm in dfdb).
db.commit()

In [31]:
# Fresh record list for the tuning runs.
mytrial =[]
# Tune on the feature set of trial 23 (the 18-feature row, lowest val_mae among the
# 'group3 RFE2' rows displayed above); deep-copied so the stored list is not shared.
columns_ = copy.deepcopy(df_trial.loc[23]['param']['columns'])

###  tune hyperparameters
def objective(trial):
    """Optuna objective: evaluate one GradientBoostingRegressor configuration.

    Samples hyperparameters from ``trial``, runs ``EP.process`` on the module-level
    ``columns_`` with 3-fold 'group' CV, and stores summary statistics of
    ``df_his.train`` / ``df_his.valid`` (presumably per-fold MAE) on the trial.

    Args:
        trial: optuna trial used for sampling and for storing user attributes.

    Returns:
        ``|val_mae - train_mae| * val_mae`` -- small when both the train/validation
        gap and the validation error are small.
    """
    # Search space (sampling order kept as-is).
    # NOTE(review): scikit-learn documents `alpha` as used only by the 'huber' and
    # 'quantile' losses; no `loss` is configured here, so it may be inert -- confirm.
    # NOTE(review): `random_state` is tuned like a hyperparameter, so the study can
    # end up favouring a lucky seed.
    max_depth = trial.suggest_int('max_depth', 2, 6)
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
    n_estimators = trial.suggest_int('n_estimators', 100,500)
    subsample = trial.suggest_uniform('subsample', 0.6, 1.0)
    alpha = trial.suggest_uniform('alpha', 0.00001, 1.0)
    random_state = trial.suggest_int('random_state', 1, 9999)

    args = {
        'columns': columns_,
        'kfold': {
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group',
        },
        'scaler': {
            'cls': 'StandardScaler',
        },
        'algorithm': {
            'cls': 'GradientBoostingRegressor',
            'init': {
                "max_depth": max_depth,
                "learning_rate": learning_rate,
                "n_estimators": n_estimators,
                "subsample": subsample,
                "alpha": alpha,
                "random_state": random_state,
            },
            'fit': {},
        },
    }

    # `trial=mytrial` is the module-level record list later inserted into db,
    # not the optuna trial. Only the history frame is needed here.
    df_his, _, _, _ = EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 23')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but never stored; record it next to val_mae_var.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

# Seed the sampler so the search proposes the same hyperparameter sequence on every
# run (full reproducibility also requires EP.process to be deterministic for a given
# configuration -- confirm). 1985 matches the k-fold random_state used in this notebook.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=1985))
study.optimize(objective, n_trials=200)

[I 2019-05-18 07:03:18,230] Finished trial#0 resulted in value: 1.7662584297972572. Current best value is 1.7662584297972572 with parameters: {'max_depth': 6, 'learning_rate': 0.16733203099908625, 'n_estimators': 167, 'subsample': 0.9606969222410028, 'alpha': 0.5697891363858995, 'random_state': 2558}.
[I 2019-05-18 07:03:38,603] Finished trial#1 resulted in value: 0.23355283845162803. Current best value is 0.23355283845162803 with parameters: {'max_depth': 4, 'learning_rate': 0.018456121906802705, 'n_estimators': 149, 'subsample': 0.7407750785683224, 'alpha': 0.17931094650827334, 'random_state': 5539}.
[I 2019-05-18 07:03:59,607] Finished trial#2 resulted in value: 1.1364539270587581. Current best value is 0.23355283845162803 with parameters: {'max_depth': 4, 'learning_rate': 0.018456121906802705, 'n_estimators': 149, 'subsample': 0.7407750785683224, 'alpha': 0.17931094650827334, 'random_state': 5539}.
[I 2019-05-18 07:04:42,062] Finished trial#3 resulted in value: 0.8848879620073872. 

[I 2019-05-18 07:15:25,474] Finished trial#26 resulted in value: 0.9054898874320744. Current best value is 0.08829716776448694 with parameters: {'max_depth': 2, 'learning_rate': 0.011865696176961928, 'n_estimators': 202, 'subsample': 0.8675949715443189, 'alpha': 0.3678654085280548, 'random_state': 9852}.
[I 2019-05-18 07:16:03,211] Finished trial#27 resulted in value: 0.6311838774574736. Current best value is 0.08829716776448694 with parameters: {'max_depth': 2, 'learning_rate': 0.011865696176961928, 'n_estimators': 202, 'subsample': 0.8675949715443189, 'alpha': 0.3678654085280548, 'random_state': 9852}.
[I 2019-05-18 07:16:35,935] Finished trial#28 resulted in value: 1.4402323479042158. Current best value is 0.08829716776448694 with parameters: {'max_depth': 2, 'learning_rate': 0.011865696176961928, 'n_estimators': 202, 'subsample': 0.8675949715443189, 'alpha': 0.3678654085280548, 'random_state': 9852}.
[I 2019-05-18 07:17:01,329] Finished trial#29 resulted in value: 0.759028262084272

[I 2019-05-18 07:28:46,711] Finished trial#52 resulted in value: 1.704837329353914. Current best value is 0.08829716776448694 with parameters: {'max_depth': 2, 'learning_rate': 0.011865696176961928, 'n_estimators': 202, 'subsample': 0.8675949715443189, 'alpha': 0.3678654085280548, 'random_state': 9852}.
[I 2019-05-18 07:29:03,957] Finished trial#53 resulted in value: 0.6447852389096201. Current best value is 0.08829716776448694 with parameters: {'max_depth': 2, 'learning_rate': 0.011865696176961928, 'n_estimators': 202, 'subsample': 0.8675949715443189, 'alpha': 0.3678654085280548, 'random_state': 9852}.
[I 2019-05-18 07:29:28,804] Finished trial#54 resulted in value: 1.1776996094991512. Current best value is 0.08829716776448694 with parameters: {'max_depth': 2, 'learning_rate': 0.011865696176961928, 'n_estimators': 202, 'subsample': 0.8675949715443189, 'alpha': 0.3678654085280548, 'random_state': 9852}.
[I 2019-05-18 07:29:42,864] Finished trial#55 resulted in value: 0.4679309119891510

[I 2019-05-18 07:36:40,292] Finished trial#78 resulted in value: 0.19529012538974488. Current best value is 0.0612922537874038 with parameters: {'max_depth': 2, 'learning_rate': 0.010462202823845879, 'n_estimators': 135, 'subsample': 0.9393934649373464, 'alpha': 0.3649021047959538, 'random_state': 7128}.
[I 2019-05-18 07:36:54,276] Finished trial#79 resulted in value: 0.38932895731491. Current best value is 0.0612922537874038 with parameters: {'max_depth': 2, 'learning_rate': 0.010462202823845879, 'n_estimators': 135, 'subsample': 0.9393934649373464, 'alpha': 0.3649021047959538, 'random_state': 7128}.
[I 2019-05-18 07:37:01,572] Finished trial#80 resulted in value: 0.24050074253546094. Current best value is 0.0612922537874038 with parameters: {'max_depth': 2, 'learning_rate': 0.010462202823845879, 'n_estimators': 135, 'subsample': 0.9393934649373464, 'alpha': 0.3649021047959538, 'random_state': 7128}.
[I 2019-05-18 07:37:19,900] Finished trial#81 resulted in value: 0.4370132404524756. 

[I 2019-05-18 07:45:46,701] Finished trial#104 resulted in value: 0.5459309839872347. Current best value is 0.0612922537874038 with parameters: {'max_depth': 2, 'learning_rate': 0.010462202823845879, 'n_estimators': 135, 'subsample': 0.9393934649373464, 'alpha': 0.3649021047959538, 'random_state': 7128}.
[I 2019-05-18 07:46:11,084] Finished trial#105 resulted in value: 0.889131787042239. Current best value is 0.0612922537874038 with parameters: {'max_depth': 2, 'learning_rate': 0.010462202823845879, 'n_estimators': 135, 'subsample': 0.9393934649373464, 'alpha': 0.3649021047959538, 'random_state': 7128}.
[I 2019-05-18 07:46:22,723] Finished trial#106 resulted in value: 0.7701273183067279. Current best value is 0.0612922537874038 with parameters: {'max_depth': 2, 'learning_rate': 0.010462202823845879, 'n_estimators': 135, 'subsample': 0.9393934649373464, 'alpha': 0.3649021047959538, 'random_state': 7128}.
[I 2019-05-18 07:46:47,573] Finished trial#107 resulted in value: 0.893176942593353

[I 2019-05-18 07:58:07,696] Finished trial#130 resulted in value: 0.6475499101583992. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 07:58:34,344] Finished trial#131 resulted in value: 0.14615907904967848. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 07:59:06,327] Finished trial#132 resulted in value: 0.904198281829493. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 07:59:23,081] Finished trial#133 resulted in value: 0.35479726

[I 2019-05-18 08:05:16,712] Finished trial#156 resulted in value: 0.21403842173453144. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 08:05:23,431] Finished trial#157 resulted in value: 0.2129085656569615. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 08:05:42,353] Finished trial#158 resulted in value: 0.2570682587916378. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 08:05:55,207] Finished trial#159 resulted in value: 0.5281067

[I 2019-05-18 08:12:41,860] Finished trial#182 resulted in value: 0.32370885213065587. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 08:12:53,183] Finished trial#183 resulted in value: 0.6397418392385185. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 08:13:17,683] Finished trial#184 resulted in value: 0.362032193313339. Current best value is 0.05338540322453145 with parameters: {'max_depth': 2, 'learning_rate': 0.011200184654363315, 'n_estimators': 109, 'subsample': 0.8190892484126793, 'alpha': 0.20577496553471922, 'random_state': 2874}.
[I 2019-05-18 08:13:25,515] Finished trial#185 resulted in value: 0.35532041

In [33]:
# Store every record collected in `mytrial` during the tuning runs.
for trial_i in mytrial:
    db.insert(trial_i)

In [34]:
# Leaderboard of the 'tune 23' runs: keep those with a train/validation gap
# below 0.05 and list the ten with the lowest validation MAE.
df_trial = db.select()
(
    df_trial
    .loc[lambda d: (d['remark'] == 'tune 23') & (d['mae_diff'] < .05)]
    .sort_values(by=['val_mae'])
    .loc[:, ['datetime', 'remark', 'nfeatures', 'train_mae', 'train_mae_var',
             'val_mae', 'val_mae_var', 'mae_diff']]
    .head(10)
)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
181,2019-05-18 08:03:47.057550,tune 23,18,2.032553,0.002867,2.080601,0.011945,0.048049
149,2019-05-18 07:51:22.210733,tune 23,18,2.039637,0.002869,2.08589,0.011561,0.046253
167,2019-05-18 07:59:46.461391,tune 23,18,2.040172,0.00288,2.08624,0.011934,0.046068
43,2019-05-18 07:09:26.028333,tune 23,18,2.048812,0.002893,2.092745,0.011581,0.043933
203,2019-05-18 08:09:00.363434,tune 23,18,2.050189,0.002867,2.093182,0.011648,0.042993
49,2019-05-18 07:12:13.171108,tune 23,18,2.059042,0.002854,2.101067,0.01146,0.042025
108,2019-05-18 07:36:16.021443,tune 23,18,2.060967,0.002876,2.103015,0.011546,0.042049
99,2019-05-18 07:34:39.017661,tune 23,18,2.077046,0.002841,2.116267,0.011351,0.039221
98,2019-05-18 07:34:25.290910,tune 23,18,2.078141,0.002853,2.117434,0.011301,0.039293
195,2019-05-18 08:06:49.878070,tune 23,18,2.082113,0.002833,2.120605,0.011109,0.038493


In [35]:
# db was created with auto_commit=False, so commit explicitly
# (presumably writes the pending inserts to the backing pickle -- confirm in dfdb).
db.commit()

In [36]:
# Re-run the configuration stored with trial 181, with only the fold type switched
# to 'stratified'. The deep copy keeps the stored dict untouched.
param = copy.deepcopy(df_trial.loc[181]['param'])
param['kfold'].update(type='stratified')

# Single run; its record is collected in a fresh list.
mytrial = []
df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
    remark='remodel 181 use stratified',
)

In [37]:
# Store the record(s) collected in `mytrial`, then re-read the trial table from db.
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [38]:
# Show the MAE summary of the last row of the trial table
# (presumably the run that was just inserted).
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
232,2019-05-18 08:29:06.495866,18,2.04161,2e-06,2.053282,8.4e-05,0.011672


In [39]:
# db was created with auto_commit=False, so commit explicitly
# (presumably writes the pending inserts to the backing pickle -- confirm in dfdb).
db.commit()

In [40]:
# Fresh record list for the stratified tuning runs.
mytrial =[]
# Same 18-feature set as trial 23; deep-copied so the stored list is not shared.
columns_ = copy.deepcopy(df_trial.loc[23]['param']['columns'])

###  tune hyperparameters
def objective(trial):
    """Optuna objective: evaluate one GradientBoostingRegressor configuration.

    Samples hyperparameters from ``trial``, runs ``EP.process`` on the module-level
    ``columns_`` with 3-fold 'stratified' CV, and stores summary statistics of
    ``df_his.train`` / ``df_his.valid`` (presumably per-fold MAE) on the trial.

    Args:
        trial: optuna trial used for sampling and for storing user attributes.

    Returns:
        ``|val_mae - train_mae| * val_mae`` -- small when both the train/validation
        gap and the validation error are small.
    """
    # Search space (sampling order kept as-is).
    # NOTE(review): scikit-learn documents `alpha` as used only by the 'huber' and
    # 'quantile' losses; no `loss` is configured here, so it may be inert -- confirm.
    # NOTE(review): `random_state` is tuned like a hyperparameter, so the study can
    # end up favouring a lucky seed.
    max_depth = trial.suggest_int('max_depth', 2, 6)
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
    n_estimators = trial.suggest_int('n_estimators', 100,500)
    subsample = trial.suggest_uniform('subsample', 0.6, 1.0)
    alpha = trial.suggest_uniform('alpha', 0.00001, 1.0)
    random_state = trial.suggest_int('random_state', 1, 9999)

    args = {
        'columns': columns_,
        'kfold': {
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified',
        },
        'scaler': {
            'cls': 'StandardScaler',
        },
        'algorithm': {
            'cls': 'GradientBoostingRegressor',
            'init': {
                "max_depth": max_depth,
                "learning_rate": learning_rate,
                "n_estimators": n_estimators,
                "subsample": subsample,
                "alpha": alpha,
                "random_state": random_state,
            },
            'fit': {},
        },
    }

    # `trial=mytrial` is the module-level record list later inserted into db,
    # not the optuna trial. Only the history frame is needed here.
    df_his, _, _, _ = EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 23 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but never stored; record it next to val_mae_var.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

# Seed the sampler so the search proposes the same hyperparameter sequence on every
# run (full reproducibility also requires EP.process to be deterministic for a given
# configuration -- confirm). 1985 matches the k-fold random_state used in this notebook.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=1985))
study.optimize(objective, n_trials=200)

[I 2019-05-18 08:31:20,576] Finished trial#0 resulted in value: 0.8487510993798351. Current best value is 0.8487510993798351 with parameters: {'max_depth': 4, 'learning_rate': 0.15646143209239954, 'n_estimators': 497, 'subsample': 0.9033797562567608, 'alpha': 0.8187514966904319, 'random_state': 7590}.
[I 2019-05-18 08:32:55,000] Finished trial#1 resulted in value: 0.5472248805264728. Current best value is 0.5472248805264728 with parameters: {'max_depth': 6, 'learning_rate': 0.03518950760989953, 'n_estimators': 400, 'subsample': 0.7077154534172201, 'alpha': 0.5929302552040427, 'random_state': 8159}.
[I 2019-05-18 08:33:21,469] Finished trial#2 resulted in value: 0.6658442442597644. Current best value is 0.5472248805264728 with parameters: {'max_depth': 6, 'learning_rate': 0.03518950760989953, 'n_estimators': 400, 'subsample': 0.7077154534172201, 'alpha': 0.5929302552040427, 'random_state': 8159}.
[I 2019-05-18 08:34:02,386] Finished trial#3 resulted in value: 1.2993857023050523. Current

[I 2019-05-18 08:48:32,295] Finished trial#26 resulted in value: 0.2951213961557401. Current best value is 0.055056276176452604 with parameters: {'max_depth': 3, 'learning_rate': 0.015537904400981038, 'n_estimators': 282, 'subsample': 0.8281887773926867, 'alpha': 0.40785156291381175, 'random_state': 6105}.
[I 2019-05-18 08:49:02,241] Finished trial#27 resulted in value: 0.21279086271768344. Current best value is 0.055056276176452604 with parameters: {'max_depth': 3, 'learning_rate': 0.015537904400981038, 'n_estimators': 282, 'subsample': 0.8281887773926867, 'alpha': 0.40785156291381175, 'random_state': 6105}.
[I 2019-05-18 08:50:11,279] Finished trial#28 resulted in value: 0.8823840770279637. Current best value is 0.055056276176452604 with parameters: {'max_depth': 3, 'learning_rate': 0.015537904400981038, 'n_estimators': 282, 'subsample': 0.8281887773926867, 'alpha': 0.40785156291381175, 'random_state': 6105}.
[I 2019-05-18 08:51:04,467] Finished trial#29 resulted in value: 0.97070443

[I 2019-05-18 09:02:20,426] Finished trial#52 resulted in value: 0.11497801938221838. Current best value is 0.039179892850841454 with parameters: {'max_depth': 2, 'learning_rate': 0.029130401178004618, 'n_estimators': 212, 'subsample': 0.8050927985427604, 'alpha': 0.757025866148312, 'random_state': 458}.
[I 2019-05-18 09:03:01,974] Finished trial#53 resulted in value: 1.8704279140224411. Current best value is 0.039179892850841454 with parameters: {'max_depth': 2, 'learning_rate': 0.029130401178004618, 'n_estimators': 212, 'subsample': 0.8050927985427604, 'alpha': 0.757025866148312, 'random_state': 458}.
[I 2019-05-18 09:03:17,896] Finished trial#54 resulted in value: 0.11667732172645373. Current best value is 0.039179892850841454 with parameters: {'max_depth': 2, 'learning_rate': 0.029130401178004618, 'n_estimators': 212, 'subsample': 0.8050927985427604, 'alpha': 0.757025866148312, 'random_state': 458}.
[I 2019-05-18 09:03:49,181] Finished trial#55 resulted in value: 0.2255177847739099

[I 2019-05-18 09:10:28,967] Finished trial#78 resulted in value: 0.05953635903911195. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:10:40,177] Finished trial#79 resulted in value: 0.016391123167358745. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:10:52,089] Finished trial#80 resulted in value: 0.16815778000450513. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:11:04,706] Finished trial#81 resulted in value: 0.08025640

[I 2019-05-18 09:18:24,848] Finished trial#104 resulted in value: 0.2772056747278164. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:18:35,504] Finished trial#105 resulted in value: 0.07435634712230034. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:19:00,136] Finished trial#106 resulted in value: 0.7763116888034012. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:21:04,126] Finished trial#107 resulted in value: 0.2786796

[I 2019-05-18 09:29:02,535] Finished trial#130 resulted in value: 0.21135599669138289. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:29:20,153] Finished trial#131 resulted in value: 0.05504096756721807. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:29:37,178] Finished trial#132 resulted in value: 0.06014400666903797. Current best value is 0.013164329352310145 with parameters: {'max_depth': 2, 'learning_rate': 0.010378441846771454, 'n_estimators': 105, 'subsample': 0.6349959510876115, 'alpha': 0.5332819236732607, 'random_state': 3444}.
[I 2019-05-18 09:30:05,295] Finished trial#133 resulted in value: 0.34825

[I 2019-05-18 09:35:01,162] Finished trial#156 resulted in value: 0.06119215731701109. Current best value is 0.011725739310265625 with parameters: {'max_depth': 2, 'learning_rate': 0.010071840203483255, 'n_estimators': 101, 'subsample': 0.6001889656944641, 'alpha': 0.13677944350004018, 'random_state': 7775}.
[I 2019-05-18 09:35:10,297] Finished trial#157 resulted in value: 0.031207785531613262. Current best value is 0.011725739310265625 with parameters: {'max_depth': 2, 'learning_rate': 0.010071840203483255, 'n_estimators': 101, 'subsample': 0.6001889656944641, 'alpha': 0.13677944350004018, 'random_state': 7775}.
[I 2019-05-18 09:35:45,765] Finished trial#158 resulted in value: 0.08305493488112767. Current best value is 0.011725739310265625 with parameters: {'max_depth': 2, 'learning_rate': 0.010071840203483255, 'n_estimators': 101, 'subsample': 0.6001889656944641, 'alpha': 0.13677944350004018, 'random_state': 7775}.
[I 2019-05-18 09:35:56,263] Finished trial#159 resulted in value: 0.0

[I 2019-05-18 09:41:56,529] Finished trial#182 resulted in value: 0.15486607091131535. Current best value is 0.011725739310265625 with parameters: {'max_depth': 2, 'learning_rate': 0.010071840203483255, 'n_estimators': 101, 'subsample': 0.6001889656944641, 'alpha': 0.13677944350004018, 'random_state': 7775}.
[I 2019-05-18 09:42:08,688] Finished trial#183 resulted in value: 0.04636979304889517. Current best value is 0.011725739310265625 with parameters: {'max_depth': 2, 'learning_rate': 0.010071840203483255, 'n_estimators': 101, 'subsample': 0.6001889656944641, 'alpha': 0.13677944350004018, 'random_state': 7775}.
[I 2019-05-18 09:42:29,279] Finished trial#184 resulted in value: 0.07410941056511865. Current best value is 0.011725739310265625 with parameters: {'max_depth': 2, 'learning_rate': 0.010071840203483255, 'n_estimators': 101, 'subsample': 0.6001889656944641, 'alpha': 0.13677944350004018, 'random_state': 7775}.
[I 2019-05-18 09:43:41,238] Finished trial#185 resulted in value: 0.17

In [41]:
# Store every record collected in `mytrial` during the stratified tuning runs.
for trial_i in mytrial:
    db.insert(trial_i)

In [42]:
# Leaderboard of the 'tune 23 by stratified' runs: keep those with a train/validation
# gap below 0.05 and list the ten with the lowest validation MAE.
df_trial = db.select()
(
    df_trial
    .loc[lambda d: (d['remark'] == 'tune 23 by stratified') & (d['mae_diff'] < .05)]
    .sort_values(by=['val_mae'])
    .loc[:, ['datetime', 'remark', 'nfeatures', 'train_mae', 'train_mae_var',
             'val_mae', 'val_mae_var', 'mae_diff']]
    .head(10)
)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
273,2019-05-18 08:57:10.043037,tune 23 by stratified,18,1.902923,3.3e-05,1.950592,0.000158,0.047668
319,2019-05-18 09:12:07.563939,tune 23 by stratified,18,1.906247,2.5e-05,1.953937,9.7e-05,0.04769
317,2019-05-18 09:11:49.833092,tune 23 by stratified,18,1.907831,3e-05,1.954742,0.000137,0.046911
358,2019-05-18 09:27:08.903955,tune 23 by stratified,18,1.905993,4e-05,1.955575,0.000127,0.049581
391,2019-05-18 09:35:45.745440,tune 23 by stratified,18,1.915482,3.7e-05,1.957903,0.000126,0.04242
281,2019-05-18 09:00:49.003177,tune 23 by stratified,18,1.911366,2.8e-05,1.958019,0.00014,0.046653
412,2019-05-18 09:41:00.516634,tune 23 by stratified,18,1.925449,3.8e-05,1.963844,0.000146,0.038394
341,2019-05-18 09:21:15.488003,tune 23 by stratified,18,1.922183,2.8e-05,1.964267,0.000124,0.042084
314,2019-05-18 09:11:04.694909,tune 23 by stratified,18,1.923652,1.9e-05,1.964505,0.000141,0.040853
241,2019-05-18 08:37:05.224366,tune 23 by stratified,18,1.921602,3.1e-05,1.964746,0.000121,0.043144


In [35]:
# db was created with auto_commit=False, so commit explicitly
# (presumably writes the pending inserts to the backing pickle -- confirm in dfdb).
db.commit()

In [43]:
# Re-run the configuration stored with trial 273, with only the fold type switched
# back to 'group'. The deep copy keeps the stored dict untouched.
param = copy.deepcopy(df_trial.loc[273]['param'])
param['kfold'].update(type='group')

# Single run; its record is collected in a fresh list.
mytrial = []
df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
    remark='remodel 273 use group',
)

In [44]:
# Store the record(s) collected in `mytrial`, then re-read the trial table from db.
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [45]:
# Show the MAE summary of the last row of the trial table
# (presumably the run that was just inserted).
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
433,2019-05-18 11:08:44.349147,18,1.875567,0.001795,2.035,0.011964,0.159432


In [46]:
# db was created with auto_commit=False, so commit explicitly
# (presumably writes the pending inserts to the backing pickle -- confirm in dfdb).
db.commit()