In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import seaborn as sns
from pprint import pprint
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

import pickle
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
from skopt import gp_minimize

import lightgbm as lgb
from skopt import BayesSearchCV
from sklearn.svm import SVR

The purpose of this notebook is to examine the tsfresh 788 features created from the LANL time series
We want to remove the features that dont have much value (e.g., mostly 0)

In [2]:
X_train1 = pd.read_pickle("Xtf_train1.pkl")
X_train3 = pd.read_pickle("Xtf_train3.pkl")
X_train4 = pd.read_pickle("Xtf_train4.pkl")

X1_train = pd.concat([X_train1, X_train3, X_train4], ignore_index=True)
X1_train.reset_index(drop=True)
print(X1_train.shape)


(3000, 788)


In [3]:
temp =X1_train.astype(bool).sum(axis=0)
# look for feature % that has valid values
temp = 100*temp/X1_train.shape[0]
print(temp.shape)

(788,)


In [4]:
len(temp[temp<50])

71

In [5]:
feature2drop = temp[temp<50].index.tolist()

In [6]:
X1_train_reduced = X1_train.drop(feature2drop,axis=1)
print(X1_train_reduced.shape)

(3000, 717)


In [7]:
y1_train_1 = pd.read_pickle("y_train1.pkl")[0:1000]
y1_train_2 = pd.read_pickle("y_train1.pkl")[2000:4000]
y1_train = pd.concat([y1_train_1, y1_train_2], ignore_index=True)
y1_train.reset_index(drop=True)
print(y1_train.shape)

(3000,)


In [8]:
XX1_train, XX1_test, yy1_train, yy1_test = train_test_split(X1_train_reduced , y1_train, test_size=0.2, random_state=42)
print(XX1_train.shape, yy1_train.shape)
print(XX1_test.shape, yy1_test.shape)

(2400, 717) (2400,)
(600, 717) (600,)


---

In [9]:
def status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest MAE: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner.best_score_, 4),
        bayes_cv_tuner.best_params_
    ))

In [10]:
# reference
# https://www.kaggle.com/nanomathias/bayesian-optimization-of-xgboost-lb-0-9769

bayes_cv_tuner = BayesSearchCV(
    estimator = lgb.LGBMRegressor(
        objective='regression',
        metric='mae',
        num_threads=4,
    ),
    search_spaces = {
        'learning_rate': (0.01,0.05, 'uniform'),
        'num_leaves': (2, 256),      
        'max_depth': (2, 20),
        'subsample': (0.01, 1.0, 'uniform'),
        'subsample_freq': (0, 10),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'min_child_weight': (0, 10),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'n_estimators': (10, 5000),
        'feature_fraction': (0.5, 1.0),
        'bagging_fraction': (0.5, 1.0)
    },    
    scoring = 'neg_mean_absolute_error',
    cv = TimeSeriesSplit(n_splits=10),
    n_iter = 30,   
    verbose = 0,
    refit = True,
    random_state = 42
)

In [11]:
result = bayes_cv_tuner.fit(XX1_train, yy1_train, callback=status_print)

Model #1
Best MAE: -1.9128
Best params: {'bagging_fraction': 0.705051979426657, 'colsample_bytree': 0.7304484857455519, 'feature_fraction': 0.966433999423917, 'learning_rate': 0.02263198373948195, 'max_depth': 14, 'min_child_weight': 4, 'n_estimators': 1761, 'num_leaves': 190, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'subsample': 0.5544643023916863, 'subsample_freq': 1}

Model #2
Best MAE: -1.9121
Best params: {'bagging_fraction': 0.9186941777766422, 'colsample_bytree': 0.8844821246070537, 'feature_fraction': 0.6517050549420875, 'learning_rate': 0.04804895626373318, 'max_depth': 18, 'min_child_weight': 1, 'n_estimators': 700, 'num_leaves': 92, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'subsample': 0.6336020558163782, 'subsample_freq': 10}

Model #3
Best MAE: -1.9053
Best params: {'bagging_fraction': 0.7224162561505759, 'colsample_bytree': 0.9195352964526833, 'feature_fraction': 0.5524295792763518, 'learning_rate': 0.0273333120748


Model #22
Best MAE: -1.8824
Best params: {'bagging_fraction': 0.8670140089927842, 'colsample_bytree': 0.9399760402267441, 'feature_fraction': 0.5818035893192751, 'learning_rate': 0.017537011670617206, 'max_depth': 16, 'min_child_weight': 4, 'n_estimators': 2301, 'num_leaves': 138, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'subsample': 0.5930070264428381, 'subsample_freq': 6}

Model #23
Best MAE: -1.8824
Best params: {'bagging_fraction': 0.8670140089927842, 'colsample_bytree': 0.9399760402267441, 'feature_fraction': 0.5818035893192751, 'learning_rate': 0.017537011670617206, 'max_depth': 16, 'min_child_weight': 4, 'n_estimators': 2301, 'num_leaves': 138, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'subsample': 0.5930070264428381, 'subsample_freq': 6}

Model #24
Best MAE: -1.8824
Best params: {'bagging_fraction': 0.8670140089927842, 'colsample_bytree': 0.9399760402267441, 'feature_fraction': 0.5818035893192751, 'learning_rate': 0.

In [12]:
print(result.best_score_)
print(result.best_params_)

-1.8787537289197256
{'bagging_fraction': 0.9909288549451587, 'colsample_bytree': 0.5140338264489366, 'feature_fraction': 0.5, 'learning_rate': 0.02268051198931534, 'max_depth': 13, 'min_child_weight': 5, 'n_estimators': 2956, 'num_leaves': 163, 'reg_alpha': 0.01615836660932548, 'reg_lambda': 1.4389764733397212e-08, 'subsample': 0.6305589558406525, 'subsample_freq': 10}


In [13]:
lgb_model1 = result.best_estimator_

In [14]:
yy_test_hat = lgb_model1.predict(XX1_test)
lgb_MAE1 = mean_absolute_error(yy1_test, yy_test_hat)
print(lgb_MAE1)

1.807619894739364


with open('lgb1_bo.pkl', 'wb') as fid:
    pickle.dump(lgb_model1, fid)

In [15]:
# add a default lgb model
lgb_model1d = lgb.LGBMRegressor(boosting_type='gbdt', object='regression', random_state=42)
lgb_model1d.fit(XX1_train, yy1_train, eval_metric='mae')
yy_test_hat = lgb_model1d.predict(XX1_test)
lgb_MAE1d = mean_absolute_error(yy1_test, yy_test_hat)
print(lgb_MAE1d)

1.7788005251875316


with open('lgb1_default.pkl', 'wb') as fid:
    pickle.dump(lgb_model1d, fid)

---

Second LGB Model with 2nd 4000 sample data

In [17]:
X_train5 = pd.read_pickle("Xtf_train11.pkl")
X_train7 = pd.read_pickle("Xtf_train33.pkl")
X_train8 = pd.read_pickle("Xtf_train44.pkl")
X2_train = pd.concat([X_train5, X_train7, X_train8], ignore_index=True)
X2_train.reset_index(drop=True)
print(X2_train.shape)
X2_train_reduced = X2_train.drop(feature2drop,axis=1)
print(X2_train_reduced.shape)

(3000, 788)
(3000, 717)


In [18]:
y2_train_1 = pd.read_pickle("y_train2.pkl")[0:1000]
y2_train_2 = pd.read_pickle("y_train2.pkl")[2000:4000]
y2_train = pd.concat([y2_train_1, y2_train_2], ignore_index=True)
y2_train.reset_index(drop=True)
print(y2_train.shape)

(3000,)


In [19]:
XX2_train, XX2_test, yy2_train, yy2_test = train_test_split(X2_train_reduced , y2_train, test_size=0.2, random_state=42)
print(XX2_train.shape, yy2_train.shape)
print(XX2_test.shape, yy2_test.shape)

(2400, 717) (2400,)
(600, 717) (600,)


In [20]:
result = bayes_cv_tuner.fit(XX2_train, yy2_train, callback=status_print)

Model #1
Best MAE: -1.9107
Best params: {'bagging_fraction': 0.705051979426657, 'colsample_bytree': 0.7304484857455519, 'feature_fraction': 0.966433999423917, 'learning_rate': 0.02263198373948195, 'max_depth': 14, 'min_child_weight': 4, 'n_estimators': 1761, 'num_leaves': 190, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'subsample': 0.5544643023916863, 'subsample_freq': 1}

Model #2
Best MAE: -1.9025
Best params: {'bagging_fraction': 0.9186941777766422, 'colsample_bytree': 0.8844821246070537, 'feature_fraction': 0.6517050549420875, 'learning_rate': 0.04804895626373318, 'max_depth': 18, 'min_child_weight': 1, 'n_estimators': 700, 'num_leaves': 92, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'subsample': 0.6336020558163782, 'subsample_freq': 10}

Model #3
Best MAE: -1.8913
Best params: {'bagging_fraction': 0.7224162561505759, 'colsample_bytree': 0.9195352964526833, 'feature_fraction': 0.5524295792763518, 'learning_rate': 0.0273333120748


Model #22
Best MAE: -1.8823
Best params: {'bagging_fraction': 0.7971279436272822, 'colsample_bytree': 0.9792929201665225, 'feature_fraction': 0.5828281310022894, 'learning_rate': 0.027203487011707043, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 706, 'num_leaves': 243, 'reg_alpha': 2.2703134668787435e-07, 'reg_lambda': 122.46781185567828, 'subsample': 0.48035795931229164, 'subsample_freq': 3}

Model #23
Best MAE: -1.8823
Best params: {'bagging_fraction': 0.7971279436272822, 'colsample_bytree': 0.9792929201665225, 'feature_fraction': 0.5828281310022894, 'learning_rate': 0.027203487011707043, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 706, 'num_leaves': 243, 'reg_alpha': 2.2703134668787435e-07, 'reg_lambda': 122.46781185567828, 'subsample': 0.48035795931229164, 'subsample_freq': 3}

Model #24
Best MAE: -1.8823
Best params: {'bagging_fraction': 0.7971279436272822, 'colsample_bytree': 0.9792929201665225, 'feature_fraction': 0.5828281310022894, 'learning_rate': 0.02

In [21]:
print(result.best_score_)
print(result.best_params_)

-1.8823464537754624
{'bagging_fraction': 0.7971279436272822, 'colsample_bytree': 0.9792929201665225, 'feature_fraction': 0.5828281310022894, 'learning_rate': 0.027203487011707043, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 706, 'num_leaves': 243, 'reg_alpha': 2.2703134668787435e-07, 'reg_lambda': 122.46781185567828, 'subsample': 0.48035795931229164, 'subsample_freq': 3}


In [22]:
lgb_model2 = result.best_estimator_

In [23]:
yy_test_hat = lgb_model2.predict(XX2_test)
lgb_MAE2 = mean_absolute_error(yy2_test, yy_test_hat)
print(lgb_MAE2)

1.8807917531082945


with open('lgb2_bo.pkl', 'wb') as fid:
    pickle.dump(lgb_model2, fid)

In [24]:
# add a default lgb model
lgb_model2d = lgb.LGBMRegressor(boosting_type='gbdt', object='regression', random_state=42)
lgb_model2d.fit(XX2_train, yy2_train, eval_metric='mae')
yy_test_hat = lgb_model2d.predict(XX2_test)
lgb_MAE2d = mean_absolute_error(yy2_test, yy_test_hat)
print(lgb_MAE2d)

1.8920002293583738


with open('lgb2_default.pkl', 'wb') as fid:
    pickle.dump(lgb_model2d, fid)

---

Add a default lgb model for all (8 sets) training data

In [25]:
XX12_train = pd.concat([XX1_train, XX2_train], ignore_index=True)
yy12_train = pd.concat([yy1_train, yy2_train], ignore_index=True)
XX12_test = pd.concat([XX1_test, XX2_test], ignore_index=True)
yy12_test = pd.concat([yy1_test, yy2_test], ignore_index=True)
print(XX12_train.shape)
print(yy12_train.shape)
print(XX12_test.shape)
print(yy12_test.shape)


(4800, 717)
(4800,)
(1200, 717)
(1200,)


In [26]:
lgb_model0d = lgb.LGBMRegressor(boosting_type='gbdt', object='regression', random_state=42)
lgb_model0d.fit(XX12_train, yy12_train, eval_metric='mae')
yy_test_hat = lgb_model0d.predict(XX12_test)
lgb_MAE0d = mean_absolute_error(yy12_test, yy_test_hat)
print(lgb_MAE0d)

1.784563789063644


with open('lgb0_default.pkl', 'wb') as fid:
    pickle.dump(lgb_model0d, fid)

---
## Final start from here

In [38]:
svr_cv_tuner = BayesSearchCV(
    estimator = SVR(),
    search_spaces = {
        'C': (1e-6, 1e+6, 'log-uniform'),  
        'gamma': (1e-6, 1e+1, 'log-uniform'),
        'epsilon': (1e-6, 1e+1, 'log-uniform'),
        'kernel': ['rbf'],  # categorical parameter
    },    
    scoring = 'neg_mean_absolute_error',
    cv = KFold(shuffle=True, random_state=42),
    n_iter = 30,   
    verbose = 0,
    refit = True,
    random_state = 42
)

In [28]:
def svr_status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(svr_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(svr_cv_tuner.best_params_)
    print('Model #{}\nBest MAE: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(svr_cv_tuner.best_score_, 4),
        svr_cv_tuner.best_params_
    ))

with open('lgb0_default.pkl','rb') as fid:
    lgb_model0d = pickle.load(fid)
    
with open('lgb1_default.pkl','rb') as fid:
    lgb_model1d = pickle.load(fid)
    
with open('lgb1_bo.pkl','rb') as fid:
    lgb_model1 = pickle.load(fid)
    
with open('lgb2_default.pkl','rb') as fid:
    lgb_model2d = pickle.load(fid)
    
with open('lgb2_bo.pkl','rb') as fid:
    lgb_model2 = pickle.load(fid)

In [29]:
temp1 = pd.Series(lgb_model0d.predict(XX12_test))
temp3 = pd.Series(lgb_model1.predict(XX12_test))
temp5 = pd.Series(lgb_model2.predict(XX12_test))
tempDF = pd.concat([temp1,temp3,temp5], axis=1, ignore_index=True)
print(tempDF.shape)
print(tempDF.head())

(1200, 3)
          0         1         2
0  9.640654  9.399276  9.973347
1  3.111947  3.641285  3.388832
2  8.208186  8.581705  8.690141
3  4.155285  4.822063  3.077576
4  2.307458  3.144112  2.466820


In [30]:
result = svr_cv_tuner.fit(tempDF, yy12_test, callback=svr_status_print)

Model #1
Best MAE: -2.8738
Best params: {'C': 0.08341564384216595, 'degree': 6, 'epsilon': 3.389034515643755, 'gamma': 0.00016240416181810798, 'kernel': 'rbf'}

Model #2
Best MAE: -2.8738
Best params: {'C': 0.08341564384216595, 'degree': 6, 'epsilon': 3.389034515643755, 'gamma': 0.00016240416181810798, 'kernel': 'rbf'}

Model #3
Best MAE: -1.8962
Best params: {'C': 0.21776603694820984, 'degree': 7, 'epsilon': 5.420184998488358e-06, 'gamma': 0.0010797659128262546, 'kernel': 'rbf'}

Model #4
Best MAE: -1.8962
Best params: {'C': 0.21776603694820984, 'degree': 7, 'epsilon': 5.420184998488358e-06, 'gamma': 0.0010797659128262546, 'kernel': 'rbf'}

Model #5
Best MAE: -1.8962
Best params: {'C': 0.21776603694820984, 'degree': 7, 'epsilon': 5.420184998488358e-06, 'gamma': 0.0010797659128262546, 'kernel': 'rbf'}

Model #6
Best MAE: -1.7643
Best params: {'C': 643.1854566491933, 'degree': 8, 'epsilon': 1.3971732124886836e-05, 'gamma': 2.084380695810582e-05, 'kernel': 'rbf'}

Model #7
Best MAE: -1.7

In [31]:
print(result.best_score_)
print(result.best_params_)

-1.763334719687103
{'C': 727422.3705426904, 'degree': 6, 'epsilon': 1e-06, 'gamma': 1e-06, 'kernel': 'rbf'}


In [32]:
# final model
Fmodel = result.best_estimator_

---
## Submission

In [34]:
X_test = pd.read_pickle('Xtest_DF788.pkl')
X_test= X_test.drop(feature2drop,axis=1)
print(X_test.shape)

(2624, 717)


In [35]:
temp1 = pd.Series(lgb_model0d.predict(X_test))
temp3 = pd.Series(lgb_model1.predict(X_test))
temp5 = pd.Series(lgb_model2.predict(X_test))
tempDF = pd.concat([temp1,temp3,temp5], axis=1, ignore_index=True)
print(tempDF.shape)
print(tempDF.head())

(2624, 3)
          0         1         2
0  3.411266  4.008003  4.211504
1  4.932262  5.429075  5.676272
2  6.201557  5.959195  6.116863
3  8.131583  7.283381  8.136434
4  6.210477  6.076866  6.163148


In [36]:
y_hat = Fmodel.predict(tempDF)
print(y_hat.shape)
print(y_hat[:10])

(2624,)
[3.34159248 4.98661114 6.04580362 8.0004566  6.09624519 1.8812236
 6.42284252 3.32941823 4.68042215 1.43017059]


In [37]:
submission = pd.read_csv('sample_submission.csv', index_col='seg_id')
submission['time_to_failure'] = y_hat.clip(0, 16)
submission.to_csv('submission_lgb_svr_03.csv')