In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from fastFM import als, mcmc, sgd
from pyfm import pylibfm

import optuna

from common import EP

import types

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   /* Override the widths of the notebook, menubar and toolbar containers. */
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
# Show full cell contents instead of truncating long values (the trial table
# below holds whole parameter dicts in a single column).
# `None` means "no limit"; the older `-1` spelling is deprecated since pandas 1.0
# and is no longer accepted by recent pandas releases.
pd.set_option('display.max_colwidth', None)

In [4]:
# Load the stored train and test feature tables.
# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
df_train, df_test = (
    pd.read_pickle(feature_file)
    for feature_file in ('../feats/df_train.pkl', '../feats/df_test.pkl')
)

In [5]:
# Integer label: the target truncated to an int, with every value that is not
# below 15 collapsed into the single bucket 15.
df_train['label'] = df_train['y'].apply(lambda x: int(x) if x < 15 else 15)

# Group key: the season, with season 17 merged into group 1.
# Edit an explicit copy: `.values` may be a view onto the frame's own data, so
# writing into it would silently change `df_train['season']` too, and it raises
# when pandas returns a read-only array (copy-on-write mode).
group = df_train['season'].values.copy()
group[group == 17] = 1
df_train['group'] = group

# NOTE: 'season' is dropped here, so this cell cannot be re-run without first
# reloading df_train.
df_train = df_train.drop(columns=['season'])

In [6]:
# Feature matrix and target for tsfresh's feature selection, both indexed by
# the values of df_train's 'index' column so the two line up.
test_X = df_train.set_index('index').drop(columns=['y', 'group', 'label'])
test_y = df_train.set_index('index')['y'].copy()

# Names of the columns that select_features keeps.
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [7]:
# Every column of df_train except the id, the target and the derived
# label/group columns.
original_columns = df_train.drop(columns=['index', 'y', 'label', 'group']).columns.tolist()

In [9]:
# Load the saved trial history as a list of per-trial dicts. When no history
# file exists yet (first run), start from an empty list instead of crashing,
# so the notebook runs top to bottom without hand-editing this cell.
# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
try:
    mytrial = list(pd.read_pickle('../trial/fm.pkl').T.to_dict().values())
except FileNotFoundError:
    mytrial = []
df_trial = pd.DataFrame(mytrial)
len(mytrial)

10

In [39]:
# Settings handed to EP.process for a single run.
param = {
    'algorithm': {
        'cls': 'als.FMRegression',
        'fit': {},
        'init': {
            'n_iter': 10,
            'init_stdev': 0.00030963137584220923,
            'rank': 2,
            'random_state': 42,
            'l2_reg_w': 0.1,
            'l2_reg_V': 0.1,
            'l2_reg': 0,
        },
    },
    'columns': [
        'q25_roll_std_100',
        'abs_q25_5',
        'iqr_6',
        'abs_q01_4',
        'abs_q75_7',
        'spkt_welch_density__coeff_3',
        # NOTE(review): no '__' before 'coeff', unlike the entry above; confirm
        # this is the real column name in df_train.
        'spkt_welch_densitycoeff_2',
    ],
    'kfold': {
        'n_splits': 3,
        'random_state': 1985,
        'shuffle': True,
        'type': 'group',
    },
    'scaler': {
        'cls': 'StandardScaler',
    },
}

In [40]:
# Run the pipeline once with the settings in `param`, passing the test frame
# and the shared trial list along.
df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
)

In [32]:
# Tune hyperparameters
def objective(trial):
    """Optuna objective for the ALS factorization-machine settings.

    Samples ``n_iter``, ``init_stdev`` and ``rank`` from the trial, runs
    ``EP.process`` with them on the columns listed in ``param['columns']``,
    and scores the run from the ``train`` and ``valid`` columns of the
    returned history frame.

    Args:
        trial: the Optuna trial used to sample values and to store the
            summary statistics as user attributes.

    Returns:
        ``abs(mean(valid) - mean(train)) * mean(valid)``: small when both the
        validation score and the train/validation gap are small.
    """
    n_iter = trial.suggest_int('n_iter', 10, 100)
    init_stdev = trial.suggest_uniform('init_stdev', 0.00001, .01)
    rank = trial.suggest_int('rank', 2, 16)

    # Same layout as the module-level `param`; only the three sampled values vary.
    args = {
        'algorithm': {
            'cls': 'als.FMRegression',
            'fit': {},
            'init': {
                'n_iter': n_iter,
                'init_stdev': init_stdev,
                'rank': rank,
                'random_state': 42,
                'l2_reg_w': 0.1,
                'l2_reg_V': 0.1,
                'l2_reg': 0,
            },
        },
        # Copy so the shared column list in `param` cannot be modified by the run.
        'columns': param['columns'].copy(),
        'kfold': {
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group',
        },
        'scaler': {'cls': 'StandardScaler'},
    }

    # Only the history frame is needed for scoring; the other outputs are unused here.
    df_his, _, _, _ = EP.process(df_train, args, df_test=df_test, trial=mytrial, remark='tune 11')

    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean - train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but discarded; recorded alongside the validation variance.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean) * val_mae_mean

# Seed the sampler so that re-running the search proposes the same sequence of
# hyperparameters (an unseeded sampler makes every run of this cell differ).
# NOTE(review): full reproducibility also assumes EP.process is deterministic
# for fixed settings; confirm.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

[I 2019-05-17 02:08:06,859] Finished a trial resulted in value: 1.1128082411819047. Current best value is 1.1128082411819047 with parameters: {'n_iter': 18, 'init_stdev': 0.0003839582549693036, 'rank': 13}.
[I 2019-05-17 02:10:26,770] Finished a trial resulted in value: 1.5999636319655133. Current best value is 1.1128082411819047 with parameters: {'n_iter': 18, 'init_stdev': 0.0003839582549693036, 'rank': 13}.
[I 2019-05-17 02:13:09,545] Finished a trial resulted in value: 1.8238935807577812. Current best value is 1.1128082411819047 with parameters: {'n_iter': 18, 'init_stdev': 0.0003839582549693036, 'rank': 13}.
[I 2019-05-17 02:14:38,769] Finished a trial resulted in value: 1.2556704673348715. Current best value is 1.1128082411819047 with parameters: {'n_iter': 18, 'init_stdev': 0.0003839582549693036, 'rank': 13}.
[I 2019-05-17 02:15:43,944] Finished a trial resulted in value: 0.7608375042065423. Current best value is 0.7608375042065423 with parameters: {'n_iter': 82, 'init_stdev': 0

[I 2019-05-17 02:56:05,391] Finished a trial resulted in value: 1.5505349162192792. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 02:56:26,911] Finished a trial resulted in value: 0.9817260560002663. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 02:57:50,118] Finished a trial resulted in value: 1.1288775441832095. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 02:58:33,533] Finished a trial resulted in value: 1.0078181036916154. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 02:59:28,165] Finished a trial resulted in value: 0.6640968083781947. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.000

[I 2019-05-17 03:25:59,178] Finished a trial resulted in value: 1.0447810673152786. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 03:26:19,883] Finished a trial resulted in value: 0.8113551762970573. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 03:27:28,507] Finished a trial resulted in value: 1.0218478663588446. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 03:28:14,115] Finished a trial resulted in value: 1.1454519010074924. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.0005812212418646982, 'rank': 2}.
[I 2019-05-17 03:28:45,289] Finished a trial resulted in value: 0.9815758139565772. Current best value is 0.5649772799486461 with parameters: {'n_iter': 26, 'init_stdev': 0.000

In [43]:
# Rebuild the trial table from the trial list and pull two nested settings out
# of the 'param' dicts into their own columns.
df_trial = pd.DataFrame(mytrial)
df_trial['algorithm-init'] = df_trial['param'].map(
    lambda trial_param: trial_param['algorithm']['init']
)
df_trial['kfold-type'] = df_trial['param'].map(
    lambda trial_param: trial_param['kfold']['type']
)

# Optional row filter that can be inserted before the column selection:
#   [(df_trial['remark']=='tune 11')&(df_trial['mae_diff']<0.3)].sort_values(by=['val_mae'])
# Display the summary columns of the most recent trial.
df_trial[
    [
        'datetime',
        'algorithm-init',
        'kfold-type',
        'nfeatures',
        'train_mae',
        'train_mae_var',
        'val_mae',
        'val_mae_var',
        'mae_diff',
    ]
].tail(1)

Unnamed: 0,datetime,algorithm-init,kfold-type,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
313,2019-05-17 04:14:15.187700,"{'n_iter': 10, 'init_stdev': 0.00030963137584220923, 'rank': 2, 'random_state': 42, 'l2_reg_w': 0.1, 'l2_reg_V': 0.1, 'l2_reg': 0}",group,7,2.099356,0.00307,2.10857,0.01448,0.009214


In [44]:
# Build the submission: one prediction per row of df_test_pred, taken as the
# row-wise mean over every column except 'index'.
df_submit = pd.DataFrame({
    'time_to_failure': np.mean(df_test_pred.drop(columns=['index']).values, axis=1),
    # Take the ids positionally, exactly like the predictions above. Assigning
    # the Series itself would align on df_test_pred's index and produce NaN or
    # misplaced ids whenever that index is not the default 0..n-1 range.
    'seg_id': df_test_pred['index'].values,
})
df_submit.to_csv('submission.csv', index=False)

In [44]:
# Write the trial table back to the file the history is loaded from.
df_trial.to_pickle(path='../trial/fm.pkl')