In [1]:
import pandas as pd
import numpy as np

df_wide = pd.read_csv('./input/ch03/time_series_wide.csv', index_col = 0)
df_wide.index = pd.to_datetime(df_wide.index)

In [2]:
print(df_wide.iloc[:5,:3])

              A     B     C
2016-07-01  532  3314  1136
2016-07-02  798  2461  1188
2016-07-03  823  3522  1711
2016-07-04  937  5451  1977
2016-07-05  881  4729  1975


In [3]:
df_long = df_wide.stack().reset_index(1)

In [4]:
print(df_long)

           level_1     0
2016-07-01       A   532
2016-07-01       B  3314
2016-07-01       C  1136
2016-07-02       A   798
2016-07-02       B  2461
...            ...   ...
2016-12-30       B  4243
2016-12-30       C  2069
2016-12-31       A   869
2016-12-31       B  4703
2016-12-31       C  2233

[552 rows x 2 columns]


In [5]:
df_long.columns = ['id', 'value']

In [6]:
print(df_long)

           id  value
2016-07-01  A    532
2016-07-01  B   3314
2016-07-01  C   1136
2016-07-02  A    798
2016-07-02  B   2461
...        ..    ...
2016-12-30  B   4243
2016-12-30  C   2069
2016-12-31  A    869
2016-12-31  B   4703
2016-12-31  C   2233

[552 rows x 2 columns]


In [7]:
df_wide = df_long.pivot(index=None, columns='id', values='value')

In [8]:
print(df_wide)

id            A     B     C
2016-07-01  532  3314  1136
2016-07-02  798  2461  1188
2016-07-03  823  3522  1711
2016-07-04  937  5451  1977
2016-07-05  881  4729  1975
...         ...   ...   ...
2016-12-27  840  4573  1850
2016-12-28  943  4511  1764
2016-12-29  978  4599  1787
2016-12-30  907  4243  2069
2016-12-31  869  4703  2233

[184 rows x 3 columns]


In [10]:
train = pd.read_csv('./input/sample-data/train_preprocessed.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('./input/sample-data/test_preprocessed.csv')

In [11]:
import xgboost as xgb
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True , random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]
tr_x.head()

Unnamed: 0,age,sex,height,weight,product,amount,medical_info_a1,medical_info_a2,medical_info_a3,medical_info_b1,...,medical_keyword_5,medical_keyword_6,medical_keyword_7,medical_keyword_8,medical_keyword_9,medical_keyword_10,year,month,day,yearmonth
0,50,1,166.445608,65.016732,9,7000000,134,202,1,11,...,0,1,0,1,0,0,2015,2,3,24182
1,68,0,164.334615,56.544217,0,7000000,438,263,3,14,...,0,0,1,1,0,0,2015,5,9,24185
2,77,1,167.462917,54.242267,2,6000000,313,325,1,18,...,0,1,0,1,0,0,2016,2,13,24194
3,17,1,177.097725,71.147762,3,8000000,342,213,2,11,...,0,0,0,1,0,0,2015,7,6,24187
4,62,0,158.165788,65.240697,1,9000000,327,102,0,14,...,0,0,1,1,1,0,2016,9,17,24201


In [12]:
dtrain = xgb.DMatrix(tr_x, label=tr_y)
dvalid = xgb.DMatrix(va_x, label=va_y)
dtest = xgb.DMatrix(test_x)

params = {'objective':  'binary:logistic', 'silent': 1, 'random_state': 71}
num_round = 50

watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
model = xgb.train(params, dtrain, num_round, evals=watchlist)

va_pred = model.predict(dvalid)
score=log_loss(va_y, va_pred)
print(f'logloss: {score:.4f}')

pred = model.predict(dtest)

Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-logloss:0.54088	eval-logloss:0.55003
[1]	train-logloss:0.45269	eval-logloss:0.47182
[2]	train-logloss:0.39482	eval-logloss:0.42026
[3]	train-logloss:0.35198	eval-logloss:0.38520
[4]	train-logloss:0.32021	eval-logloss:0.36150
[5]	train-logloss:0.29673	eval-logloss:0.34463
[6]	train-logloss:0.27610	eval-logloss:0.32900
[7]	train-logloss:0.25886	eval-logloss:0.31670
[8]	train-logloss:0.24363	eval-logloss:0.30775
[9]	train-logloss:0.23153	eval-logloss:0.30093
[10]	train-logloss:0.22016	eval-logloss:0.29413
[11]	train-logloss:0.20963	eval-logloss:0.28528
[12]	train-logloss:0.19951	eval-logloss:0.27912
[13]	train-logloss:0.19324	eval-logloss:0.27642
[14]	train-logloss:0.1854

In [18]:
params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71, 'eval_metric': 'logloss'}
num_round= 500
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
model = xgb.train(params, dtrain, num_round, evals=watchlist, early_stopping_rounds=20)


Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-logloss:0.54088	eval-logloss:0.55003
[1]	train-logloss:0.45269	eval-logloss:0.47182
[2]	train-logloss:0.39482	eval-logloss:0.42026
[3]	train-logloss:0.35198	eval-logloss:0.38520
[4]	train-logloss:0.32021	eval-logloss:0.36150
[5]	train-logloss:0.29673	eval-logloss:0.34463
[6]	train-logloss:0.27610	eval-logloss:0.32900
[7]	train-logloss:0.25886	eval-logloss:0.31670
[8]	train-logloss:0.24363	eval-logloss:0.30775
[9]	train-logloss:0.23153	eval-logloss:0.30093
[10]	train-logloss:0.22016	eval-logloss:0.29413
[11]	train-logloss:0.20963	eval-logloss:0.28528
[12]	train-logloss:0.19951	eval-logloss:0.27912
[13]	train-logloss:0.19324	eval-logloss:0.27642
[14]	train-logloss:0.1854

In [21]:
import lightgbm as lgb
from sklearn.metrics import log_loss

lgb_train = lgb.Dataset(tr_x, tr_y)
lgb_eval = lgb.Dataset(va_x, va_y)

params = {'objective': 'binary', 'seed': 71, 'verbose': 0, 'metrics': 'binary_logloss'}
num_round = 100

categorical_features = ['product', 'medical_info_b2', 'medical_info_b3']
model = lgb.train(params, lgb_train , num_boost_round=num_round, categorical_feature=categorical_features, valid_names=['train', 'valid'], valid_sets=[lgb_train, lgb_eval])

va_pred = model.predict(va_x)
score = log_loss(va_y, va_pred)
print(f'logloss: {score: .4f}')

pred = model.predict(test_x)

New categorical_feature is ['medical_info_b2', 'medical_info_b3', 'product']


You can set `force_col_wise=true` to remove the overhead.
[1]	train's binary_logloss: 0.454286	valid's binary_logloss: 0.4654
[2]	train's binary_logloss: 0.429348	valid's binary_logloss: 0.443537
[3]	train's binary_logloss: 0.409269	valid's binary_logloss: 0.425588
[4]	train's binary_logloss: 0.393109	valid's binary_logloss: 0.411213
[5]	train's binary_logloss: 0.379351	valid's binary_logloss: 0.399341
[6]	train's binary_logloss: 0.366138	valid's binary_logloss: 0.389055
[7]	train's binary_logloss: 0.35417	valid's binary_logloss: 0.378254
[8]	train's binary_logloss: 0.343782	valid's binary_logloss: 0.370131
[9]	train's binary_logloss: 0.334283	valid's binary_logloss: 0.362036
[10]	train's binary_logloss: 0.324802	valid's binary_logloss: 0.353452
[11]	train's binary_logloss: 0.316592	valid's binary_logloss: 0.346904
[12]	train's binary_logloss: 0.308484	valid's binary_logloss: 0.340248
[13]	train's binary_logloss: 0.301468	valid's binary_logloss: 0.335801
[14]	train's binary_logloss: 0.

In [27]:
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler

train = pd.read_csv('./input/sample-data/train_preprocessed_onehot.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('./input/sample-data/test_preprocessed_onehot.csv')

# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]
scaler = StandardScaler()
tr_x = scaler.fit_transform(tr_x)
va_x = scaler.transform(va_x)
test_x = scaler.transform(test_x)

model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(train_x.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

batch_size = 128
epochs = 10
history = model.fit(tr_x, tr_y, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(va_x, va_y))

va_pred = model.predict(va_x)
score = log_loss(va_y, va_pred, eps=1e-7)
print(f'logloss: {score: .4f}')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
logloss:  0.3015


In [29]:
from keras.callbacks import EarlyStopping

epochs  = 50
early_stopping = EarlyStopping(monitor='val_loss', patience = 20, restore_best_weights=True)

history = model.fit(tr_x, tr_y, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(va_x, va_y), callbacks=[early_stopping])
pred=model.predict(test_x)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


In [30]:

train = pd.read_csv('./input/sample-data/train_preprocessed_onehot.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('./input/sample-data/test_preprocessed_onehot.csv')

from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
tr_x = scaler.fit_transform(tr_x)
va_x = scaler.transform(va_x)
test_x = scaler.transform(test_x)

model = LogisticRegression(C=1)
model.fit(tr_x, tr_y)
va_pred = model.predict_proba(va_x)
score = log_loss(va_y, va_pred)
print(f'logloss: {score:.4f}')

pred = model.predict(test_x)

logloss: 0.3720


In [35]:
param1_list = [3,5,7,9]
param2_list  = [1,2,3,4,5]

grid_search_params = []
for p1 in param1_list:
    for p2 in param2_list:
        grid_search_params.append((p1,p2))
        
random_search_params = []
trials = 15
for i in range(trials):
    p1 = np.random.choice(param1_list)
    p2 = np.random.choice(param2_list)
    random_search_params.append((p1,p2))

In [36]:
print(grid_search_params)

[(3, 1), (3, 2), (3, 3), (3, 4), (3, 5), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (7, 1), (7, 2), (7, 3), (7, 4), (7, 5), (9, 1), (9, 2), (9, 3), (9, 4), (9, 5)]


In [37]:
random_search_params

[(9, 4),
 (5, 5),
 (7, 2),
 (9, 4),
 (3, 2),
 (3, 3),
 (7, 4),
 (7, 4),
 (9, 3),
 (9, 2),
 (5, 4),
 (9, 5),
 (5, 5),
 (7, 2),
 (9, 4)]

In [38]:
grid_search_params

[(3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (5, 1),
 (5, 2),
 (5, 3),
 (5, 4),
 (5, 5),
 (7, 1),
 (7, 2),
 (7, 3),
 (7, 4),
 (7, 5),
 (9, 1),
 (9, 2),
 (9, 3),
 (9, 4),
 (9, 5)]

In [47]:
# ---------------------------------
# データ等の準備
# ----------------------------------
import numpy as np
import pandas as pd

# train_xは学習データ、train_yは目的変数、test_xはテストデータ
# pandasのDataFrame, Seriesで保持します。（numpyのarrayで保持することもあります）

train = pd.read_csv('./input/sample-data/train_preprocessed.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('./input/sample-data/test_preprocessed.csv')

# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

# xgboostによる学習・予測を行うクラス
import xgboost as xgb


class Model:

    def __init__(self, params=None):
        self.model = None
        if params is None:
            self.params = {}
        else:
            self.params = params

    def fit(self, tr_x, tr_y, va_x, va_y):
        params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
        params.update(self.params)
        num_round = 10
        dtrain = xgb.DMatrix(tr_x, label=tr_y)
        dvalid = xgb.DMatrix(va_x, label=va_y)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        self.model = xgb.train(params, dtrain, num_round, evals=watchlist)

    def predict(self, x):
        data = xgb.DMatrix(x)
        pred = self.model.predict(data)
        return pred

from hyperopt import  hp


In [48]:

space= {
    'activation': hp.choice('activation', ['prelu', 'relu']),
    'dropout': hp.uniform('dropout', 0, 0.2),
    'units': hp.quniform('units', 32,256, 32),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01))
}

In [52]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss

def score(params):
    params['max_depth'] = int(params['max_depth'])
    
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')
    
    history.append((params, score))
    
    return {'loss': score, 'status': STATUS_OK}
space = {
    'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1),
    'max_depth': hp.quniform('max_depth', 3, 9, 1),
    'gamma': hp.quniform('gamma', 0, 0.4, 0.1)
}

max_evals = 10
trials = Trials()
history = []
fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=max_evals)

history = sorted(history, key = lambda tpl: tpl
                [1])
best = history[0]
print(f'best params: {best[0]}, score: {best[1]: .4f}')

Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-logloss:0.53361	eval-logloss:0.54380                                                                                                                                                              
[1]	train-logloss:0.44076	eval-logloss:0.46340                                                                                                                                                              
[2]	train-logloss:0.37868	eval-logloss:0.41426                                                                                                                                                              
[3]	train-logloss:0.33054	eval-logloss:0.37579                          

params: {'gamma': 0.1, 'max_depth': 7, 'min_child_weight': 2.0}, logloss: 0.2880                                                                                                                            
Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-logloss:0.54201	eval-logloss:0.55032                                                                                                                                                              
[1]	train-logloss:0.45476	eval-logloss:0.47191                                                                                                                                                              
[2]	train-logloss:0.39750	eval-logloss:0.42154                          

[9]	train-logloss:0.27036	eval-logloss:0.31086                                                                                                                                                              
params: {'gamma': 0.30000000000000004, 'max_depth': 5, 'min_child_weight': 3.0}, logloss: 0.3109                                                                                                            
Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-logloss:0.54255	eval-logloss:0.54999                                                                                                                                                              
[1]	train-logloss:0.45543	eval-logloss:0.47201                          

[8]	train-logloss:0.30750	eval-logloss:0.32746                                                                                                                                                              
[9]	train-logloss:0.29778	eval-logloss:0.32189                                                                                                                                                              
params: {'gamma': 0.2, 'max_depth': 4, 'min_child_weight': 2.0}, logloss: 0.3219                                                                                                                            
Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-logloss:0.54889	eval-logloss:0.55578                          

In [54]:
# ---------------------------------
# データ等の準備
# ----------------------------------
import numpy as np
import pandas as pd

# train_xは学習データ、train_yは目的変数、test_xはテストデータ
# pandasのDataFrame, Seriesで保持します。（numpyのarrayで保持することもあります）

train = pd.read_csv('./input/sample-data/train_preprocessed_onehot.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('./input/sample-data/train_preprocessed_onehot.csv')

# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]


In [56]:
from hyperopt import hp
from keras.callbacks import EarlyStopping

In [75]:
from keras.layers import ReLU, PReLU
from keras.layers import Dense, Dropout
from keras.layers import BatchNormalization
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from sklearn.preprocessing import StandardScaler

base_param = {
    'input_dropout': 0.0,
    'hidden_layers': 3,
    'hidden_units': 96,
    'hidden_activation': 'relu',
    'hidden_dropout': 0.2,
    'batch_norm': 'before_act',
    'optimizer': {'type': 'adam', 'lr': 0.001},
    'batch_size': 64,
}


In [76]:
param_space = {
    'input_dropout': hp.quniform('input_dropout', 0, 0.2, 0.05),
    'hidden_layers': hp.quniform('hidden_layers', 2, 4, 1),
    'hidden_units': hp.quniform('hidden_units', 32, 256, 32),
    'hidden_activation': hp.choice('hidden_activation', ['prelu', 'relu']),
    'hidden_dropout': hp.quniform('hidden_dropout', 0, 0.3, 0.05),
    'batch_norm': hp.choice('batch_norm', ['before_act', 'no']),
    'optimizer': hp.choice('optimizer', [{'type': 'adam', 'lr': hp.loguniform('adam_lr', np.log(0.00001), np.log(0.01))}, {'type': 'sgd', 'lr': hp.loguniform('sgd_lr', np.log(0.00001), np.log(0.01))}]),
    'batch_size': hp.quniform('batch_size', 32, 128, 32),
}



In [79]:
class MLP:
    def __init__(self, params):
        self.params = params
        self.scaler = None
        self.model = None
        
    def fit(self, tr_x, tr_y, va_x, va_y):
        
        input_dropout = self.params['input_dropout']
        hidden_layers = self.params['hidden_layers']
        hidden_units = self.params['hidden_units']
        hidden_activation = self.params['hidden_activation']
        hidden_dropout = self.params['hidden_dropout']
        batch_norm = self.params['batch_norm']
        optimizer_type = self.params['optimizer']['type']
        optimizer_lr = self.params['optimizer']['lr']
        batch_size =int(self.params['batch_size'])
        
        self.scaler = StandardScaler()
        tr_x = self.scaler.fit_transform(tr_x)
        va_x = self.scaler.transform(va_x)
        
        self.model = Sequential()
        
        self.model.add(Dropout(input_dropout, input_shape=(tr_x.shape[1],)))
        
        for i in range(int(hidden_layers)):
            self.model.add(Dense(hidden_units))
            if batch_norm == 'before_act':
                self.model.add(BatchNormalization())
            if hidden_activation == 'prelu':
                self.model.add(PReLU())
            elif hidden_activation == 'relu':
                self.model.add(ReLU())
            else:
                raise NotImplementedError
            self.model.add(Dense(hidden_dropout))
                
        self.model.add(Dense(1, activation='sigmoid'))
            
        if optimizer_type == 'sgd':
            optimizer = SGD(lr=optimizer_lr, decay = 1e-6, momentum=0.9, nesterov=True)
        elif optimizer_type == 'adam':
            optimizer = Adam(lr=optimizer_lr, beta_1 = 0.9, beta_2=0.999,decay=0.)
        else:
            raise NotImplementedError
                
        self.model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
            
        nb_epoch = 200
        patience = 20
        early_stopping = EarlyStopping(patience=patience, restore_best_weights=True)
            
        history = self.model.fit(tr_x, tr_y, epochs=nb_epoch, batch_size=batch_size, verbose=1, validation_data=(va_x, va_y), callbacks=[early_stopping])
            
    def predict(self, x):
        
        x = self.scaler.transform(x)
        y_pred = self.model.predict(x)
        y_pred = y_pred.flatten()
        return y_pred

In [80]:
from hyperopt import fmin, tpe, STATUS_OK, Trials
from sklearn.metrics import log_loss

def score(params):
    model = MLP(params)
    model.fit(tr_x, tr_y,  va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')
    
    history.append((params, score))
    
    return {'loss': score, 'status': STATUS_OK}

max_evals = 10
trials = Trials()
history = []
fmin(score, param_space, algo=tpe.suggest, trials = trials, max_evals=max_evals)

history = sorted(history, key=lambda tpl: tpl[1])
best = history[0]
print(f'best params:{best[0]}, score: {best[1]:.4f}')

Epoch 1/200                                                                                                                                                                                                 

  0%|                                                                                                                                                                | 0/10 [00:00<?, ?trial/s, best loss=?]

  super(SGD, self).__init__(name, **kwargs)



  1/235 [..............................] - ETA: 2:18 - loss: 0.6931 - accuracy: 0.7500                                                                                                                      
 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8125                                                                                                                       

Epoch 2/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        
 41/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7950                                                                                               


Epoch 6/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        
 43/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7987                                                                                                                       

Epoch 7/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                             


Epoch 11/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.9062                                                                                                                        
 42/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8363                                                                                                                       

Epoch 12/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                             


Epoch 15/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8125                                                                                                                        
 39/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8197                                                                                                                       

Epoch 16/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7500                                                                                             


Epoch 20/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                                                        
 41/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7995                                                                                                                       

Epoch 21/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.6875                                                                                             

  super(Adam, self).__init__(name, **kwargs)



 1/59 [..............................] - ETA: 29s - loss: 0.6931 - accuracy: 0.7891                                                                                                                         

Epoch 2/200                                                                                                                                                                                                 

 1/59 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8516                                                                                                                          

Epoch 3/200                                                                                                                                                                                                 

 1/59 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7969                                                                                              


Epoch 11/200                                                                                                                                                                                                

 1/59 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7891                                                                                                                          

Epoch 12/200                                                                                                                                                                                                

 1/59 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7969                                                                                                                          

Epoch 13/200                                                                                                                                                                   


Epoch 21/200                                                                                                                                                                                                

 1/59 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                                                          

 1/79 [..............................] - ETA: 4s                                                                                                                                                            

params: {'batch_norm': 'no', 'batch_size': 128.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.1, 'hidden_layers': 2.0, 'hidden_units': 160.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.00014492534533124422, 'type': 'adam'}}, logloss: 0.6931
Epoch 1/200                                                                                                                         

  super(SGD, self).__init__(name, **kwargs)



  1/118 [..............................] - ETA: 1:14 - loss: 0.6931 - accuracy: 0.7812                                                                                                                      
 17/118 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8254                                                                                                                       

Epoch 2/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7812                                                                                                                        

Epoch 3/200                                                                                                                                                                       


Epoch 8/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        

Epoch 9/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        

Epoch 10/200                                                                                                                                                                   


Epoch 15/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7656                                                                                                                        

Epoch 16/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8125                                                                                                                        

Epoch 17/200                                                                                                                                                                   


Epoch 21/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8594                                                                                                                        
  8/118 [=>............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8203                                                                                                                       
 11/118 [=>............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8224                                                                                                                       
 18/118 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8186                                                                                                

  super(Adam, self).__init__(name, **kwargs)



 1/79 [..............................] - ETA: 57s - loss: 0.6931 - accuracy: 0.7708                                                                                                                         

Epoch 2/200                                                                                                                                                                                                 

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8229                                                                                                                          

Epoch 3/200                                                                                                                                                                                                 

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8229                                                                                              


Epoch 9/200                                                                                                                                                                                                 

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7188                                                                                                                          

Epoch 10/200                                                                                                                                                                                                

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7708                                                                                                                          

Epoch 11/200                                                                                                                                                                   


Epoch 17/200                                                                                                                                                                                                

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8021                                                                                                                          

Epoch 18/200                                                                                                                                                                                                

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8542                                                                                                                          

Epoch 19/200                                                                                                                                                                   

  super(SGD, self).__init__(name, **kwargs)



  1/235 [..............................] - ETA: 1:56 - loss: 0.6931 - accuracy: 0.8125                                                                                                                      
 42/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8006                                                                                                                       

Epoch 2/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        
 42/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8229                                                                                               

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        
 40/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8031                                                                                                                       

Epoch 7/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        
 42/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8170                                                                                               

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7188                                                                                                                        
 43/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8096                                                                                                                       

Epoch 12/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                                                        
 41/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7805                                                                                               

Epoch 16/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8750                                                                                                                        
 38/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8076                                                                                                                       

Epoch 17/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                              

Epoch 21/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        
 41/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7957                                                                                                                       

 1/79 [..............................] - ETA: 5s                                                                                                                                                            

params: {'batch_norm': 'no', 'batch_size': 32.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 160.0, 'input_dropout': 0.15000000000

  super(Adam, self).__init__(name, **kwargs)



  1/235 [..............................] - ETA: 2:32 - loss: 0.6931 - accuracy: 0.8125                                                                                                                      
 32/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8037                                                                                                                       

Epoch 2/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                                                        
 38/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8051                                                                                               


Epoch 6/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                                                        
 32/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8096                                                                                                                       

Epoch 7/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                             


Epoch 10/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        
 35/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8179                                                                                                                       

Epoch 11/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                             

Epoch 14/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7188                                                                                                                        
 34/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8244                                                                                                                       

Epoch 15/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                              


Epoch 19/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        
 39/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8005                                                                                                                       

Epoch 20/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7500                                                                                             

  super(SGD, self).__init__(name, **kwargs)



  1/235 [..............................] - ETA: 2:25 - loss: 0.6931 - accuracy: 0.8750                                                                                                                      
 33/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8258                                                                                                                       

Epoch 2/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        
 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8184                                                                                               


Epoch 6/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                                                        
 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8091                                                                                                                       

Epoch 7/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8750                                                                                             

Epoch 10/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                                                        
 33/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7907                                                                                                                       

Epoch 11/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                              


Epoch 15/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8750                                                                                                                        
 38/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8002                                                                                                                       

Epoch 16/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                             


Epoch 19/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7188                                                                                                                        
 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8142                                                                                                                       

Epoch 20/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                             

  super(SGD, self).__init__(name, **kwargs)



 1/79 [..............................] - ETA: 33s - loss: 0.6931 - accuracy: 0.8646                                                                                                                         

Epoch 2/200                                                                                                                                                                                                 

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8125                                                                                                                          

Epoch 3/200                                                                                                                                                                                                 

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7396                                                                                              


Epoch 11/200                                                                                                                                                                                                

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7604                                                                                                                          

Epoch 12/200                                                                                                                                                                                                

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8021                                                                                                                          

Epoch 13/200                                                                                                                                                                   

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7604                                                                                                                          

Epoch 21/200                                                                                                                                                                                                

 1/79 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8542                                                                                                                          

 1/79 [..............................] - ETA: 4s                                                                                                                                                            

params: {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimi

  super(SGD, self).__init__(name, **kwargs)



  1/235 [..............................] - ETA: 2:07 - loss: 0.6931 - accuracy: 0.8125                                                                                                                      
 36/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7951                                                                                                                       

Epoch 2/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                                                        
 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8142                                                                                               

Epoch 6/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        
 38/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8150                                                                                                                       

Epoch 7/200                                                                                                                                                                                                 

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8125                                                                                              


Epoch 11/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.6562                                                                                                                        
 40/235 [====>.........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7906                                                                                                                       

Epoch 12/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7812                                                                                             


Epoch 16/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.9375                                                                                                                        
 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8066                                                                                                                       

Epoch 17/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.8438                                                                                             

 37/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8091                                                                                                                       

Epoch 21/200                                                                                                                                                                                                

  1/235 [..............................] - ETA: 1s - loss: 0.6931 - accuracy: 0.7500                                                                                                                        
 39/235 [===>..........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8045                                                                                                                       

 1/79 [..............................] - ETA: 5s                                                                                                                                   

  super(Adam, self).__init__(name, **kwargs)



  1/118 [..............................] - ETA: 1:41 - loss: 0.6931 - accuracy: 0.8750                                                                                                                      

Epoch 2/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7969                                                                                                                        

Epoch 3/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7969                                                                                            

Epoch 7/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        

Epoch 8/200                                                                                                                                                                                                 

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.7344                                                                                                                        

Epoch 9/200                                                                                                                                                                     


Epoch 13/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8281                                                                                                                        

Epoch 14/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8281                                                                                                                        

Epoch 15/200                                                                                                                                                                   


Epoch 19/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                                                        
 27/118 [=====>........................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8102                                                                                                                       

Epoch 20/200                                                                                                                                                                                                

  1/118 [..............................] - ETA: 0s - loss: 0.6931 - accuracy: 0.8438                                                                                             

In [82]:
# ---------------------------------
# データ等の準備
# ----------------------------------
import numpy as np
import pandas as pd

# train_xは学習データ、train_yは目的変数、test_xはテストデータ
# pandasのDataFrame, Seriesで保持します。（numpyのarrayで保持することもあります）

train = pd.read_csv('./input/sample-data/train_preprocessed_onehot.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('./input/sample-data/test_preprocessed_onehot.csv')

In [84]:
ary = np.array([10,20,30,0])
idx = ary.argsort()
print(idx)
print(idx[::-1])

print(ary[idx[::-1][:3]])

[3 0 1 2]
[2 1 0 3]
[30 20 10]


In [85]:

import scipy.stats as st

corrs = []
for c in train_x.columns:
    corr = np.corrcoef(train_x[c], train_y)[0,1]
    corrs.append(corr)
corrs = np.array(corrs)

corrs_sp = []
for c in train_x.columns:
    corr_sp = st.spearmanr(train_x[c], train_y).correlation
    corrs_sp.append(corr_sp)
corrs_sp = np.array(corrs_sp)

idx = np.argsort(np.abs(corrs))[::-1]
top_cols, top_importances = train_x.columns.values[idx][:5], corrs[idx][:5]
print(top_cols, top_importances)

['medical_info_a1' 'medical_keyword_5' 'medical_keyword_4'
 'medical_keyword_3' 'age'] [0.21805214 0.21368557 0.18109642 0.16723961 0.15155308]


In [86]:
idx2 = np.argsort(np.abs(corrs_sp))[::-1]
top_col2, top_importances2 = train_x.columns.values[idx2][:5], corrs_sp[idx2][:5]
print(top_col2, top_importances2)

['medical_info_a1' 'medical_keyword_5' 'medical_keyword_4'
 'medical_keyword_3' 'age'] [0.22182331 0.21368557 0.18109642 0.16723961 0.15170291]
