# 1: インポート

# 2: データ読込み

# 3: 特徴量数値化

# 4: ハイパーパラメータの調整

# 5: 実装

# 6: 過学習の有無を確認

# 1: インポート

# In [1]:
import pandas as pd
from pandas import Series,DataFrame

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

import numpy as np

import keras
from keras import regularizers
from keras.metrics import mae
from keras import regularizers
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout

import warnings
warnings.filterwarnings('ignore')

# Output: Using TensorFlow backend.


# 2: データの読み込み

# In [2]:
# Load the dataset; header=0 takes the column names from the first CSV row.
data_set = pd.read_csv('data.csv', sep=",", header=0)

# Features are every column except "Product", "Component" and the target itself.
x = data_set.drop(["Product", "Component", "target"], axis=1)
# Keep the target two-dimensional (one column) as in the original DataFrame form.
y = data_set[["target"]]

# Hold out the last 20% of the rows for validation; shuffle=False keeps the
# original row order instead of sampling at random.
tr_x, va_x, tr_y, va_y = train_test_split(x, y, test_size=0.2, shuffle=False)

# Standardize the features; the scaler is fitted on the training split only
# and then applied unchanged to the validation split.
stdsc = StandardScaler()
tr_x = stdsc.fit_transform(tr_x)
va_x = stdsc.transform(va_x)

# Cast the features to 64-bit floats. The `np.float` alias used previously was
# deprecated in NumPy 1.20 and removed in 1.24; `np.float64` is the same dtype.
tr_x = tr_x.astype(np.float64)
va_x = va_x.astype(np.float64)

# The target is numeric: convert it to float32 arrays.
tr_y = np.array(tr_y, dtype=np.float32)
va_y = np.array(va_y, dtype=np.float32)

# 4: ハイパーパラメータの調整

# In [21]:
from hyperopt import hp
from keras.callbacks import EarlyStopping
from keras.layers.advanced_activations import ReLU, PReLU
from keras.layers.core import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adagrad
from keras.optimizers import Adadelta
from keras.optimizers import Adam
from keras.optimizers import Adamax
from sklearn.preprocessing import StandardScaler

# Baseline hyperparameter set; it has the same keys as the search space.
base_param = dict(
    input_dropout=0.0,
    hidden_layers=3,
    hidden_units=96,
    hidden_activation='relu',
    hidden_dropout=0.2,
    batch_norm='before_act',
    optimizer=dict(type='adam', lr=0.001),
    batch_size=64,
)

# Hyperparameter search space handed to hyperopt.
# Each optimizer option carries its own log-uniform learning-rate range,
# labelled '<optimizer name>_lr'.
param_space = dict(
    input_dropout=hp.quniform('input_dropout', 0, 0.1, 0.05),
    hidden_layers=hp.quniform('hidden_layers', 2, 5, 1),
    hidden_units=hp.quniform('hidden_units', 32, 256, 32),
    hidden_activation=hp.choice('hidden_activation', ['prelu', 'relu']),
    hidden_dropout=hp.quniform('hidden_dropout', 0, 0.3, 0.05),
    batch_norm=hp.choice('batch_norm', ['before_act', 'no']),
    optimizer=hp.choice(
        'optimizer',
        [
            {
                'type': opt_name,
                'lr': hp.loguniform(f'{opt_name}_lr', np.log(1e-5), np.log(1e-2)),
            }
            for opt_name in ('adam', 'adagrad', 'adadelta', 'adamax')
        ],
    ),
    batch_size=hp.quniform('batch_size', 32, 128, 32),
)

# In [None]:
class MLP:
    """Keras multi-layer perceptron regressor configured by a parameter dict.

    ``params`` must provide the keys ``input_dropout``, ``hidden_layers``,
    ``hidden_units``, ``hidden_activation`` ('prelu' or 'relu'),
    ``hidden_dropout``, ``batch_norm`` ('before_act' inserts batch
    normalization before each activation; any other value disables it),
    ``optimizer`` (a dict with ``type`` and ``lr``) and ``batch_size``.
    """

    # Values accepted for params['optimizer']['type'].
    _OPTIMIZER_TYPES = ('adam', 'adadelta', 'adamax', 'adagrad')

    def __init__(self, params):
        """Store the hyperparameters; nothing is built until fit() is called."""
        self.params = params
        self.scaler = None   # StandardScaler fitted in fit()
        self.model = None    # Keras Sequential model built in fit()
        self.history = None  # object returned by the Keras training call

    def fit(self, tr_x, tr_y, va_x, va_y):
        """Standardize the inputs, build the network and train it.

        Args:
            tr_x: training features (2-D; the column count sets the input size).
            tr_y: training targets.
            va_x: validation features, used for early stopping.
            va_y: validation targets.

        Raises:
            NotImplementedError: if the optimizer type or the hidden
                activation named in ``params`` is not supported.
        """
        # hyperopt's quniform yields floats, hence the int() casts on counts.
        input_dropout = self.params['input_dropout']
        hidden_layers = int(self.params['hidden_layers'])
        hidden_units = int(self.params['hidden_units'])
        hidden_activation = self.params['hidden_activation']
        hidden_dropout = self.params['hidden_dropout']
        batch_norm = self.params['batch_norm']
        optimizer_type = self.params['optimizer']['type']
        optimizer_lr = self.params['optimizer']['lr']
        batch_size = int(self.params['batch_size'])

        # Fail fast: reject an unknown optimizer before scaling the data and
        # building the network instead of after.
        if optimizer_type not in self._OPTIMIZER_TYPES:
            raise NotImplementedError(
                f'unsupported optimizer type: {optimizer_type!r}')

        # Standardization: fitted on the training data only.
        self.scaler = StandardScaler()
        tr_x = self.scaler.fit_transform(tr_x)
        va_x = self.scaler.transform(va_x)

        self.model = Sequential()

        # Input layer: dropout applied directly to the features.
        self.model.add(Dropout(input_dropout, input_shape=(tr_x.shape[1],)))

        # Hidden layers: Dense -> (BatchNorm) -> activation -> Dropout.
        for _ in range(hidden_layers):
            self.model.add(Dense(hidden_units))
            if batch_norm == 'before_act':
                self.model.add(BatchNormalization())
            if hidden_activation == 'prelu':
                self.model.add(PReLU())
            elif hidden_activation == 'relu':
                self.model.add(ReLU())
            else:
                raise NotImplementedError(
                    f'unsupported hidden activation: {hidden_activation!r}')
            self.model.add(Dropout(hidden_dropout))

        # Output layer: a single linear unit (regression).
        self.model.add(Dense(1))

        optimizer = self._build_optimizer(optimizer_type, optimizer_lr)

        # Objective and evaluation metric are both mean absolute error.
        self.model.compile(loss='mean_absolute_error', optimizer=optimizer, metrics=['mae'])

        # Train for at most nb_epoch epochs; early stopping waits `patience`
        # epochs without improvement and then restores the best weights.
        nb_epoch = 50
        patience = 20
        early_stopping = EarlyStopping(patience=patience, restore_best_weights=True)

        # Keep the training history so learning curves can be inspected later
        # (previously it was assigned to an unused local and lost).
        self.history = self.model.fit(tr_x, tr_y,
                                      epochs=nb_epoch,
                                      batch_size=batch_size, verbose=1,
                                      validation_data=(va_x, va_y),
                                      callbacks=[early_stopping])

    @staticmethod
    def _build_optimizer(optimizer_type, optimizer_lr):
        """Return the Keras optimizer named by *optimizer_type* with the given learning rate."""
        # NOTE(review): `lr=` / `decay=` are the argument names of the Keras
        # version this file was written against — confirm before upgrading Keras.
        if optimizer_type == 'adam':
            return Adam(lr=optimizer_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.)
        if optimizer_type == 'adadelta':
            return Adadelta(lr=optimizer_lr, rho=0.95, epsilon=1e-07, decay=0.0)
        if optimizer_type == 'adamax':
            return Adamax(lr=optimizer_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.0)
        if optimizer_type == 'adagrad':
            return Adagrad(lr=optimizer_lr, epsilon=1e-07, decay=0.0)
        raise NotImplementedError(f'unsupported optimizer type: {optimizer_type!r}')

    def predict(self, x):
        """Return flattened (1-D) predictions for *x*.

        *x* is passed through the scaler fitted in fit() before prediction.

        Raises:
            RuntimeError: if called before fit().
        """
        if self.scaler is None or self.model is None:
            raise RuntimeError('MLP.predict() called before fit()')
        x = self.scaler.transform(x)
        y_pred = self.model.predict(x)
        return y_pred.flatten()



    
    

from hyperopt import fmin, tpe, STATUS_OK, Trials
from sklearn.metrics import log_loss
from sklearn.metrics import mean_absolute_error


def score(params):
    """Objective function minimized by hyperopt.

    Trains an MLP with *params* on the module-level training split, measures
    the mean absolute error of its predictions on the validation split,
    records ``(params, MAE)`` in the module-level ``history`` list and returns
    the result dict hyperopt expects.
    """
    mlp = MLP(params)
    mlp.fit(tr_x, tr_y, va_x, va_y)
    val_mae = mean_absolute_error(va_y, mlp.predict(va_x))
    print(f'params: {params}, MAE: {val_mae:.4f}')

    # Record the trial so the parameters can be read back easily afterwards.
    history.append((params, val_mae))

    return dict(loss=val_mae, status=STATUS_OK)


# Run the hyperopt parameter search.
max_evals = 10
history = []
trials = Trials()
fmin(score, param_space, algo=tpe.suggest, max_evals=max_evals, trials=trials)


# Report the best parameters and score from the recorded history.
# The same information is available through `trials`, but the parameters are
# harder to extract from there.
history.sort(key=lambda entry: entry[1])
best = history[0]
print(f'best params:{best[0]}, score:{best[1]:.4f}')