# Overview

* Simple Seq2seq model with LSTM
* Hyperparameter optimization with a Gaussian process (GP)

In [None]:
import skopt
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.utils import use_named_args

import keras
import keras.backend as K
from keras.layers import Dense, LSTM, TimeDistributed, Input
from keras.models import Model
from keras.optimizers import Adam

import numpy as np

# Build simple seq2seq dataset

* without teacher forcing
* with teacher forcing

In [None]:
# Synthetic seq2seq dataset. Each sample is a sum of three random sinusoids
# evaluated at 200 points: the first 100 points are the encoder input and the
# remaining 100 points are the target sequence.
x_list = []
y_list = []

# The sampling grid is identical for every sample, so build it once.
x = np.linspace(0, 30, 200)

for _ in range(10000):
  y = np.zeros(shape=(200,))

  for _component in range(3):
    fq = np.random.random() * 4 + 1e-8  # frequency; the epsilon keeps it strictly positive
    start_theta = np.random.random() * 3.14 * 2  # random initial phase
    height = np.random.random() * 0.5 + 0.5  # amplitude in [0.5, 1.0)
    y = y + np.sin(start_theta + x*fq)*height
  # Rescale so the peak-to-peak range of the sample is 1.
  y = y / (np.max(y)-np.min(y))
  x_list.append(y[:100])
  y_list.append(y[100:])

# First 9000 samples for training, last 1000 for validation; a trailing
# feature axis of size 1 is added for the LSTM layers.
x_train = np.array(x_list[:9000]).reshape([-1, 100, 1])
y_train = np.array(y_list[:9000]).reshape([-1, 100, 1])
x_test = np.array(x_list[9000:]).reshape([-1, 100, 1])
y_test = np.array(y_list[9000:]).reshape([-1, 100, 1])


# with teacher forcing
# The decoder input is the target shifted right by one step: it starts with
# the LAST encoder sample (index -1), followed by y[0..98]. Using index -2
# here would skip one timestep between the encoder and decoder sequences.
decoder_train_input_data = np.concatenate([x_train[:, -1:, :], y_train[:, :-1, :]], axis=1)
decoder_test_input_data = np.concatenate([x_test[:, -1:, :], y_test[:, :-1, :]], axis=1)

# without teacher forcing
# decoder_train_input_data = np.ones(shape=x_train.shape)
# decoder_test_input_data = np.ones(shape=x_test.shape)

print("train_data", x_train.shape, y_train.shape)
print("test_data", x_test.shape, y_test.shape)

# Define parameter space




In [None]:
# Search space handed to the optimizer: one entry per tunable hyperparameter.
dim_learning_rate = Real(
    low=1e-6,
    high=1e-2,
    prior='log-uniform',
    name='learning_rate',
)
dim_lstm_state_size = Integer(
    low=10,
    high=1024,
    name='lstm_state_size',
)
dim_lstm_depth = Integer(
    low=1,
    high=3,
    name='lstm_depth',
)

# Initial point for the search, listed in the same order as `dimensions`.
default_parameters = [1e-5, 10, 3]
dimensions = [
    dim_learning_rate,
    dim_lstm_state_size,
    dim_lstm_depth,
]

# Define create model function

Create the neural network model from the given hyperparameters.

In [None]:
def create_model(learning_rate, lstm_state_size, lstm_depth=1):
  """Build and compile an LSTM encoder-decoder (seq2seq) model.

  The encoder reads a fixed-length sequence of shape (100, 1) through
  `lstm_depth` stacked LSTM layers; the hidden and cell state of the last
  encoder layer initialise the first decoder LSTM layer. The decoder takes a
  variable-length sequence of shape (None, 1), runs it through `lstm_depth`
  LSTM layers and emits one tanh-activated value per timestep.

  Args:
    learning_rate: Learning rate passed to the Adam optimizer.
    lstm_state_size: Number of units in every LSTM layer.
    lstm_depth: Number of stacked LSTM layers in both encoder and decoder.
      Must be at least 1.

  Returns:
    A compiled Keras `Model` taking `[encoder_input, decoder_input]`, with
    MSE loss and MAE as an additional metric.

  Raises:
    ValueError: If `lstm_depth` is smaller than 1.
  """
  # Without at least one encoder layer there is no state to hand to the
  # decoder; fail early with a clear message instead of an unbound-variable
  # error further down.
  if lstm_depth < 1:
    raise ValueError("lstm_depth must be >= 1, got %r" % (lstm_depth,))

  # Reset the backend's global state (layer-name counters and session) so
  # repeated calls do not accumulate previously built models.
  K.reset_uids()
  K.clear_session()

  encoder_input = Input(shape=[100, 1])
  x = encoder_input

  for j in range(lstm_depth):
    if j < lstm_depth-1:
      # Intermediate encoder layers pass the full sequence to the next layer.
      x = LSTM(lstm_state_size, return_sequences=True)(x)
    else:
      # Only the final encoder layer's states are kept for the decoder.
      x, encoder_state1, encoder_state2 = LSTM(lstm_state_size, return_state=True)(x)

  encoder_state = [encoder_state1, encoder_state2]
  decoder_input = Input(shape=[None, 1])
  x = decoder_input

  for j in range(lstm_depth):
    if j == 0:
      # The first decoder layer starts from the encoder's final states.
      x = LSTM(lstm_state_size, return_sequences=True)(x, initial_state=encoder_state)
    else:
      x = LSTM(lstm_state_size, return_sequences=True)(x)

  # One scalar prediction per decoder timestep.
  output = TimeDistributed(Dense(1, activation="tanh"))(x)

  model = Model([encoder_input, decoder_input], output)
  optimizer = Adam(lr=learning_rate)
  model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
  return model

# Define fitness function

Function that trains a model with the given hyperparameters and returns its validation performance.

In [None]:
# Lowest validation MAE seen so far across all fitness() calls.
best_mae = float("inf")


@use_named_args(dimensions=dimensions)
def fitness(learning_rate, lstm_state_size, lstm_depth):
  """Train a model with the given hyperparameters and return its validation MAE.

  The model is trained for 3 epochs on the training set and evaluated on the
  test set. Whenever the validation MAE improves on the best value seen so
  far, the model is saved to "./best_model.h5" and `best_mae` is updated.

  Args:
    learning_rate: Learning rate for the Adam optimizer.
    lstm_state_size: Number of units in each LSTM layer.
    lstm_depth: Number of stacked LSTM layers.

  Returns:
    The validation mean absolute error after the last epoch (lower is better).
  """
  global best_mae

  print("learning rate : %f"%learning_rate)
  print("lstm_state_size : %d"%lstm_state_size)
  print("lstm_depth : %d"%lstm_depth)

  model = create_model(learning_rate, lstm_state_size, lstm_depth)
  validation_data = ([x_test, decoder_test_input_data], y_test)
  history = model.fit([x_train, decoder_train_input_data], y_train, epochs=3, validation_data=validation_data)

  # The history key for the 'mae' metric depends on the Keras version:
  # older releases log 'val_mean_absolute_error', newer ones 'val_mae'.
  logged = history.history
  mae_key = 'val_mean_absolute_error' if 'val_mean_absolute_error' in logged else 'val_mae'
  val_mae = logged[mae_key][-1]

  if val_mae < best_mae:
    model.save("./best_model.h5")
    best_mae = val_mae

  # Drop the reference on every call, not only when a new best is found.
  del model

  return val_mae

# Hyperparameter search with GP

In [None]:
# Minimize `fitness` over `dimensions` using Expected Improvement as the
# acquisition function, with 40 evaluations starting from `default_parameters`.
search_result = gp_minimize(
    fitness,
    dimensions,
    acq_func='EI',
    n_calls=40,
    x0=default_parameters,
)