seq2seq model architectures:
- simple (encoder: lstm, decoder: lstm -> dense)
- stacked_encoder (encoder: lstm -> lstm, decoder: lstm -> dense)
- bistacked_encoder (encoder: bilstm -> lstm, decoder: lstm -> dense)
- stacked_decoder (encoder: lstm, decoder: lstm -> lstm -> dense)
- stacked (encoder: lstm -> lstm, decoder: lstm -> lstm -> dense)
- bistacked (encoder: bilstm -> lstm, decoder: lstm -> lstm -> dense)

### Google Colab utils

In [None]:
#!pip install keras-tuner

In [None]:
# # memory footprint support libraries/code
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# !pip install gputil
# !pip install psutil
# !pip install humanize
# import psutil
# import humanize
# import os
# import GPUtil as GPU
# GPUs = GPU.getGPUs()
# # XXX: only one GPU on Colab and isn’t guaranteed
# gpu = GPUs[0]
# def printm():
#  process = psutil.Process(os.getpid())
#  print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
#  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
# printm() 

In [None]:
#!kill -9 -1

In [None]:
# from google.colab import drive
# drive.mount('/gdrive')
# %cd "/gdrive/My Drive/air-pollution"

### Modeling

In [1]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Lambda, Reshape, Dropout
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

from kerastuner import HyperModel
from kerastuner.tuners import RandomSearch

import numpy as np
import pandas as pd
from scipy.ndimage.interpolation import shift
import matplotlib.pyplot as plt


Bad key "text.kerning_factor" on line 4 in
/home/zafir/miniconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
# Load the pre-built seq2seq arrays (encoder inputs, decoder inputs,
# decoder targets) for each split from ./data/third-order/seq2seq.

# Training split
(train_encoder_input_data,
 train_decoder_input_data,
 train_decoder_target_data) = (
    np.load('./data/third-order/seq2seq/train_encoder_input_data.npy'),
    np.load('./data/third-order/seq2seq/train_decoder_input_data.npy'),
    np.load('./data/third-order/seq2seq/train_decoder_target_data.npy'),
)

# Validation split
(valid_encoder_input_data,
 valid_decoder_input_data,
 valid_decoder_target_data) = (
    np.load('./data/third-order/seq2seq/valid_encoder_input_data.npy'),
    np.load('./data/third-order/seq2seq/valid_decoder_input_data.npy'),
    np.load('./data/third-order/seq2seq/valid_decoder_target_data.npy'),
)

# Test split
(test_encoder_input_data,
 test_decoder_input_data,
 test_decoder_target_data) = (
    np.load('./data/third-order/seq2seq/test_encoder_input_data.npy'),
    np.load('./data/third-order/seq2seq/test_decoder_input_data.npy'),
    np.load('./data/third-order/seq2seq/test_decoder_target_data.npy'),
)

In [3]:
# Sanity check: show the shape of each training array, one per line.
print(train_encoder_input_data.shape,
      train_decoder_input_data.shape,
      train_decoder_target_data.shape,
      sep='\n')

(67396, 24, 23)
(67396, 12, 23)
(67396, 12, 2)


In [4]:
# Encoder sequence length and per-step feature count, read from axes 1 and 2
# of the encoder input array (axis 0 indexes the samples).
Tx, encoder_input_dim = train_encoder_input_data.shape[1:3]

# The same two axes of the decoder input array.
Ty, decoder_input_dim = train_decoder_input_data.shape[1:3]

# Only the pollution value is predicted; the mask channel of the target
# array is left out of the model output.
decoder_output_dim = 1

In [5]:
def masked_mse(y_true, y_pred):
    """Masked squared-error loss, reduced over the last axis of the slices.

    Channel 0 of ``y_true`` holds the target value and channel 1 holds a
    mask; each squared error is weighted by ``1 - mask``, so a mask value
    of 1 removes that step from the loss. Only channel 0 of ``y_pred`` is
    compared against the target.

    The weighted sum is divided by ``1 + sum(1 - mask)``; the extra 1
    presumably guards against division by zero when every step is masked
    (TODO confirm), which also means the result is slightly smaller than a
    plain masked mean.
    """
    target = y_true[:, :, 0]
    keep = 1 - y_true[:, :, 1]
    squared_error = (target - y_pred[:, :, 0]) ** 2

    weighted_total = K.sum(squared_error * keep, axis=-1)
    return weighted_total / (1 + K.sum(keep, axis=-1))

## Simple seq2seq

In [6]:
class SimpleSeq2Seq(HyperModel):
    """Tunable seq2seq model (encoder: lstm, decoder: lstm -> dense).

    The final hidden and cell states of the encoder LSTM are used as the
    initial state of the decoder LSTM.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed (non-tuned) dimensions of the model.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: number of features per encoder time step.
            decoder_input_dim: number of features per decoder time step.
            decoder_output_dim: number of units of the final dense layer.
        """
        # Run the base-class initialiser so HyperModel's own attributes
        # are set up as well.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile the model for one set of hyperparameters.

        Tuned hyperparameters:
            latent_dim: LSTM units, 32 to 128 in steps of 32.
            dense_dropout: dropout rate before the dense layer,
                0 to 0.7 in steps of 0.1 (default 0.5).
            learning_rate: Adam learning rate, 1e-4 to 1e-2, log-sampled.

        Args:
            hp: hyperparameter container supplied by the tuner.

        Returns:
            The Keras model, compiled with Adam and the masked_mse loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=32, max_value=128, step=32)

        encoder_lstm = LSTM(latent_dim, return_state=True, name='encoder_lstm')
        decoder_lstm = LSTM(latent_dim, return_sequences=True,
                            return_state=True, name='decoder_lstm')
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')
        dense_dropout = Dropout(rate=hp.Float('dense_dropout', 0, 0.7,
                                step=0.1, default=0.5))

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # We discard the output and keep the states only.
        _, h, c = encoder_lstm(encoder_inputs)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the decoder (return_sequences = True),
        # starting from the encoder states; the decoder states are unused.
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=[h, c])

        # Apply dropout
        decoder_outputs = dense_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [7]:
# Hypermodel for the simple architecture, sized from the loaded data.
model_builder = SimpleSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search on validation loss; trial results are stored under
# local-keras-tuner/seq2seq in the 'simple' project.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner/seq2seq',
    project_name='simple',
)



In [None]:
# Run the hyperparameter search. Each trial trains for at most 250 epochs
# and stops early once val_loss has not improved for 20 epochs.
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    batch_size=64,
    epochs=250,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ],
)

In [None]:
# Print the tuner's summary of the search results for the simple model.
tuner.results_summary()

## Stacked encoder

In [None]:
class StackedEncoderSeq2Seq(HyperModel):
    """Tunable seq2seq model (encoder: lstm -> lstm, decoder: lstm -> dense).

    The final hidden and cell states of the second encoder LSTM are used as
    the initial state of the decoder LSTM.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed (non-tuned) dimensions of the model.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: number of features per encoder time step.
            decoder_input_dim: number of features per decoder time step.
            decoder_output_dim: number of units of the final dense layer.
        """
        # Run the base-class initialiser so HyperModel's own attributes
        # are set up as well.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile the model for one set of hyperparameters.

        Tuned hyperparameters:
            latent_dim: units of every LSTM, 32 to 128 in steps of 32.
            seq_dropout: dropout rate between the two encoder LSTMs,
                0 to 0.7 in steps of 0.1 (default 0.5).
            dense_dropout: dropout rate before the dense layer,
                0 to 0.7 in steps of 0.1 (default 0.5).
            learning_rate: Adam learning rate, 1e-4 to 1e-2, log-sampled.

        Args:
            hp: hyperparameter container supplied by the tuner.

        Returns:
            The Keras model, compiled with Adam and the masked_mse loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=32, max_value=128, step=32)

        encoder_lstm_1 = LSTM(latent_dim, return_sequences=True,
                              name='encoder_lstm_1')
        encoder_lstm_2 = LSTM(latent_dim, return_state=True,
                              name='encoder_lstm_2')
        decoder_lstm = LSTM(latent_dim, return_sequences=True,
                            return_state=True, name='decoder_lstm')
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')

        seq_dropout = Dropout(rate=hp.Float('seq_dropout', 0, 0.7,
                                step=0.1, default=0.5))
        dense_dropout = Dropout(rate=hp.Float('dense_dropout', 0, 0.7,
                                step=0.1, default=0.5))

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # Obtain the outputs from the first encoder layer
        encoder_out = encoder_lstm_1(encoder_inputs)

        # Pass the outputs through a dropout layer before
        # feeding them to the next LSTM layer
        encoder_out = seq_dropout(encoder_out)

        # We discard the output and keep the states only.
        _, h, c = encoder_lstm_2(encoder_out)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the decoder (return_sequences = True),
        # starting from the encoder states; the decoder states are unused.
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=[h, c])

        # Apply dropout
        decoder_outputs = dense_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [None]:
# Hypermodel for the stacked-encoder architecture, sized from the loaded data.
model_builder = StackedEncoderSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search on validation loss; trial results are stored under
# local-keras-tuner/seq2seq in the 'stacked-encoder' project.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner/seq2seq',
    project_name='stacked-encoder',
)

In [None]:
# Run the hyperparameter search. Each trial trains for at most 250 epochs
# and stops early once val_loss has not improved for 20 epochs.
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    batch_size=64,
    epochs=250,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ],
)

## BiStacked encoder

In [6]:
class BiStackedEncoderSeq2Seq(HyperModel):
    """Tunable seq2seq model (encoder: bilstm -> lstm, decoder: lstm -> dense).

    The final hidden and cell states of the second encoder LSTM are used as
    the initial state of the decoder LSTM.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed (non-tuned) dimensions of the model.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: number of features per encoder time step.
            decoder_input_dim: number of features per decoder time step.
            decoder_output_dim: number of units of the final dense layer.
        """
        # Run the base-class initialiser so HyperModel's own attributes
        # are set up as well.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile the model for one set of hyperparameters.

        Tuned hyperparameters:
            latent_dim: units of every LSTM, 32 to 128 in steps of 32.
            seq_dropout: dropout rate between the two encoder layers,
                0 to 0.7 in steps of 0.1 (default 0.5).
            dense_dropout: dropout rate before the dense layer,
                0 to 0.7 in steps of 0.1 (default 0.5).
            learning_rate: Adam learning rate, 1e-4 to 1e-2, log-sampled.

        Args:
            hp: hyperparameter container supplied by the tuner.

        Returns:
            The Keras model, compiled with Adam and the masked_mse loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=32, max_value=128, step=32)

        # Bidirectional first layer; merge_mode='concat' joins the outputs
        # of the two directions along the feature axis.
        encoder_lstm_1 = Bidirectional(LSTM(latent_dim, return_sequences=True,
                              name='encoder_lstm_1'), merge_mode='concat')
        encoder_lstm_2 = LSTM(latent_dim, return_state=True,
                              name='encoder_lstm_2')
        decoder_lstm = LSTM(latent_dim, return_sequences=True,
                            return_state=True, name='decoder_lstm')
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')
        seq_dropout = Dropout(rate=hp.Float('seq_dropout', 0, 0.7,
                                step=0.1, default=0.5))
        dense_dropout = Dropout(rate=hp.Float('dense_dropout', 0, 0.7,
                                step=0.1, default=0.5))

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # Obtain the outputs from the first encoder layer
        encoder_out = encoder_lstm_1(encoder_inputs)

        # Pass the outputs through a dropout layer before
        # feeding them to the next LSTM layer
        encoder_out = seq_dropout(encoder_out)

        # We discard the output and keep the states only.
        _, h, c = encoder_lstm_2(encoder_out)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the decoder (return_sequences = True),
        # starting from the encoder states; the decoder states are unused.
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=[h, c])

        # Apply dropout
        decoder_outputs = dense_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [7]:
# Hypermodel for the bistacked-encoder architecture, sized from the loaded data.
model_builder = BiStackedEncoderSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search on validation loss; trial results are stored under
# local-keras-tuner/seq2seq in the 'bistacked-encoder' project.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner/seq2seq',
    project_name='bistacked-encoder',
)



In [None]:
# Run the hyperparameter search. Each trial trains for at most 250 epochs
# and stops early once val_loss has not improved for 20 epochs.
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    batch_size=64,
    epochs=250,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ],
)

In [None]:
# Print the tuner's summary of the search results for the bistacked-encoder model.
tuner.results_summary()

## Stacked decoder

In [None]:
class StackedDecoderSeq2Seq(HyperModel):
    """Tunable seq2seq model (encoder: lstm, decoder: lstm -> lstm -> dense).

    The final hidden and cell states of the encoder LSTM are used as the
    initial state of the first decoder LSTM only.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed (non-tuned) dimensions of the model.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: number of features per encoder time step.
            decoder_input_dim: number of features per decoder time step.
            decoder_output_dim: number of units of the final dense layer.
        """
        # Run the base-class initialiser so HyperModel's own attributes
        # are set up as well.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile the model for one set of hyperparameters.

        Tuned hyperparameters:
            latent_dim: units of every LSTM, 32 to 128 in steps of 32.
            seq_dropout: dropout rate between the two decoder LSTMs,
                0 to 0.7 in steps of 0.1 (default 0.5).
            dense_dropout: dropout rate before the dense layer,
                0 to 0.7 in steps of 0.1 (default 0.5).
            learning_rate: Adam learning rate, 1e-4 to 1e-2, log-sampled.

        Args:
            hp: hyperparameter container supplied by the tuner.

        Returns:
            The Keras model, compiled with Adam and the masked_mse loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=32, max_value=128, step=32)

        encoder_lstm = LSTM(latent_dim, return_state=True,
                            name='encoder_lstm')
        decoder_lstm_1 = LSTM(latent_dim, return_sequences=True,
                              name='decoder_lstm_1')
        decoder_lstm_2 = LSTM(latent_dim, return_sequences=True,
                              return_state=True, name='decoder_lstm_2')
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')
        seq_dropout = Dropout(rate=hp.Float('seq_dropout', 0, 0.7,
                                step=0.1, default=0.5))
        dense_dropout = Dropout(rate=hp.Float('dense_dropout', 0, 0.7,
                                step=0.1, default=0.5))

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # We discard the output and keep the states only.
        _, h, c = encoder_lstm(encoder_inputs)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the first decoder layer
        # (return_sequences = True), starting from the encoder states.
        decoder_outputs = decoder_lstm_1(decoder_inputs, initial_state=[h, c])

        # Apply dropout
        decoder_outputs = seq_dropout(decoder_outputs)

        # Apply LSTM again (stacked); its returned states are unused.
        decoder_outputs, _, _ = decoder_lstm_2(decoder_outputs)

        # Apply dropout
        decoder_outputs = dense_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [None]:
# Hypermodel for the stacked-decoder architecture, sized from the loaded data.
model_builder = StackedDecoderSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search on validation loss; trial results are stored under
# local-keras-tuner/seq2seq in the 'stacked-decoder' project.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner/seq2seq',
    project_name='stacked-decoder',
)

In [None]:
# Run the hyperparameter search. Each trial trains for at most 250 epochs
# and stops early once val_loss has not improved for 20 epochs.
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    batch_size=64,
    epochs=250,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ],
)

In [None]:
# Print the tuner's summary of the search results for the stacked-decoder model.
tuner.results_summary()

## Stacked

In [7]:

class StackedSeq2Seq(HyperModel):
    """Tunable seq2seq model (encoder: lstm -> lstm, decoder: lstm -> lstm -> dense).

    The final hidden and cell states of the second encoder LSTM are used as
    the initial state of the first decoder LSTM only.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed (non-tuned) dimensions of the model.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: number of features per encoder time step.
            decoder_input_dim: number of features per decoder time step.
            decoder_output_dim: number of units of the final dense layer.
        """
        # Run the base-class initialiser so HyperModel's own attributes
        # are set up as well.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile the model for one set of hyperparameters.

        Tuned hyperparameters:
            latent_dim: units of every LSTM, 32 to 128 in steps of 32.
            seq_dropout: dropout rate between stacked LSTMs (shared by the
                encoder and the decoder), 0 to 0.7 in steps of 0.1
                (default 0.5).
            dense_dropout: dropout rate before the dense layer,
                0 to 0.7 in steps of 0.1 (default 0.5).
            learning_rate: Adam learning rate, 1e-4 to 1e-2, log-sampled.

        Args:
            hp: hyperparameter container supplied by the tuner.

        Returns:
            The Keras model, compiled with Adam and the masked_mse loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=32, max_value=128, step=32)

        encoder_lstm_1 = LSTM(latent_dim, return_sequences=True,
                              name='encoder_lstm_1')
        encoder_lstm_2 = LSTM(latent_dim, return_state=True,
                              name='encoder_lstm_2')
        decoder_lstm_1 = LSTM(latent_dim, return_sequences=True,
                              name='decoder_lstm_1')
        decoder_lstm_2 = LSTM(latent_dim, return_sequences=True,
                              return_state=True, name='decoder_lstm_2')
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')
        # One dropout layer (a single tuned rate) is reused between the
        # encoder LSTMs and between the decoder LSTMs.
        seq_dropout = Dropout(rate=hp.Float('seq_dropout', 0, 0.7,
                                step=0.1, default=0.5))
        dense_dropout = Dropout(rate=hp.Float('dense_dropout', 0, 0.7,
                                step=0.1, default=0.5))

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # First layer in the encoder
        encoder_outputs = encoder_lstm_1(encoder_inputs)

        # Apply dropout
        encoder_outputs = seq_dropout(encoder_outputs)

        # Pass the outputs to the next encoder layer, obtain h and c
        _, h, c = encoder_lstm_2(encoder_outputs)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the first decoder layer
        # (return_sequences = True), starting from the encoder states.
        decoder_outputs = decoder_lstm_1(decoder_inputs, initial_state=[h, c])

        # Apply dropout
        decoder_outputs = seq_dropout(decoder_outputs)

        # Apply LSTM again (stacked); its returned states are unused.
        decoder_outputs, _, _ = decoder_lstm_2(decoder_outputs)

        # Apply dropout
        decoder_outputs = dense_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [8]:
# Hypermodel for the fully stacked architecture, sized from the loaded data.
model_builder = StackedSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search on validation loss; trial results are stored under
# local-keras-tuner/seq2seq in the 'stacked' project.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner/seq2seq',
    project_name='stacked',
)

INFO:tensorflow:Reloading Oracle from existing project local-keras-tuner/stacked/oracle.json


In [9]:
# Run the hyperparameter search. Each trial trains for at most 250 epochs
# and stops early once val_loss has not improved for 20 epochs.
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    batch_size=64,
    epochs=250,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ],
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 67397 samples, validate on 3389 samples
Epoch 1/100
13504/67397 [=====>........................] - ETA: 42:48 - loss: 1.14 - ETA: 22:05 - loss: 1.03 - ETA: 15:08 - loss: 1.01 - ETA: 11:43 - loss: 0.98 - ETA: 9:37 - loss: 0.9483 - ETA: 8:10 - loss: 0.932 - ETA: 7:12 - loss: 0.945 - ETA: 6:33 - loss: 0.911 - ETA: 5:59 - loss: 0.890 - ETA: 5:33 - loss: 0.865 - ETA: 5:10 - loss: 0.845 - ETA: 4:53 - loss: 0.811 - ETA: 4:36 - loss: 0.805 - ETA: 4:22 - loss: 0.793 - ETA: 4:08 - loss: 0.784 - ETA: 3:58 - loss: 0.775 - ETA: 3:48 - loss: 0.769 - ETA: 3:42 - loss: 0.752 - ETA: 3:35 - loss: 0.746 - ETA: 3:27 - loss: 0.735 - ETA: 3:21 - loss: 0.724 - ETA: 3:14 - loss: 0.714 - ETA: 3:09 - loss: 0.701 - ETA: 3:04 - loss: 0.694 - ETA: 2:59 - loss: 0.683 - ETA: 2:49 - loss: 0.677 - ETA: 2:44 - loss: 0.669 - ETA: 2:41 - loss: 0.665 - ETA: 2:37 - loss: 0.661 - ETA: 2:35 - loss: 0.660 - ETA: 2:32 - loss:

KeyboardInterrupt: 

In [None]:
# Colab only: `drive` comes from `from google.colab import drive`, which is
# commented out in the Colab utils section, so an unconditional call raises
# NameError on a local run. Only flush/unmount when the name is defined.
if 'drive' in globals():
    drive.flush_and_unmount()

## BiStacked

In [None]:
class BiStackedSeq2Seq(HyperModel):
    """Tunable seq2seq model (encoder: bilstm -> lstm, decoder: lstm -> lstm -> dense).

    The final hidden and cell states of the second encoder LSTM are used as
    the initial state of the first decoder LSTM only.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed (non-tuned) dimensions of the model.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: number of features per encoder time step.
            decoder_input_dim: number of features per decoder time step.
            decoder_output_dim: number of units of the final dense layer.
        """
        # Run the base-class initialiser so HyperModel's own attributes
        # are set up as well.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile the model for one set of hyperparameters.

        Tuned hyperparameters:
            latent_dim: units of every LSTM, 32 to 128 in steps of 32.
            seq_dropout: dropout rate between stacked recurrent layers
                (shared by the encoder and the decoder), 0 to 0.7 in steps
                of 0.1 (default 0.5).
            dense_dropout: dropout rate before the dense layer,
                0 to 0.7 in steps of 0.1 (default 0.5).
            learning_rate: Adam learning rate, 1e-4 to 1e-2, log-sampled.

        Args:
            hp: hyperparameter container supplied by the tuner.

        Returns:
            The Keras model, compiled with Adam and the masked_mse loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=32, max_value=128, step=32)

        # Bidirectional first layer; merge_mode='concat' joins the outputs
        # of the two directions along the feature axis.
        encoder_lstm_1 = Bidirectional(LSTM(latent_dim, return_sequences=True,
                              name='encoder_lstm_1'), merge_mode='concat')
        encoder_lstm_2 = LSTM(latent_dim, return_state=True,
                              name='encoder_lstm_2')
        decoder_lstm_1 = LSTM(latent_dim, return_sequences=True,
                              name='decoder_lstm_1')
        decoder_lstm_2 = LSTM(latent_dim, return_sequences=True,
                              return_state=True, name='decoder_lstm_2')
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')
        # One dropout layer (a single tuned rate) is reused between the
        # encoder layers and between the decoder LSTMs.
        seq_dropout = Dropout(rate=hp.Float('seq_dropout', 0, 0.7,
                                step=0.1, default=0.5))
        dense_dropout = Dropout(rate=hp.Float('dense_dropout', 0, 0.7,
                                step=0.1, default=0.5))

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # First layer in the encoder
        encoder_outputs = encoder_lstm_1(encoder_inputs)

        # Apply dropout
        encoder_outputs = seq_dropout(encoder_outputs)

        # Pass the outputs to the next encoder layer, obtain h and c
        _, h, c = encoder_lstm_2(encoder_outputs)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the first decoder layer
        # (return_sequences = True), starting from the encoder states.
        decoder_outputs = decoder_lstm_1(decoder_inputs, initial_state=[h, c])

        # Apply dropout
        decoder_outputs = seq_dropout(decoder_outputs)

        # Apply LSTM again (stacked); its returned states are unused.
        decoder_outputs, _, _ = decoder_lstm_2(decoder_outputs)

        # Apply the dense-layer dropout. This must be dense_dropout, not
        # seq_dropout: otherwise the tuned 'dense_dropout' rate has no
        # effect on the model.
        decoder_outputs = dense_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [None]:
# Hypermodel for the bistacked architecture, sized from the loaded data.
model_builder = BiStackedSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search on validation loss; trial results are stored under
# local-keras-tuner/seq2seq in the 'bistacked' project.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner/seq2seq',
    project_name='bistacked',
)

In [None]:
# Run the hyperparameter search. Each trial trains for at most 250 epochs
# and stops early once val_loss has not improved for 20 epochs.
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    batch_size=64,
    epochs=250,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ],
)