Model architectures
- simple (encoder: LSTM, decoder: LSTM -> Dropout -> Dense)
- stacked (encoder: LSTM -> Dropout -> LSTM, decoder: LSTM -> Dropout -> Dense)
- bistacked (encoder: BiLSTM -> Dropout -> LSTM, decoder: LSTM -> Dropout -> Dense)

### Google Colab utils

In [34]:
#!pip install keras-tuner

In [None]:
# # memory footprint support libraries/code
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# !pip install gputil
# !pip install psutil
# !pip install humanize
# import psutil
# import humanize
# import os
# import GPUtil as GPU
# GPUs = GPU.getGPUs()
# # XXX: only one GPU on Colab and isn’t guaranteed
# gpu = GPUs[0]
# def printm():
#  process = psutil.Process(os.getpid())
#  print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
#  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
# printm() 

In [None]:
#!kill -9 -1

In [24]:
# from google.colab import drive
# drive.mount('/gdrive')
# %cd "/gdrive/My Drive/air-pollution"

### Modeling

In [39]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Lambda, Reshape, Dropout
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.optimizers import Adam

from kerastuner import HyperModel
from kerastuner.tuners import RandomSearch

import numpy as np
import pandas as pd
from scipy.ndimage.interpolation import shift
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt

In [25]:
# Load the pre-split feature (X) and target (y) arrays, one pair per split.
X_train, y_train = (np.load('./data/third-order/X_train.npy'),
                    np.load('./data/third-order/y_train.npy'))
X_valid, y_valid = (np.load('./data/third-order/X_valid.npy'),
                    np.load('./data/third-order/y_valid.npy'))
X_test, y_test = (np.load('./data/third-order/X_test.npy'),
                  np.load('./data/third-order/y_test.npy'))

In [26]:
# Sequence lengths and feature widths are read off the loaded arrays.
Tx = X_train.shape[1]                 # time steps fed to the encoder
encoder_input_dim = X_train.shape[2]  # features per encoder time step
Ty = y_train.shape[1]                 # time steps produced by the decoder

# Only the target variable being predicted is fed back into the decoder,
# so the decoder reads and emits a single value per time step.
decoder_input_dim = 1
decoder_output_dim = 1

In [27]:
def shift_targets_for_decoder(decoder_targets, feature_dim, start_value=-10):
    """Build the decoder input sequence from the decoder targets.

    Channel 0 of ``decoder_targets`` is delayed by one step along axis 1:
    position ``t`` of the result holds the target value of position ``t - 1``
    and position 0 is filled with ``start_value``.

    Plain slicing is used instead of ``scipy.ndimage`` ``shift`` because that
    function interpolates with a cubic spline by default (``order=3``); for a
    whole-step delay this only costs time and returns the values up to
    floating-point rounding rather than exactly.

    Args:
        decoder_targets: array indexed as (sample, step, channel).
        feature_dim: size of the trailing axis of the result. Channel 0 is
            reshaped to ``(samples, steps, feature_dim)``, which only succeeds
            when ``feature_dim == 1``.
        start_value: filler written at step 0 of every sample.

    Returns:
        A new array of shape ``(samples, steps, feature_dim)`` with the same
        dtype as ``decoder_targets``.
    """
    n_samples, n_steps = decoder_targets.shape[0], decoder_targets.shape[1]
    first_channel = decoder_targets[:, :, 0].reshape(
        n_samples, n_steps, feature_dim)

    delayed = np.empty_like(first_channel)
    delayed[:, :1, :] = start_value
    delayed[:, 1:, :] = first_channel[:, :-1, :]
    return delayed


# Training split: encoder inputs, decoder targets and the delayed decoder inputs.
train_encoder_input_data = X_train.copy()
train_decoder_target_data = y_train.copy()
train_decoder_input_data = shift_targets_for_decoder(
    train_decoder_target_data, decoder_input_dim)

# Validation split, prepared the same way.
valid_encoder_input_data = X_valid.copy()
valid_decoder_target_data = y_valid.copy()
valid_decoder_input_data = shift_targets_for_decoder(
    valid_decoder_target_data, decoder_input_dim)

In [28]:
def masked_mse(y_true, y_pred):
    """Squared-error loss that skips the time steps flagged in ``y_true``.

    Channel 0 of ``y_true`` and ``y_pred`` holds the values being compared.
    Channel 1 of ``y_true`` is a flag: every squared error is multiplied by
    ``1 - flag``, so a flag of 1 removes that step from the loss.

    Returns one value per sample: the weighted squared errors summed over the
    last axis of the channel slice, divided by ``1 + sum(weights)``.
    """
    keep = 1 - y_true[:, :, 1]
    residual = y_true[:, :, 0] - y_pred[:, :, 0]

    weighted_sq_error = K.sum((residual ** 2) * keep, axis=-1)
    # The extra 1 keeps the denominator non-zero when every weight is 0.
    normaliser = 1 + K.sum(keep, axis=-1)
    return weighted_sq_error / normaliser

## Simple seq2seq

In [31]:
class SimpleSeq2Seq(HyperModel):
    """Hypermodel for an encoder-decoder network with one LSTM on each side.

    The encoder LSTM reads the input sequence and passes its final hidden and
    cell states to the decoder LSTM as initial state. The decoder outputs go
    through dropout and a linear Dense layer.

    Tuned hyperparameters: ``latent_dim``, ``dropout``, ``learning_rate``.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed sequence lengths and feature sizes.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: features per encoder time step.
            decoder_input_dim: features per decoder time step.
            decoder_output_dim: units of the final Dense layer.
        """
        # Run the base-class initializer so the state HyperModel sets up
        # itself is present on this instance too.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: hyperparameter container; ``hp.Int`` / ``hp.Float`` are called
                on it to obtain the value of each tuned hyperparameter.

        Returns:
            A compiled ``Model`` with inputs ``[encoder_input, decoder_input]``
            and the Dense-projected decoder sequence as output, using Adam and
            the ``masked_mse`` loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        latent_dim = hp.Int('latent_dim', min_value=16, max_value=64, step=16)

        encoder_lstm = LSTM(latent_dim, return_state=True, name='encoder_lstm')
        decoder_lstm = LSTM(latent_dim, return_sequences=True,
                            return_state=True, name='decoder_lstm')
        dropout = Dropout(rate=hp.Float('dropout', 0, 0.5,
                                        step=0.1, default=0.5))
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # We discard output and keep the states only.
        _, h, c = encoder_lstm(encoder_inputs)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the decoder (return_sequences = True),
        # starting from the encoder's final states.
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=[h, c])

        # Apply dropout
        decoder_outputs = dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [32]:
# Hypermodel configured with the sequence lengths and feature sizes of the data.
model_builder = SimpleSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search over the hypermodel, with validation loss as the objective.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner',
    project_name='simple',
)



In [33]:
# Run the search: at most 100 epochs per fit, with an EarlyStopping callback
# watching val_loss (patience 10).
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    epochs=100,
    batch_size=64,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, verbose=1)],
)

Train on 67397 samples, validate on 3389 samples
Epoch 1/100
14208/67397 [=====>........................] - ETA: 15:42 - loss: 0.86 - ETA: 5:32 - loss: 0.6133 - ETA: 3:30 - loss: 0.495 - ETA: 2:19 - loss: 0.406 - ETA: 1:46 - loss: 0.362 - ETA: 1:34 - loss: 0.338 - ETA: 1:27 - loss: 0.320 - ETA: 1:26 - loss: 0.310 - ETA: 1:20 - loss: 0.296 - ETA: 1:14 - loss: 0.287 - ETA: 1:07 - loss: 0.276 - ETA: 1:04 - loss: 0.266 - ETA: 59s - loss: 0.256 - ETA: 55s - loss: 0.24 - ETA: 52s - loss: 0.23 - ETA: 51s - loss: 0.23 - ETA: 51s - loss: 0.23 - ETA: 50s - loss: 0.23 - ETA: 47s - loss: 0.22 - ETA: 46s - loss: 0.21 - ETA: 46s - loss: 0.21 - ETA: 45s - loss: 0.21 - ETA: 44s - loss: 0.20 - ETA: 42s - loss: 0.20 - ETA: 41s - loss: 0.19 - ETA: 40s - loss: 0.19 - ETA: 39s - loss: 0.19 - ETA: 38s - loss: 0.18 - ETA: 37s - loss: 0.18 - ETA: 38s - loss: 0.18 - ETA: 38s - loss: 0.18 - ETA: 37s - loss: 0.17 - ETA: 36s - loss: 0.17 - ETA: 36s - loss: 0.17 - ETA: 35s - loss: 0.17 - ETA: 35s - loss: 0.16 - ET

KeyboardInterrupt: 

## Stacked seq2seq

In [36]:
class StackedSeq2Seq(HyperModel):
    """Hypermodel for an encoder-decoder network with a two-layer LSTM encoder.

    Encoder: LSTM (full sequence) -> Dropout -> LSTM (final states only).
    Decoder: LSTM initialised with the second encoder layer's states ->
    Dropout -> linear Dense. The second encoder LSTM and the decoder LSTM are
    both sized by ``shared_latent_dim`` because the former's states are handed
    to the latter.

    Tuned hyperparameters: ``encoder_latent_dim``, ``shared_latent_dim``,
    ``encoder_dropout``, ``decoder_dropout``, ``learning_rate``.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed sequence lengths and feature sizes.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: features per encoder time step.
            decoder_input_dim: features per decoder time step.
            decoder_output_dim: units of the final Dense layer.
        """
        # Run the base-class initializer so the state HyperModel sets up
        # itself is present on this instance too.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: hyperparameter container; ``hp.Int`` / ``hp.Float`` are called
                on it to obtain the value of each tuned hyperparameter.

        Returns:
            A compiled ``Model`` with inputs ``[encoder_input, decoder_input]``
            and the Dense-projected decoder sequence as output, using Adam and
            the ``masked_mse`` loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        encoder_latent_dim = hp.Int('encoder_latent_dim', min_value=32,
                                    max_value=128, step=32)
        shared_latent_dim = hp.Int('shared_latent_dim', min_value=32,
                                   max_value=128, step=32)

        encoder_lstm_1 = LSTM(encoder_latent_dim, return_sequences=True,
                              name='encoder_lstm_1')
        encoder_dropout = Dropout(rate=hp.Float('encoder_dropout', 0, 0.7,
                                                step=0.1, default=0.5))
        encoder_lstm_2 = LSTM(shared_latent_dim, return_state=True,
                              name='encoder_lstm_2')
        decoder_lstm = LSTM(shared_latent_dim, return_sequences=True,
                            return_state=True, name='decoder_lstm')
        decoder_dropout = Dropout(rate=hp.Float('decoder_dropout', 0, 0.7,
                                                step=0.1, default=0.5))
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # Obtain the outputs from the first encoder layer
        encoder_out = encoder_lstm_1(encoder_inputs)

        # Pass the outputs through a dropout layer before
        # feeding them to the next LSTM layer
        encoder_out = encoder_dropout(encoder_out)

        # We discard the output and keep the states only.
        _, h, c = encoder_lstm_2(encoder_out)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the decoder (return_sequences = True),
        # starting from the second encoder layer's final states.
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=[h, c])

        # Apply dropout
        decoder_outputs = decoder_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [37]:
# Hypermodel configured with the sequence lengths and feature sizes of the data.
model_builder = StackedSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search over the hypermodel, with validation loss as the objective.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner',
    project_name='stacked',
)



In [38]:
# Run the search: at most 100 epochs per fit, with an EarlyStopping callback
# watching val_loss (patience 10).
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    epochs=100,
    batch_size=64,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, verbose=1)],
)

Train on 67397 samples, validate on 3389 samples
Epoch 1/100
 4032/67397 [>.............................] - ETA: 27:20 - loss: 0.77 - ETA: 14:35 - loss: 0.78 - ETA: 10:15 - loss: 0.86 - ETA: 8:11 - loss: 0.8822 - ETA: 6:53 - loss: 0.879 - ETA: 6:06 - loss: 0.871 - ETA: 5:32 - loss: 0.821 - ETA: 5:05 - loss: 0.821 - ETA: 4:40 - loss: 0.796 - ETA: 4:22 - loss: 0.757 - ETA: 4:07 - loss: 0.757 - ETA: 3:55 - loss: 0.760 - ETA: 3:44 - loss: 0.739 - ETA: 3:34 - loss: 0.718 - ETA: 3:27 - loss: 0.699 - ETA: 3:22 - loss: 0.681 - ETA: 3:17 - loss: 0.665 - ETA: 3:11 - loss: 0.647 - ETA: 3:05 - loss: 0.636 - ETA: 3:01 - loss: 0.622 - ETA: 2:56 - loss: 0.616 - ETA: 2:53 - loss: 0.607 - ETA: 2:49 - loss: 0.597 - ETA: 2:46 - loss: 0.586 - ETA: 2:43 - loss: 0.572 - ETA: 2:40 - loss: 0.563 - ETA: 2:38 - loss: 0.558 - ETA: 2:36 - loss: 0.548 - ETA: 2:34 - loss: 0.542 - ETA: 2:34 - loss: 0.535 - ETA: 2:34 - loss: 0.525 - ETA: 2:34 - loss: 0.517 - ETA: 2:32 - loss: 0.509 - ETA: 2:30 - loss: 0.501 - ETA: 2:

KeyboardInterrupt: 

In [40]:
class BiStackedSeq2Seq(HyperModel):
    """Hypermodel for an encoder-decoder network with a bidirectional first encoder layer.

    Encoder: Bidirectional LSTM (full sequence) -> Dropout -> LSTM (final
    states only). Decoder: LSTM initialised with the second encoder layer's
    states -> Dropout -> linear Dense. The second encoder LSTM and the decoder
    LSTM are both sized by ``shared_latent_dim`` because the former's states
    are handed to the latter.

    Tuned hyperparameters: ``encoder_latent_dim``, ``shared_latent_dim``,
    ``encoder_dropout``, ``decoder_dropout``, ``learning_rate``.
    """

    def __init__(self, Tx, Ty, encoder_input_dim,
                 decoder_input_dim, decoder_output_dim):
        """Store the fixed sequence lengths and feature sizes.

        Args:
            Tx: number of time steps in the encoder input.
            Ty: number of time steps in the decoder input.
            encoder_input_dim: features per encoder time step.
            decoder_input_dim: features per decoder time step.
            decoder_output_dim: units of the final Dense layer.
        """
        # Run the base-class initializer so the state HyperModel sets up
        # itself is present on this instance too.
        super().__init__()
        self.Tx = Tx
        self.Ty = Ty
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.decoder_output_dim = decoder_output_dim

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: hyperparameter container; ``hp.Int`` / ``hp.Float`` are called
                on it to obtain the value of each tuned hyperparameter.

        Returns:
            A compiled ``Model`` with inputs ``[encoder_input, decoder_input]``
            and the Dense-projected decoder sequence as output, using Adam and
            the ``masked_mse`` loss.
        """
        # ------------------- SHARED LAYERS ---------------------
        encoder_latent_dim = hp.Int('encoder_latent_dim', min_value=32,
                                    max_value=128, step=32)
        shared_latent_dim = hp.Int('shared_latent_dim', min_value=32,
                                   max_value=128, step=32)

        # First encoder layer: an LSTM wrapped in Bidirectional.
        encoder_lstm_1 = Bidirectional(LSTM(encoder_latent_dim,
                                            return_sequences=True,
                                            name='encoder_lstm_1'))
        encoder_dropout = Dropout(rate=hp.Float('encoder_dropout', 0, 0.7,
                                                step=0.1, default=0.5))
        encoder_lstm_2 = LSTM(shared_latent_dim, return_state=True,
                              name='encoder_lstm_2')
        decoder_lstm = LSTM(shared_latent_dim, return_sequences=True,
                            return_state=True, name='decoder_lstm')
        decoder_dropout = Dropout(rate=hp.Float('decoder_dropout', 0, 0.7,
                                                step=0.1, default=0.5))
        decoder_dense = Dense(self.decoder_output_dim,
                              activation='linear', name='decoder_dense')

        # ---------------------- MODEL ------------------------
        # Define the inputs for the encoder
        encoder_inputs = Input(shape=(self.Tx, self.encoder_input_dim),
                               name='encoder_input')

        # Obtain the outputs from the first encoder layer
        encoder_out = encoder_lstm_1(encoder_inputs)

        # Pass the outputs through a dropout layer before
        # feeding them to the next LSTM layer
        encoder_out = encoder_dropout(encoder_out)

        # We discard the output and keep the states only.
        _, h, c = encoder_lstm_2(encoder_out)

        # Define an input for the decoder
        decoder_inputs = Input(shape=(self.Ty, self.decoder_input_dim),
                               name='decoder_input')

        # Obtain all the outputs from the decoder (return_sequences = True),
        # starting from the second encoder layer's final states.
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=[h, c])

        # Apply dropout
        decoder_outputs = decoder_dropout(decoder_outputs)

        # Apply dense
        decoder_outputs = decoder_dense(decoder_outputs)

        model = Model(inputs=[encoder_inputs, decoder_inputs],
                      outputs=decoder_outputs)
        optimizer = Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2,
                                                sampling='log'))
        model.compile(optimizer=optimizer, loss=masked_mse)

        return model
        

In [41]:
# Hypermodel configured with the sequence lengths and feature sizes of the data.
model_builder = BiStackedSeq2Seq(
    Tx=Tx,
    Ty=Ty,
    encoder_input_dim=encoder_input_dim,
    decoder_input_dim=decoder_input_dim,
    decoder_output_dim=decoder_output_dim,
)

# Random search over the hypermodel, with validation loss as the objective.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=1000,
    executions_per_trial=1,
    directory='local-keras-tuner',
    project_name='bistacked',
)



In [42]:
# Run the search: at most 100 epochs per fit, with an EarlyStopping callback
# watching val_loss (patience 10).
tuner.search(
    x=[train_encoder_input_data, train_decoder_input_data],
    y=train_decoder_target_data,
    validation_data=(
        [valid_encoder_input_data, valid_decoder_input_data],
        valid_decoder_target_data,
    ),
    epochs=100,
    batch_size=64,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, verbose=1)],
)

Train on 67397 samples, validate on 3389 samples
Epoch 1/100
11712/67397 [====>.........................] - ETA: 31:04 - loss: 1.12 - ETA: 16:35 - loss: 0.78 - ETA: 11:34 - loss: 0.66 - ETA: 9:07 - loss: 0.5638 - ETA: 7:38 - loss: 0.507 - ETA: 6:45 - loss: 0.466 - ETA: 6:04 - loss: 0.435 - ETA: 5:32 - loss: 0.413 - ETA: 5:07 - loss: 0.389 - ETA: 4:44 - loss: 0.375 - ETA: 4:31 - loss: 0.358 - ETA: 4:18 - loss: 0.350 - ETA: 4:10 - loss: 0.336 - ETA: 4:00 - loss: 0.323 - ETA: 3:51 - loss: 0.312 - ETA: 3:43 - loss: 0.300 - ETA: 3:36 - loss: 0.296 - ETA: 3:29 - loss: 0.291 - ETA: 3:23 - loss: 0.283 - ETA: 3:18 - loss: 0.277 - ETA: 3:13 - loss: 0.274 - ETA: 3:09 - loss: 0.270 - ETA: 3:08 - loss: 0.263 - ETA: 3:05 - loss: 0.261 - ETA: 3:01 - loss: 0.255 - ETA: 2:59 - loss: 0.251 - ETA: 2:55 - loss: 0.248 - ETA: 2:52 - loss: 0.244 - ETA: 2:51 - loss: 0.241 - ETA: 2:49 - loss: 0.237 - ETA: 2:48 - loss: 0.234 - ETA: 2:47 - loss: 0.231 - ETA: 2:45 - loss: 0.229 - ETA: 2:43 - loss: 0.227 - ETA: 2:

KeyboardInterrupt: 