Seq2seq model architectures tuned in this notebook:
- simple (encoder: lstm, decoder: lstm -> dense)
- stacked_encoder (encoder: lstm -> lstm, decoder: lstm -> dense)
- bistacked_encoder (encoder: bilstm -> lstm, decoder: lstm -> dense)
- stacked_decoder (encoder: lstm, decoder: lstm -> lstm -> dense)
- stacked (encoder: lstm -> lstm, decoder: lstm -> lstm -> dense)
- bistacked (encoder: bilstm -> lstm, decoder: lstm -> lstm -> dense)

### Google Colab utils

In [None]:
#!pip install keras-tuner

In [None]:
# # memory footprint support libraries/code
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# !pip install gputil
# !pip install psutil
# !pip install humanize
# import psutil
# import humanize
# import os
# import GPUtil as GPU
# GPUs = GPU.getGPUs()
# # XXX: only one GPU on Colab and isn’t guaranteed
# gpu = GPUs[0]
# def printm():
#  process = psutil.Process(os.getpid())
#  print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
#  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
# printm() 

In [None]:
#!kill -9 -1

In [None]:
# from google.colab import drive
# drive.mount('/gdrive')
# %cd "/gdrive/My Drive/air-pollution"

### Modeling

In [1]:
import warnings
warnings.filterwarnings('ignore')

from seq2seq_models import * 
from kerastuner.tuners import RandomSearch

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


Bad key "text.kerning_factor" on line 4 in
/home/zafir/miniconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
# All nine arrays live in the same directory; keeping the path in one place
# makes switching to another dataset directory a one-line change.
# NOTE(review): 'third-order' / 'Centar' presumably name the feature set and
# the measuring station — confirm against the data-preparation code.
data_dir = './data/third-order/Centar'


def _load_array(name):
    """Return the array stored in ``<data_dir>/<name>.npy``."""
    return np.load('{}/{}.npy'.format(data_dir, name))


# Training split.
train_encoder_input_data = _load_array('train_encoder_input_data')
train_decoder_input_data = _load_array('train_decoder_input_data')
train_decoder_target_data = _load_array('train_decoder_target_data')

# Validation split (passed to tuner.search as validation_data).
valid_encoder_input_data = _load_array('valid_encoder_input_data')
valid_decoder_input_data = _load_array('valid_decoder_input_data')
valid_decoder_target_data = _load_array('valid_decoder_target_data')

# Test split.
test_encoder_input_data = _load_array('test_encoder_input_data')
test_decoder_input_data = _load_array('test_decoder_input_data')
test_decoder_target_data = _load_array('test_decoder_target_data')

In [3]:
# Sanity check: show the shape of each training array, in the order
# encoder input, decoder input, decoder target.
for split_array in (train_encoder_input_data,
                    train_decoder_input_data,
                    train_decoder_target_data):
    print(split_array.shape)

(67396, 24, 23)
(67396, 12, 21)
(67396, 12, 2)


In [4]:
# Axis 1 of each input array gives the sequence length and axis 2 the
# per-step feature count.
# NOTE(review): assumes arrays are laid out as (samples, time steps, features)
# — confirm against the data-preparation code.
encoder_shape = train_encoder_input_data.shape
Tx = encoder_shape[1]
encoder_input_dim = encoder_shape[2]

decoder_shape = train_decoder_input_data.shape
Ty = decoder_shape[1]
decoder_input_dim = decoder_shape[2]

# The model predicts the pollution value only; the mask is left out.
# NOTE(review): the target array printed above has a last dimension of 2,
# presumably [pollution, mask] — confirm how the mask is consumed.
decoder_output_dim = 1

In [None]:
# Random-search budget, passed to every RandomSearch tuner in this notebook.
max_trials = 250
executions_per_trial = 1

# Per-trial training settings, passed to every tuner.search call.
batch_size = 64
epochs = 250

# Patience handed to the EarlyStopping callback that monitors val_loss.
patience = 20

## Simple seq2seq

In [None]:
# Model builder for the "simple" architecture
# (encoder: lstm, decoder: lstm -> dense, per the list at the top).
# NOTE(review): SimpleSeq2Seq presumably comes from the wildcard import of
# seq2seq_models — confirm.
model_builder = SimpleSeq2Seq(
    Tx,
    Ty,
    encoder_input_dim,
    decoder_input_dim,
    decoder_output_dim,
)

# Random hyperparameter search scored on validation loss; this search uses
# the 'simple' project under the shared 'local-keras-tuner/seq2seq' directory.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
    directory='local-keras-tuner/seq2seq',
    project_name='simple',
)

In [None]:
# The model takes two inputs, supplied as [encoder input, decoder input].
train_inputs = [train_encoder_input_data, train_decoder_input_data]
valid_inputs = [valid_encoder_input_data, valid_decoder_input_data]
validation_set = (valid_inputs, valid_decoder_target_data)

# Callback watching val_loss with the notebook-wide patience.
# NOTE(review): EarlyStopping is presumably the Keras callback re-exported by
# the wildcard import of seq2seq_models — confirm.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=patience,
                               verbose=1)

tuner.search(x=train_inputs,
             y=train_decoder_target_data,
             validation_data=validation_set,
             batch_size=batch_size,
             epochs=epochs,
             callbacks=[early_stopping])

In [None]:
# Display the results summary of the current `tuner`
# (the 'simple' search when the cells are run in order).
tuner.results_summary()

## Stacked encoder

In [None]:
# Model builder for the "stacked_encoder" architecture
# (encoder: lstm -> lstm, decoder: lstm -> dense, per the list at the top).
# NOTE(review): StackedEncoderSeq2Seq presumably comes from the wildcard
# import of seq2seq_models — confirm.
model_builder = StackedEncoderSeq2Seq(
    Tx,
    Ty,
    encoder_input_dim,
    decoder_input_dim,
    decoder_output_dim,
)

# Random hyperparameter search scored on validation loss; this search uses
# the 'stacked-encoder' project under the shared tuner directory.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
    directory='local-keras-tuner/seq2seq',
    project_name='stacked-encoder',
)

In [None]:
# The model takes two inputs, supplied as [encoder input, decoder input].
train_inputs = [train_encoder_input_data, train_decoder_input_data]
valid_inputs = [valid_encoder_input_data, valid_decoder_input_data]
validation_set = (valid_inputs, valid_decoder_target_data)

# Callback watching val_loss with the notebook-wide patience.
# NOTE(review): EarlyStopping is presumably the Keras callback re-exported by
# the wildcard import of seq2seq_models — confirm.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=patience,
                               verbose=1)

tuner.search(x=train_inputs,
             y=train_decoder_target_data,
             validation_data=validation_set,
             batch_size=batch_size,
             epochs=epochs,
             callbacks=[early_stopping])

## BiStacked encoder

In [None]:
# Model builder for the "bistacked_encoder" architecture
# (encoder: bilstm -> lstm, decoder: lstm -> dense, per the list at the top).
# NOTE(review): BiStackedEncoderSeq2Seq presumably comes from the wildcard
# import of seq2seq_models — confirm.
model_builder = BiStackedEncoderSeq2Seq(
    Tx,
    Ty,
    encoder_input_dim,
    decoder_input_dim,
    decoder_output_dim,
)

# Random hyperparameter search scored on validation loss; this search uses
# the 'bistacked-encoder' project under the shared tuner directory.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
    directory='local-keras-tuner/seq2seq',
    project_name='bistacked-encoder',
)

In [None]:
# The model takes two inputs, supplied as [encoder input, decoder input].
train_inputs = [train_encoder_input_data, train_decoder_input_data]
valid_inputs = [valid_encoder_input_data, valid_decoder_input_data]
validation_set = (valid_inputs, valid_decoder_target_data)

# Callback watching val_loss with the notebook-wide patience.
# NOTE(review): EarlyStopping is presumably the Keras callback re-exported by
# the wildcard import of seq2seq_models — confirm.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=patience,
                               verbose=1)

tuner.search(x=train_inputs,
             y=train_decoder_target_data,
             validation_data=validation_set,
             batch_size=batch_size,
             epochs=epochs,
             callbacks=[early_stopping])

In [None]:
# Display the results summary of the current `tuner`
# (the 'bistacked-encoder' search when the cells are run in order).
tuner.results_summary()

## Stacked decoder

In [None]:
# Model builder for the "stacked_decoder" architecture
# (encoder: lstm, decoder: lstm -> lstm -> dense, per the list at the top).
# NOTE(review): StackedDecoderSeq2Seq presumably comes from the wildcard
# import of seq2seq_models — confirm.
model_builder = StackedDecoderSeq2Seq(
    Tx,
    Ty,
    encoder_input_dim,
    decoder_input_dim,
    decoder_output_dim,
)

# Random hyperparameter search scored on validation loss; this search uses
# the 'stacked-decoder' project under the shared tuner directory.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
    directory='local-keras-tuner/seq2seq',
    project_name='stacked-decoder',
)

In [None]:
# The model takes two inputs, supplied as [encoder input, decoder input].
train_inputs = [train_encoder_input_data, train_decoder_input_data]
valid_inputs = [valid_encoder_input_data, valid_decoder_input_data]
validation_set = (valid_inputs, valid_decoder_target_data)

# Callback watching val_loss with the notebook-wide patience.
# NOTE(review): EarlyStopping is presumably the Keras callback re-exported by
# the wildcard import of seq2seq_models — confirm.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=patience,
                               verbose=1)

tuner.search(x=train_inputs,
             y=train_decoder_target_data,
             validation_data=validation_set,
             batch_size=batch_size,
             epochs=epochs,
             callbacks=[early_stopping])

In [None]:
# Display the results summary of the current `tuner`
# (the 'stacked-decoder' search when the cells are run in order).
tuner.results_summary()

## Stacked

In [None]:
# Model builder for the "stacked" architecture
# (encoder: lstm -> lstm, decoder: lstm -> lstm -> dense, per the list at
# the top).
# NOTE(review): StackedSeq2Seq presumably comes from the wildcard import of
# seq2seq_models — confirm.
model_builder = StackedSeq2Seq(
    Tx,
    Ty,
    encoder_input_dim,
    decoder_input_dim,
    decoder_output_dim,
)

# Random hyperparameter search scored on validation loss; this search uses
# the 'stacked' project under the shared tuner directory.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
    directory='local-keras-tuner/seq2seq',
    project_name='stacked',
)

In [None]:
# The model takes two inputs, supplied as [encoder input, decoder input].
train_inputs = [train_encoder_input_data, train_decoder_input_data]
valid_inputs = [valid_encoder_input_data, valid_decoder_input_data]
validation_set = (valid_inputs, valid_decoder_target_data)

# Callback watching val_loss with the notebook-wide patience.
# NOTE(review): EarlyStopping is presumably the Keras callback re-exported by
# the wildcard import of seq2seq_models — confirm.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=patience,
                               verbose=1)

tuner.search(x=train_inputs,
             y=train_decoder_target_data,
             validation_data=validation_set,
             batch_size=batch_size,
             epochs=epochs,
             callbacks=[early_stopping])

## BiStacked

In [None]:
# Model builder for the "bistacked" architecture
# (encoder: bilstm -> lstm, decoder: lstm -> lstm -> dense, per the list at
# the top).
# NOTE(review): BiStackedSeq2Seq presumably comes from the wildcard import of
# seq2seq_models — confirm.
model_builder = BiStackedSeq2Seq(
    Tx,
    Ty,
    encoder_input_dim,
    decoder_input_dim,
    decoder_output_dim,
)

# Random hyperparameter search scored on validation loss; this search uses
# the 'bistacked' project under the shared tuner directory.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
    directory='local-keras-tuner/seq2seq',
    project_name='bistacked',
)

In [None]:
# The model takes two inputs, supplied as [encoder input, decoder input].
train_inputs = [train_encoder_input_data, train_decoder_input_data]
valid_inputs = [valid_encoder_input_data, valid_decoder_input_data]
validation_set = (valid_inputs, valid_decoder_target_data)

# Callback watching val_loss with the notebook-wide patience.
# NOTE(review): EarlyStopping is presumably the Keras callback re-exported by
# the wildcard import of seq2seq_models — confirm.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=patience,
                               verbose=1)

tuner.search(x=train_inputs,
             y=train_decoder_target_data,
             validation_data=validation_set,
             batch_size=batch_size,
             epochs=epochs,
             callbacks=[early_stopping])