### GRU RNN Implementation

In this notebook we will implement a GRU RNN in order to forecast the AQI with all of the available meteorological data at the Chicago SH Bell monitoring site.

In [3]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Input, Dense, GRU, Embedding
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau


In [2]:
print(tf.__version__)
print(tf.keras.__version__)

2.1.0
2.2.4-tf


#### Making Train/Test Splits

In [33]:
aqi_df = pd.read_csv('./data/combined_meteor_and_sample.csv', index_col='Unnamed: 0', parse_dates=True)
aqi_df.head()

Unnamed: 0,AWS Mph WVc,AWD Deg WVc,SIG Deg WVc,Gust Mph Max,AvgT Deg_F Avg,ABP InHg Avg,Sample Value
2017-04-01 00:00:00,5.989,356.4,24.12,15.55,38.74,29.41,4.0
2017-04-01 01:00:00,5.059,348.0,22.69,11.3,39.46,29.44,6.0
2017-04-01 02:00:00,5.39,359.3,19.98,11.3,39.08,29.45,7.0
2017-04-01 03:00:00,5.233,352.6,19.27,11.59,38.95,29.47,6.0
2017-04-01 04:00:00,3.614,327.3,25.97,8.68,38.86,29.49,8.0


In [34]:
target_names = ['Sample Value']
shift_days = 1
shift_steps = shift_days*24 # Number of hours
aqi_df_targets = aqi_df[target_names].shift(-shift_steps)

In [35]:
X_data = aqi_df.values[0:-shift_steps]
y_data = aqi_df_targets.values[:-shift_steps]

In [36]:
num_data = len(X_data)
num_data

23337

In [37]:
train_split = 0.9

In [38]:
num_train = int(train_split * num_data)
num_train

21003

In [39]:
num_test = num_data - num_train
num_test

2334

In [40]:
X_train = X_data[0:num_train]
X_test = X_data[num_train:]
len(X_train) + len(X_test)

23337

In [41]:
y_train = y_data[0:num_train]
y_test = y_data[num_train:]
len(y_train) + len(y_test)

23337

In [42]:
num_X_signals = X_data.shape[1]
num_X_signals

7

In [43]:
num_y_signals = y_data.shape[1]
num_y_signals

1

In [70]:
X_train = np.nan_to_num(X_train)

#### Scaling the Data

In [71]:
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

In [76]:
def batch_generator(batch_size, sequence_length):
    """
    Generator function for creating random batches of training-data.
    """

    # Infinite loop.
    while True:
        # Allocate a new array for the batch of input-signals.
        x_shape = (batch_size, sequence_length, num_X_signals)
        x_batch = np.zeros(shape=x_shape, dtype=np.float16)

        # Allocate a new array for the batch of output-signals.
        y_shape = (batch_size, sequence_length, num_y_signals)
        y_batch = np.zeros(shape=y_shape, dtype=np.float16)

        # Fill the batch with random sequences of data.
        for i in range(batch_size):
            # Get a random start-index.
            # This points somewhere into the training-data.
            idx = np.random.randint(num_train - sequence_length)
            
            # Copy the sequences of data starting at this index.
            x_batch[i] = X_train_scaled[idx:idx+sequence_length]
            y_batch[i] = y_train_scaled[idx:idx+sequence_length]
        
        yield (x_batch, y_batch)

In [77]:
batch_size = 256
sequence_length = 24*7*8 # 8 week long sequences
generator = batch_generator(batch_size=batch_size,
                            sequence_length=sequence_length)
X_batch, y_batch = next(generator)

In [95]:
validation_data = (np.expand_dims(X_test_scaled, axis=0),
                  np.expand_dims(y_test_scaled, axis=0))

#### Building the GRU

In [80]:
model = Sequential()
model.add(GRU(units=512,
              return_sequences=True,
              input_shape=(None, num_X_signals,)))
model.add(Dense(num_y_signals, activation='sigmoid'))

In [85]:
warmup_steps = 50
def loss_mse_warmup(y_true, y_pred):
    """
    Calculate the Mean Squared Error between y_true and y_pred,
    but ignore the beginning "warmup" part of the sequences.
    
    y_true is the desired output.
    y_pred is the model's output.
    """

    # The shape of both input tensors are:
    # [batch_size, sequence_length, num_y_signals].

    # Ignore the "warmup" parts of the sequences
    # by taking slices of the tensors.
    y_true_slice = y_true[:, warmup_steps:, :]
    y_pred_slice = y_pred[:, warmup_steps:, :]

    # These sliced tensors both have this shape:
    # [batch_size, sequence_length - warmup_steps, num_y_signals]

    # Calculate the MSE loss for each value in these tensors.
    # This outputs a 3-rank tensor of the same shape.
    loss = tf.losses.mean_squared_error(y_true=y_true_slice,
                                        y_pred=y_pred_slice)

    # Keras may reduce this across the first axis (the batch)
    # but the semantics are unclear, so to be sure we use
    # the loss across the entire tensor, we reduce it to a
    # single scalar with the mean function.
    loss_mean = tf.reduce_mean(loss)

    return loss_mean

In [86]:
optimizer = RMSprop(lr=1e-3)
model.compile(loss=loss_mse_warmup, optimizer=optimizer)

In [87]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_2 (GRU)                  (None, None, 512)         798720    
_________________________________________________________________
dense_1 (Dense)              (None, None, 1)           513       
Total params: 799,233
Trainable params: 799,233
Non-trainable params: 0
_________________________________________________________________


In [89]:
path_checkpoint = 'aqi_checkpoint.keras'
callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                      monitor='val_loss',
                                      verbose=1,
                                      save_weights_only=True,
                                      save_best_only=True)

In [90]:
callback_tensorboard = TensorBoard(log_dir='./aqi_tf_logs/',
                                   histogram_freq=0,
                                   write_graph=False)

In [91]:
callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                       factor=0.1,
                                       min_lr=1e-4,
                                       patience=0,
                                       verbose=1)

In [93]:
callbacks = [callback_checkpoint,
             callback_tensorboard,
             callback_reduce_lr]

In [98]:
%%time
model.fit_generator(generator=generator,
                    epochs=20,
                    steps_per_epoch=100,
                    validation_data=validation_data,
                    verbose=1)

Epoch 1/20


ResourceExhaustedError:  OOM when allocating tensor with shape[512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node gru_2/while/body/_1/strided_slice_6}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_keras_scratch_graph_1832]

Function call stack:
keras_scratch_graph
