In [1]:
# Average window_stride elements together to form a single row
WINDOW_STRIDE = 12

SAMPLE_HOURS = WINDOW_STRIDE / 12.0

# Number of future samples to mean for prediction
PREDICTION_WINDOW = int(24 / SAMPLE_HOURS)

# Length of the windowed sequence
SEQUENCE_LENGTH = int(7*24 / SAMPLE_HOURS)

# Input Features
INPUT_COLUMNS = ['epoch', 'day_of_year', 'hour', 'temp', 'windspd', 'winddir', 'wind_x_dir', 'wind_y_dir', 'no', 'no2', 'nox', 'o3']
OUTPUT_COLUMNS = ['no', 'no2', 'nox', 'o3']

# Take the FFT of each sqeuence and use as features
FFT_FEATURES = False

# Fit the sequence to y = mx+b and add the coeff / intercept
REGRESSION_FEATURES = True

# Add variance for each feature in the sequence
STD_FEATURES = True

INPUT_MAP = {value: idx for idx, value in enumerate(INPUT_COLUMNS)}
OUTPUT_MAP = {value: idx for idx, value in enumerate(OUTPUT_COLUMNS)}

NUM_INPUTS = len(INPUT_COLUMNS)
NUM_OUTPUTS = len(OUTPUT_COLUMNS)

In [1]:
import numpy as np

data_sequences = np.load('000_sequences.npy')
data_latlong = np.load('000_latlong_features.npy')
data_sequence_features = np.load('000_sequence_features.npy')
labels = np.load('000_labels.npy')

data = data_sequences

In [25]:
from keras.models import Model
from keras import backend as K
from keras.layers import Dense, LSTM, Input, Flatten
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard

def r2(y_true, y_pred):
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

layer_input = Input(shape=(SEQUENCE_LENGTH, NUM_INPUTS), name='inputs')
layer_lstm = LSTM(64, return_sequences=True, dropout=0.5)(layer_input)
layer_flatten = Flatten()(layer_lstm)

layer_output = Dense(NUM_OUTPUTS, activation='linear', name='outputs')(layer_flatten)

model = Model(inputs=[layer_input], outputs=[layer_output])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=[r2])
model.summary()

def sched(epoch, lr):
    new_lr = 0.001 * (0.5 ** epoch)
    print("Epoch(%d) LR: %f" % (epoch+1, new_lr))
    return new_lr

lr_decay = LearningRateScheduler(schedule=sched) 

filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_r2', verbose=1, save_best_only=True, mode='max')

tensorboard = TensorBoard(log_dir='./tb', histogram_freq=0, batch_size=128, write_graph=True, write_grads=False)

model.fit(x=data, y=labels, batch_size=128, epochs=100, validation_split=0.2, verbose=True, callbacks=[lr_decay, checkpoint, tensorboard])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 168, 12)           0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 168, 64)           19712     
_________________________________________________________________
flatten_6 (Flatten)          (None, 10752)             0         
_________________________________________________________________
outputs (Dense)              (None, 4)                 43012     
Total params: 62,724
Trainable params: 62,724
Non-trainable params: 0
_________________________________________________________________
Train on 86320 samples, validate on 21580 samples
Epoch 1/100
Epoch(1) LR: 0.001000

Epoch 00001: val_r2 improved from -inf to 0.79192, saving model to weights.best.hdf5
Epoch 2/100
Epoch(2) LR: 0.000500

Epoch 00002: val_r2 did not improve from 0.79192
Epoch 3/100
Epoc

KeyboardInterrupt: 