In [20]:
import math

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU, Input
from keras.layers import Flatten
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras.layers import BatchNormalization, Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.models import Model, load_model
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras import backend as K
from keras.engine import InputSpec, Layer
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, TensorBoard, Callback, EarlyStopping, ReduceLROnPlateau

# Notebook-wide file locations, kept in one place so later cells do not need
# to repeat string literals.
# CSV with the raw time series (same location the data-loading cell reads).
data_path = "data/food_data.csv"
# Checkpoint files; presumably the weights of the attention model and of the
# stacked GRU/LSTM model respectively -- TODO confirm against the training run.
attention_model_path = "model_checkpoint/best_attention_model.hdf5"
model_path = "model_checkpoint/best_model.hdf5"

In [21]:
# Preprocessing of Data

# Read the CSV through the shared `data_path` constant instead of repeating
# the literal path, so the file location is configured in exactly one place.
# The 'Date' column is parsed as datetimes and becomes the index, which is
# what the timestamp-based train/test split relies on.
dataset = pd.read_csv(data_path, index_col='Date', parse_dates=['Date'])
dataset.tail()

Unnamed: 0_level_0,Y,X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-03 05:50:14,6.071375,49900
2019-04-03 05:50:34,3.684685,49920
2019-04-03 05:50:54,6.068947,49940
2019-04-03 05:51:14,6.067733,49960
2019-04-03 05:51:34,2.7602,49980


In [22]:
# Timestamp at which the series is cut into a training part and a test part.
split_seconds = "2019-04-03 16:56:00"

# Rows up to the cut become the training array, rows from the cut onwards the
# test array. `iloc[:, 1:2]` keeps a single column while preserving 2-D shape.
# NOTE(review): judging by the tail() output, column position 1 appears to be
#   `X` rather than `Y` -- confirm this is the intended target series.
# NOTE(review): label slices include both end points, so a row stamped exactly
#   at `split_seconds` would end up in both arrays -- confirm this is acceptable.
training_set, test_set = (
    part.iloc[:, 1:2].values
    for part in (dataset[:split_seconds], dataset[split_seconds:])
)

# Despite its name, `test` is the DataFrame form of the *training* slice
# (everything up to the cut); it is displayed here only for its shape.
test = dataset[:split_seconds].iloc[:, 1:2]
test.shape

(1228, 1)

In [23]:
# Scaling the training set
# The scaler learns its min/max from the training rows only and is kept in
# `sc` so the same transformation can be re-applied (or inverted) later.
sc = MinMaxScaler()
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)



In [24]:
# Since LSTMs store long term memory state, we create a data structure with
# N_TIMESTEPS timesteps and 1 output: every target value is paired with the
# N_TIMESTEPS scaled values that immediately precede it.
N_TIMESTEPS = 60  # look-back window length, in rows

n_rows = training_set_scaled.shape[0]
if n_rows <= N_TIMESTEPS:
    # Without this guard the cell would only fail later, with an opaque
    # IndexError, when the empty window array is reshaped.
    raise ValueError(
        "Need more than {} training rows to build windows, got {}".format(N_TIMESTEPS, n_rows)
    )

# Row k of X_train is the window [k, k + N_TIMESTEPS); y_train[k] is the value
# right after that window.
X_train = np.array([training_set_scaled[end - N_TIMESTEPS:end, 0]
                    for end in range(N_TIMESTEPS, n_rows)])
y_train = training_set_scaled[N_TIMESTEPS:, 0].copy()

# Reshaping X_train to (samples, timesteps, features=1), the 3D layout
# recurrent layers expect.
X_train = X_train.reshape(-1, N_TIMESTEPS, 1)

In [6]:
# Defining Attention Neuron and Attention Model
class Attention(Layer):
    """Keras layer that implements an attention mechanism for temporal data.

    Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]:
    each timestep gets a scalar score ``tanh(x_t . W + b_t)``, the scores are
    exponentiated and normalised over the time axis, and the output is the
    resulting weighted sum of the timesteps.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    # Arguments
        step_dim: number of timesteps of the input sequence; used to reshape
            the scores to `(samples, step_dim)`.
        W_regularizer, b_regularizer: optional regularizers for the weight
            vector and the bias.
        W_constraint, b_constraint: optional constraints for the weight
            vector and the bias.
        bias: whether to add a per-timestep bias to the scores.
        **kwargs: forwarded to the base `Layer` constructor.

    Put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with
    `return_sequences=True`. The feature dimension is inferred from the
    output shape of the RNN; the number of steps must be passed explicitly.

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(Attention(step_dim))
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)

        # Assigned after the base constructor has run so that the base-class
        # initialisation cannot overwrite it.
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Filled in by build() once the input shape is known.
        self.features_dim = 0

    def build(self, input_shape):
        """Create the weight vector (one entry per feature) and, if enabled,
        the bias vector (one entry per timestep)."""
        assert len(input_shape) == 3

        # Keyword arguments are used throughout: the shape-first positional
        # form is only accepted through a legacy compatibility shim.
        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(input_shape[-1],),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(input_shape[1],),
                                     initializer='zeros',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return None: the time axis is collapsed here, so no mask is passed
        on to the next layers."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of `x` over its time axis."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Scores are computed as a 2D matrix product on the flattened input
        # (a direct K.dot(x, self.W) is not supported by the TF backend) and
        # reshaped back to (samples, steps).
        eij = K.reshape(
            K.dot(K.reshape(x, (-1, features_dim)),
                  K.reshape(self.W, (features_dim, 1))),
            (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # Apply the mask after the exp; the weights are re-normalised next.
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano.
            a *= K.cast(mask, K.floatx())

        # Especially in the early stages of training the sum may be almost
        # zero, hence the epsilon in the denominator.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # (samples, steps) -> (samples, steps, 1) so it broadcasts over features.
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output is `(samples, features)`; the time axis is summed away."""
        return input_shape[0],  self.features_dim

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created
        from its config (e.g. when a full model is serialised)."""
        config = {
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        }
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

def build_attention_model(X_train, lr=0.0, lr_d=0.0, units=0, spatial_dr=0.0, dense_units=128, dr=0.1, use_attention=True):
    """Build and compile an LSTM model with an optional attention pooling stage.

    Architecture: Input -> LSTM(units * 2, full sequence) -> Attention (or
    Flatten) -> Dense/Dropout -> BatchNormalization -> Dense/Dropout ->
    Dense(1, sigmoid).

    :param X_train: array whose ``shape[1]`` gives the number of timesteps;
        only the shape is read, the data itself is not used here.
    :param lr: learning rate passed to Adam.
    :param lr_d: learning-rate decay passed to Adam.
    :param units: half the number of LSTM units (the layer uses ``units * 2``).
    :param spatial_dr: accepted for interface compatibility; currently unused.
    :param dense_units: width of the first dense layer; the second dense
        layer uses ``int(dense_units / 2)``.
    :param dr: dropout rate applied after each hidden dense layer.
    :param use_attention: pool the LSTM sequence with ``Attention`` when true,
        otherwise flatten it.
    :return: the compiled Keras ``Model`` (its summary is printed as a side
        effect).
    """
    max_len = X_train.shape[1]
    inp = Input(shape=(max_len, 1))
    x_seq = LSTM(units * 2, return_sequences=True)(inp)

    # Only the way the sequence is collapsed differs between the two variants;
    # everything downstream is shared.
    if use_attention:
        x_pooled = Attention(max_len)(x_seq)
    else:
        x_pooled = Flatten()(x_seq)

    x = Dropout(dr)(Dense(dense_units, activation='relu')(x_pooled))
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(int(dense_units / 2), activation='relu')(x))

    # NOTE(review): sigmoid output + binary cross-entropy is kept as-is; this
    # presumably relies on the targets being min-max scaled into [0, 1] --
    # confirm this is intended for what looks like a regression task.
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["mean_squared_error"])
    model.summary()
    return model

In [7]:
# LSTM and GRU Model

def create_model(X_train):
    """Build and compile the stacked GRU/LSTM regression network.

    The network alternates GRU, LSTM, GRU layers that each return the full
    sequence, followed by a final LSTM that returns only its last output and
    a single-unit dense output. Every recurrent layer has 50 units, relu
    activation and is followed by Dropout(0.2).

    :param X_train: array whose ``shape[1]`` is the number of timesteps per
        sample; only the shape is read.
    :return: the compiled ``Sequential`` model (SGD with momentum, MSE loss).
    """
    window_shape = (X_train.shape[1], 1)
    model = Sequential()

    # Three sequence-returning recurrent layers, each with dropout regularisation.
    for recurrent_cls in (GRU, LSTM, GRU):
        model.add(recurrent_cls(units=50, return_sequences=True, input_shape=window_shape, activation='relu'))
        model.add(Dropout(0.2))

    # Last recurrent layer collapses the sequence to a single vector.
    model.add(LSTM(units=50, activation='relu'))
    model.add(Dropout(0.2))

    # Output layer: one value per sample.
    model.add(Dense(units=1))

    # Compiling the RNN
    sgd = SGD(lr=0.01, decay=1e-7, momentum=0.9, nesterov=False)
    model.compile(optimizer=sgd, metrics=["mean_squared_error"], loss='mean_squared_error')
    return model

In [5]:
# Loading Models with Trained Weights of the best model with the lowest validation loss

# Both builders need X_train to size their input window, and the checkpoint
# locations come from the path constants defined at the top of the notebook.
# This cell therefore has to run after the cells that create X_train and
# define create_model / build_attention_model.
model = create_model(X_train)
model.load_weights(model_path)

attention_model = build_attention_model(X_train, lr = 1e-3, lr_d = 1e-7, units = 128, spatial_dr = 0.3, dense_units=25, dr=0.1, use_attention=True)
attention_model.load_weights(attention_model_path)

NameError: name 'X_train' is not defined