## Code for LSTM network development. Data preparation for the LSTM can be found in preprocess/seqdata.ipynb

In [1]:
import pandas as pd
import numpy as np
import time
import keras
from math import ceil
import time
import random
import glob,os
import matplotlib.pyplot as plt
import pickle
import pdb
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, Input, Embedding, Masking
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV

Using TensorFlow backend.


### Load Data, not uploaded on github
#### allauxdata columns' corresponding variables:
###### 'Snowy', 'Night', 'One way', 'two way', 'Two way with median', 'Speed Limit_30.0', 'Speed Limit_40.0', 'Speed Limit_50.0', 'Lane Width_2.5', 'Lane Width_2.75', 'Lane Width_3.0', 'Mean Arrival Rate_530.0', 'Mean Arrival Rate_750.0', 'Mean Arrival Rate_1100.0'

In [2]:
# Directory holding the pickled, pre-processed data. It defaults to the original
# author's location; set the PROJECTVR_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get('PROJECTVR_DATA_DIR', '/home/arash/ProjectVR/cleaneddata')

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load files that were produced by the project's own preprocessing.
with open(os.path.join(DATA_DIR, 'seqdata'), 'rb') as f:
    allseqdata = pickle.load(f)

# The sequence container is indexed positionally: 0..5 -> X, o1, o2, o3, dist, y.
X, o1, o2, o3, dist, y = (allseqdata[k] for k in range(6))

# Auxiliary (per-sample, non-sequential) variables.
with open(os.path.join(DATA_DIR, 'auxdata'), 'rb') as f:
    allauxdata = pickle.load(f)

### Data Normalization
#### Normalization using the min-max method (scikit-learn's MinMaxScaler is not used due to a format difference)

In [3]:
# Min-max normalisation of the trajectory (X, y) and distance sequences.
# o1, o2, o3 are left untouched (per the original note they are already between 0 and 1).

# Per-sequence extrema; X and y share one scale, so both are inspected together.
maxTraj = [max(max(X[i]), max(y[i])) for i in range(len(X))]
minTraj = [min(min(X[i]), min(y[i])) for i in range(len(X))]
mindist_per_seq = [min(dist[i]) for i in range(len(X))]

maxXY = max(maxTraj)
minXY = min(minTraj)
maxdist = 100                     # fixed upper bound for dist (not derived from the data)
mindist = min(mindist_per_seq)

span_xy = maxXY - minXY
span_dist = maxdist - mindist

# The scaled sequences are kept as plain Python lists of arrays. They have
# different lengths, and wrapping a ragged list in np.array() is deprecated and
# raises ValueError on recent NumPy releases. pad_sequences accepts a list of
# sequences directly, so no array conversion is needed before padding.
Xscaled = [(X[i] - minXY) / span_xy for i in range(len(X))]
yscaled = [(y[i] - minXY) / span_xy for i in range(len(X))]
distscaled = [(np.array(dist[i]) - mindist) / span_dist for i in range(len(X))]

### padding sequences
#### Padding sequences to the same length with a negative sentinel value (-0.01):


In [4]:
# Pad every feature to a common length with the sentinel value -0.01.
# The five input features use pad_sequences' default padding side, while the
# target is explicitly padded at the end (padding='post').
pad_opts = dict(dtype='float32', value=-0.01)

Xscaled, o1, o2, o3, distscaled = (
    pad_sequences(seq, **pad_opts)
    for seq in (Xscaled, o1, o2, o3, distscaled)
)
yscaled = pad_sequences(yscaled, padding='post', **pad_opts)

### Merging prepared data to create input and output data

In [5]:
# Build one (timesteps, 6) matrix per sample: the five padded sequence features
# plus one extra column whose first 14 rows carry the sample's auxiliary values
# (the remaining rows hold the -0.01 padding sentinel).
maxlen = Xscaled.shape[1]
inputseq = []
for idx in range(Xscaled.shape[0]):
    auxpadded = np.full(maxlen, -0.01)
    auxpadded[:14] = allauxdata[idx]     # 14 is the number of aux data
    columns = (Xscaled[idx], o1[idx], o2[idx], o3[idx], distscaled[idx], auxpadded)
    inputseq.append(np.column_stack(columns))

inputseq = np.array(inputseq)

### Separating test set before any modeling

In [None]:
# Separate a held-out test set; everything else is used for training / cross-validation.
tst = 0.2                 # fraction of samples held out as test data
#val=0.2                 # fraction of samples for validation data (unused)
n_samples = inputseq.shape[0]
tstsize = int(np.floor(n_samples * tst))
#valsize = int(np.floor(len(X) * val))

# Draw the test indices without replacement from ALL sample indices, so every
# sample (including the last one) is eligible. A dedicated, seeded generator
# makes the split reproducible on Restart & Run All without touching the
# global `random` state.
SPLIT_SEED = 42
tstlabel = random.Random(SPLIT_SEED).sample(range(n_samples), tstsize)
inputseqTEST = inputseq[tstlabel]
ytest = yscaled[tstlabel]

# Set membership keeps the complement computation linear in the number of samples.
held_out = set(tstlabel)
trnlabel = [i for i in range(n_samples) if i not in held_out]
inputseqtrain = inputseq[trnlabel]          # excluding test set
ytrain = yscaled[trnlabel]

### Define Vanilla Model

In [9]:
class VanillaLSTM(BaseEstimator):
    """scikit-learn style wrapper around a stacked Keras LSTM regressor.

    The estimator receives the merged input tensor and uses only its first
    ``features`` columns; any further columns are ignored.

    Parameters
    ----------
    nodes : int
        Number of units in every LSTM layer.
    batch_size : int
        Mini-batch size passed to ``model.fit``.
    epochs : int
        Number of training epochs.
    Llayers : int
        Number of stacked LSTM layers.
    features : int
        Number of leading input columns fed to the network.
    steps_in : int
        Length of the (padded) input sequences.
    steps_out : int
        Length of the (padded) output sequences.
    maxXY, minXY : float
        Constants used by ``score`` to undo the min-max scaling of the target.
        The defaults are the values that were previously hard-coded in this
        class; presumably they are the data's max/min -- confirm, or pass the
        values computed in the normalisation step.
    """

    def __init__(self, nodes=50, batch_size=32,
                 epochs=400, Llayers=1, features=5, steps_in=83, steps_out=147,
                 maxXY=654.7, minXY=650.1):
        # scikit-learn convention: __init__ only stores its arguments, so that
        # get_params()/clone() (used by GridSearchCV) can reproduce the estimator.
        self.nodes = nodes
        self.batch_size = batch_size
        self.epochs = epochs
        self.Llayers = Llayers
        self.features = features
        self.steps_in = steps_in
        self.steps_out = steps_out
        self.maxXY = maxXY
        self.minXY = minXY

    def create_model(self):
        """Build and compile the Keras model (masking -> LSTM stack -> dense output)."""
        model = Sequential()
        # Timesteps whose features all equal -0.01 (the padding sentinel) are masked.
        model.add(Masking(mask_value=-0.01, input_shape=(self.steps_in, self.features)))
        # All but the last LSTM layer must return full sequences for stacking.
        for _ in range(self.Llayers - 1):
            model.add(LSTM(self.nodes, activation='relu', return_sequences=True))
        model.add(LSTM(self.nodes, activation='relu'))
        model.add(Dense(self.steps_out))
        model.compile(optimizer='adam', loss='mse')
        return model

    def fit(self, inputseq, y):
        """Create a fresh model and train it; returns ``self``."""
        start = time.time()
        X = inputseq[:, :, :self.features]
        self.model = self.create_model()
        self.model.fit(X, y, epochs=self.epochs, verbose=0, batch_size=self.batch_size)
        end = time.time()
        print ("Finished Fitting Model. # of Epochs: %d\n Time Taken : %d secs"
               % (self.epochs, end - start))
        return self

    def predict(self, inputseq):
        """Return the model's (still scaled) predictions for ``inputseq``."""
        X = inputseq[:, :, :self.features]
        return self.model.predict(X)

    def _inverse_scale(self, scaled, valid):
        """Undo min-max scaling where ``valid`` is True; other entries stay 0."""
        restored = np.zeros(scaled.shape)
        restored[valid] = (scaled * (self.maxXY - self.minXY) + self.minXY)[valid]
        return restored

    def score(self, inputseq, y):
        """Return the negative RMSE between prediction and target.

        Both arrays are mapped back with ``maxXY``/``minXY`` first. For each
        row, only the first ``k`` positions are compared, where ``k`` is the
        number of non-negative target values (padding is negative); the rest is
        zero in both arrays. Those zero entries still count in the mean, so the
        value is averaged over the full padded length.
        The sign is flipped so that larger is better for model selection.
        """
        ypred = self.predict(inputseq)
        y = np.asarray(y)
        n_valid = (y >= 0).sum(axis=1)                      # non-padded length of each target row
        valid = np.arange(y.shape[1]) < n_valid[:, None]    # True on the leading n_valid positions
        yinv = self._inverse_scale(y, valid)
        ypredinv = self._inverse_scale(ypred, valid)

        rmseScore = -sqrt(mean_squared_error(ypredinv, yinv))    # negative of RMSE
        return rmseScore
        


### Fitting Vanilla LSTM, using grid search for hyperparameters

In [14]:
# Network dimensions are read from the prepared arrays. The last column of the
# merged input is not counted as a sequence feature.
n_steps_in, n_columns = inputseq.shape[1:]
n_features = n_columns - 1
n_steps_out = ytrain.shape[1]

VLmodel = VanillaLSTM(
    epochs=400,
    features=n_features,
    steps_in=n_steps_in,
    steps_out=n_steps_out,
)

In [15]:
# Hyper-parameter grid for the vanilla LSTM; only the number of LSTM layers is searched here.
# Wider grid kept for reference: {"nodes" : [10,50,100],"batch_size": [32,64,128] ,"Llayers" : [1,2,3]}
tuned_params = {"Llayers": [2, 3]}

# 8-fold cross-validated search; with refit=True the best configuration is
# retrained on all data passed to fit().
gs = GridSearchCV(
    estimator=VLmodel,
    param_grid=tuned_params,
    cv=8,
    refit=True,
    n_jobs=2,
)

In [None]:
# Run the cross-validated grid search on the training split only
# (inputseqtrain/ytrain exclude the held-out test indices).
grid_result=gs.fit(inputseqtrain,ytrain)
#np.save('VLSTMGridResults.npy',grid_result.cv_results_)

In [None]:
# Evaluate the refit best estimator on the held-out test set.
# VanillaLSTM.score returns the NEGATIVE RMSE, so values closer to 0 are better.
bestVLSTM = grid_result.best_estimator_
bestVLSTM.score(inputseqTEST,ytest)

# define vanilla lstm model
# NOTE(review): Xtrain, Xval and yval are not defined in the visible cells -- confirm
# where they come from before running this cell.
model = Sequential([
    Masking(mask_value=-0.01, input_shape=(n_steps_in, n_features)),
    LSTM(50, activation='relu', return_sequences=True),
    LSTM(50, activation='relu'),
    Dense(n_steps_out),
])
model.compile(optimizer='adam', loss='mse')

history = model.fit(
    Xtrain, ytrain,
    validation_data=(Xval, yval),
    epochs=200,
    batch_size=32,
    verbose=1,
)

# plot history: training vs. validation loss per epoch
for curve, legend_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(history.history[curve], label=legend_label)
plt.legend()
plt.show()





#inverse normalization using min max method
def calculateRMSE (y, ypred, maxXY, minXY, exclude_padding=False):
    """Compute the RMSE between target and prediction after undoing min-max scaling.

    For each row, the number ``k`` of non-negative target values is counted
    (padding is negative) and the first ``k`` positions of both ``y`` and
    ``ypred`` are mapped back with ``value * (maxXY - minXY) + minXY``. All
    other positions are left at 0 in both returned arrays.

    Parameters
    ----------
    y : 2-D array
        Scaled targets, padded with negative values.
    ypred : 2-D array
        Scaled predictions, same shape as ``y``.
    maxXY, minXY : float
        Constants of the min-max scaling to undo.
    exclude_padding : bool, default False
        If False (original behaviour), the squared error is averaged over
        every entry, including the zero-filled padded positions, which lowers
        the reported RMSE when much of ``y`` is padding. If True, the average
        is taken over the non-padded positions only.

    Returns
    -------
    (rmse, ypredinv, yinv) : (float, ndarray, ndarray)

    Raises
    ------
    ValueError
        If the inputs are not 2-D, are empty, or differ in shape.
    """
    y = np.asarray(y)
    ypred = np.asarray(ypred)
    if y.ndim != 2 or y.size == 0:
        raise ValueError("y must be a non-empty 2-D array")
    if ypred.shape != y.shape:
        raise ValueError("y and ypred must have the same shape")

    span = maxXY - minXY
    n_valid = (y >= 0).sum(axis=1)                      # non-padded length of each target row
    valid = np.arange(y.shape[1]) < n_valid[:, None]    # True on the leading n_valid positions

    yinv = np.zeros(y.shape)
    ypredinv = np.zeros(ypred.shape)
    yinv[valid] = (y * span + minXY)[valid]
    ypredinv[valid] = (ypred * span + minXY)[valid]

    squared_error = (ypredinv - yinv) ** 2
    if exclude_padding:
        n_entries = int(valid.sum())
        # With no valid entries there is nothing to compare; report zero error.
        mse = squared_error[valid].sum() / n_entries if n_entries else 0.0
    else:
        mse = squared_error.mean()

    rmse = sqrt(mse)

    return rmse, ypredinv, yinv

# Evaluate the manually trained vanilla model on the validation set.
# NOTE(review): Xval, yval and Xtest are not defined in the visible cells -- confirm
# where they come from before running this cell.
ypredVAL = model.predict(Xval)
rmse, ypredinv, yinv = calculateRMSE(yval,ypredVAL,maxXY,minXY)

print('Val RMSE: %.3f' % rmse)

# Evaluate on the test set. This rebinds rmse, ypredinv and yinv, so after this
# cell they hold the TEST results, not the validation results.
ypredtest = model.predict(Xtest)
rmse, ypredinv, yinv = calculateRMSE(ytest,ypredtest,maxXY,minXY)

print('Test RMSE: %.3f' % rmse)

## lstm model with aux variables


In [None]:
class AuxLSTM(BaseEstimator):
    """scikit-learn style wrapper around a two-input, two-output Keras LSTM model.

    The merged input tensor is split into the first ``features`` columns
    (sequence input) and the first ``n_aux`` rows of the column at index
    ``features`` (auxiliary input). The network has a main output, fed by the
    LSTM encoding concatenated with the auxiliary input, and an auxiliary
    output fed by the LSTM encoding alone. Both are trained against the same
    target.

    Parameters
    ----------
    nodes : int
        Units in every LSTM layer and every hidden dense layer.
    batch_size : int
        Mini-batch size passed to ``model.fit``.
    epochs : int
        Number of training epochs.
    Dlayers : int
        Number of hidden dense layers after the concatenation.
    Llayers : int
        Number of LSTM layers. Any value other than 1 produces at least two
        LSTM layers.
    features : int
        Number of leading input columns used as sequence features; also the
        index of the column carrying the auxiliary values.
    steps_in : int
        Length of the (padded) input sequences.
    steps_out : int
        Length of the (padded) output sequences.
    maxXY, minXY : float
        Constants used by ``score`` to undo the min-max scaling of the target.
        The defaults are the values that were previously hard-coded in this
        class; presumably they are the data's max/min -- confirm.
    n_aux : int
        Number of auxiliary values per sample (previously hard-coded as 14).
    """

    def __init__(self, nodes=50, batch_size=32,
                 epochs=1, Dlayers=1, Llayers=1, features=5, steps_in=83, steps_out=147,
                 maxXY=654.7, minXY=650.1, n_aux=14):
        # scikit-learn convention: __init__ only stores its arguments, so that
        # get_params()/clone() (used by GridSearchCV) can reproduce the estimator.
        self.nodes = nodes
        self.features = features
        self.steps_in = steps_in
        self.steps_out = steps_out
        self.epochs = epochs
        self.batch_size = batch_size
        self.Dlayers = Dlayers   # number of hidden dense layers
        self.Llayers = Llayers   # number of lstm layers
        self.maxXY = maxXY
        self.minXY = minXY
        self.n_aux = n_aux

    def create_model(self):
        """Build and compile the functional Keras model with two inputs and two outputs."""
        seq_input = Input(shape=(self.steps_in, self.features), dtype='float32', name='seq_input')
        # Timesteps whose features all equal -0.01 (the padding sentinel) are masked.
        mask = Masking(mask_value=-0.01)(seq_input)
        if self.Llayers == 1:
            lstm_out = LSTM(self.nodes, activation='relu')(mask)
        else:
            # Every LSTM layer except the last returns the full sequence for stacking.
            lstm_out = LSTM(self.nodes, activation='relu', return_sequences=True)(mask)
            for _ in range(self.Llayers - 2):
                lstm_out = LSTM(self.nodes, activation='relu', return_sequences=True)(lstm_out)
            lstm_out = LSTM(self.nodes, activation='relu')(lstm_out)

        # Output taken directly from the LSTM encoding; weighted 0.2 in the loss,
        # used to smooth training and as regularization.
        auxiliary_output = Dense(self.steps_out, activation='sigmoid', name='aux_output')(lstm_out)

        auxiliary_input = Input(shape=(self.n_aux,), name='aux_input')
        x = keras.layers.concatenate([lstm_out, auxiliary_input])

        # Hidden dense layers on top of the concatenated representation.
        for _ in range(self.Dlayers):
            x = Dense(self.nodes, activation='relu')(x)

        # Main output head (sigmoid-activated dense layer).
        main_output = Dense(self.steps_out, activation='sigmoid', name='main_output')(x)

        modelaux = Model(inputs=[seq_input, auxiliary_input], outputs=[main_output, auxiliary_output])

        # Same mse loss on both outputs: main output weight 1.0, auxiliary output weight 0.2.
        modelaux.compile(optimizer='adam', loss='mse',
                         loss_weights=[1., 0.2])
        return modelaux

    def _split_inputs(self, inputseq):
        """Split the merged tensor into (sequence features, auxiliary vector)."""
        X = inputseq[:, :, :self.features]
        aux = inputseq[:, :self.n_aux, self.features]
        return X, aux

    def fit(self, inputseq, y):
        """Create a fresh model and train both outputs against ``y``; returns ``self``."""
        start = time.time()
        X, aux = self._split_inputs(inputseq)
        self.model = self.create_model()
        self.model.fit([X, aux], [y, y], epochs=self.epochs, verbose=0, batch_size=self.batch_size)
        end = time.time()
        print ("Finished Fitting AuxModel. # of Epochs: %d\n Time Taken : %d secs"
               % (self.epochs, end - start))
        return self

    def predict(self, inputseq):
        """Return the model's raw predictions: ``[main_output, aux_output]`` (still scaled)."""
        X, aux = self._split_inputs(inputseq)
        return self.model.predict([X, aux])

    def _inverse_scale(self, scaled, valid):
        """Undo min-max scaling where ``valid`` is True; other entries stay 0."""
        restored = np.zeros(scaled.shape)
        restored[valid] = (scaled * (self.maxXY - self.minXY) + self.minXY)[valid]
        return restored

    def score(self, inputseq, y):
        """Return the negative RMSE of the main output against the target.

        Both arrays are mapped back with ``maxXY``/``minXY`` first. For each
        row, only the first ``k`` positions are compared, where ``k`` is the
        number of non-negative target values (padding is negative); the rest is
        zero in both arrays. Those zero entries still count in the mean.
        The sign is flipped so that larger is better for model selection.
        """
        ypred = self.predict(inputseq)[0]        # the model has two outputs; the main one is scored
        y = np.asarray(y)
        n_valid = (y >= 0).sum(axis=1)                      # non-padded length of each target row
        valid = np.arange(y.shape[1]) < n_valid[:, None]    # True on the leading n_valid positions
        yinv = self._inverse_scale(y, valid)
        ypredinv = self._inverse_scale(ypred, valid)

        rmseScore = -sqrt(mean_squared_error(ypredinv, yinv))    # negative of RMSE
        return rmseScore
        

# Manual (non grid-search) construction, training and evaluation of the
# two-input / two-output LSTM model.
# NOTE(review): Xtrain, Xval, yval, Xtest, auxdatatrain, auxdataval and auxdatatest
# are not defined in the visible cells -- confirm where they come from.

# sequential input: meant to receive sequence data (inputseq)

seq_input = Input(shape=(n_steps_in, n_features), dtype='float32', name='seq_input')
# NOTE(review): this masks the value 0.0, whereas the padding cell and both
# estimator classes use -0.01 as the padding sentinel -- confirm which is intended.
mask=Masking(mask_value=0.0)(seq_input)
lstm_out = LSTM(50,activation='relu',return_sequences=True)(mask)
lstm_out=LSTM(50, activation='relu')(lstm_out)

# output taken directly from the LSTM encoding; weighted 0.2 in the loss, used to
# smooth training and as regularization:
auxiliary_output = Dense(n_steps_out, name='aux_output')(lstm_out)   

# one auxiliary value per column of allauxdata
auxiliary_input = Input(shape=(allauxdata.shape[1],), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])

# One hidden dense layer on top of the concatenated representation
x = Dense(64, activation='relu')(x)

# Main output head (dense layer without an explicit activation)
main_output = Dense(n_steps_out, name='main_output')(x)

modelaux = Model(inputs=[seq_input, auxiliary_input], outputs=[main_output, auxiliary_output])


# Same mse loss on both outputs: main output weight 1.0, auxiliary output weight 0.2
modelaux.compile(optimizer='adam', loss='mse',
              loss_weights=[1., 0.2])

# Both outputs are trained against the same target
historyaux=modelaux.fit([Xtrain, auxdatatrain], [ytrain, ytrain],
          epochs=200, batch_size=32,validation_data=([Xval, auxdataval],[yval, yval]))

# plot history (loss of the main output only)
plt.plot(historyaux.history['main_output_loss'], label='train')
plt.plot(historyaux.history['val_main_output_loss'], label='validation')
#pyplot.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

# make a prediction on validation set
ypredVAL = modelaux.predict([Xval,auxdataval])

# index 0 selects the main output
rmse, ypredinv, yinv = calculateRMSE(yval,ypredVAL[0],maxXY,minXY)

print('Val RMSE: %.3f' % rmse)

# make a prediction on the test set (rebinds rmse, ypredinv and yinv)
ypredTST = modelaux.predict([Xtest,auxdatatest])
rmse, ypredinv, yinv = calculateRMSE(ytest,ypredTST[0],maxXY,minXY)

print('Test RMSE: %.3f' % rmse)

In [None]:
# Network dimensions are read from the prepared arrays. The last column of the
# merged input is not counted as a sequence feature.
n_steps_in, n_columns = inputseq.shape[1:]
n_features = n_columns - 1
n_steps_out = ytrain.shape[1]
n_nodes, n_baches = 50, 32

# NOTE(review): this rebinds VLmodel to a 1-epoch VanillaLSTM although the
# surrounding cells work with AuxLSTM; n_nodes and n_baches are not used in the
# visible cells either -- confirm whether this cell is still needed.
VLmodel = VanillaLSTM(
    epochs=1,
    features=n_features,
    steps_in=n_steps_in,
    steps_out=n_steps_out,
)
 

In [None]:
# Hyper-parameter grid for the auxiliary-input model: number of hidden dense
# layers and number of LSTM layers.
tuned_params = {"Dlayers" : [1,2], "Llayers" : [1,2]}

# Build the estimator with the dimensions derived from the data instead of
# relying on the class defaults, so the search keeps working if the padded
# sequence lengths or the number of features change.
auxmodel = AuxLSTM(features=n_features, steps_in=n_steps_in, steps_out=n_steps_out)

# 2-fold cross-validated search; the best configuration is refit on all data passed to fit().
gsaux = GridSearchCV(auxmodel, tuned_params,cv = 2, refit= True, n_jobs=2)

In [None]:
# Run the cross-validated grid search for the auxiliary-input model on the training split.
gridaux_result=gsaux.fit(inputseqtrain,ytrain)

In [None]:
# Show the cross-validation results of the auxiliary-model search fitted in the
# previous cell (grid_result belongs to the vanilla LSTM search).
gridaux_result.cv_results_

In [None]:
#np.save('VLSTMGridResults.npy',grid_result.cv_results_)
# Evaluate the refit best auxiliary-input estimator on the held-out test set.
# AuxLSTM.score returns the NEGATIVE RMSE of the main output, so values closer to 0 are better.
bestauxLSTM = gridaux_result.best_estimator_
bestauxLSTM.score(inputseqTEST,ytest)

# Comparing model performance by RMSE on the validation set
### All models are trained on the same training and validation sets, over 200 epochs.
###### (Dense layers mentioned in the models are hidden layers; the output dense layer is not counted.)
* **Vanilla 1:** Batch size=32, LSTM units=50, no masking of input, no sample weights for output, two LSTM layers, loss=mse, **RMSE=~71**

* **Vanilla 2:** Batch size=32, LSTM units=50, masking of input, no sample weights for output, two LSTM layers, loss=mse, **VAL RMSE=~0.29, Test RMSE=0.684**

* **Vanilla 3:** Batch size=32, LSTM units=50, masking of input, padded with -0.01 so that padding is not considered in RMSE, two LSTM layers, loss=mse, **VAL RMSE=0.302, Test RMSE=0.546**



* **AuxLSTM 1:** Batch size=32, LSTM units=50, masking of input, no sample weights for output, two LSTM layers, one hidden dense layer, loss=mse, **VAL RMSE=~0.197, Test RMSE=0.616**

* **AuxLSTM 2:** Batch size=32, LSTM units=50, masking of input, padded with -0.01 so that padding is not considered in RMSE, two LSTM layers, one hidden dense layer, loss=mse, **VAL RMSE=~0.213, Test RMSE=0.538**