# Grab-Microsoft Challenge 
## Traffic Management

- geohash6: geohash is a public domain geocoding system which encodes a geographic location into a short string of letters and digits with arbitrary precision. You are free to use any geohash library to encode/decode the geohashes into latitude and longitude or vice versa. (Example: https://github.com/hkwi/python-geohash)
- day: the value indicates the sequential order and not a particular day of the month
- timestamp: start time of 15-minute intervals in the following format: <hour>:<minute>, where hour ranges from 0 to 23 and minute is either one of (0, 15, 30, 45)
- demand: aggregated demand normalised to be in the range [0,1]
    
## Problem Statements:
- Which areas have high / low traffic demand?
- How does regional traffic demand change according to day / time?
- Forecast the travel demand for next 15min / 1hour and predict areas with high travel demand

### Training the LSTM model

#### Import python library

In [None]:
import numpy as np
from numpy import array
import pandas as pd
import matplotlib.pyplot as plt

# plot matplotlib graph
%matplotlib inline

#Import models from scikit learn module:
from sklearn.metrics import mean_squared_error
from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten, LSTM, BatchNormalization
from keras import regularizers
from sklearn.model_selection import train_test_split

import h5py
import sklearn.metrics as metrics
from keras.callbacks import ModelCheckpoint,CSVLogger
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Bidirectional
from keras.utils import plot_model 

import pickle
import joblib


In [None]:
# Expose only GPU index 0 to CUDA-based libraries through the
# CUDA_VISIBLE_DEVICES environment variable.
# NOTE(review): keras is imported in an earlier cell — confirm the variable is
# still set early enough for the backend to honour it.
import os 
os.environ["CUDA_VISIBLE_DEVICES"]="0"

### Define Variables

In [None]:
# Fix NumPy's random state so repeated runs draw the same numbers
seed = 7
np.random.seed(seed)

# Run settings: artefact name prefix, batch size, epoch count and the
# fraction of samples kept for training at each split
modelname = 'Bi-LSTM1'
batch_size = 8192
no_of_epoch = 18
no_of_train = 0.8

# Window sizes: time steps fed to the model and steps it predicts
n_steps_in = 3
predict_next_no_of_output = 2

### Define Function

In [None]:
def createModel():
    """Build and compile a bidirectional-LSTM regressor (functional API).

    The input shape is read from the module-level ``X_train`` array
    (its second and third dimensions) and the output width from the
    module-level ``predict_next_no_of_output``; both must exist before
    this function is called.

    Note: a second ``createModel`` defined later in this file rebinds the
    name, so this variant is only reachable if that definition is removed
    or renamed.

    Returns:
        A Keras ``Model`` compiled with MSE loss, the Adam optimizer and
        MSE/MAE metrics, producing ``predict_next_no_of_output`` values
        per sample.
    """
    inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))
    y = Bidirectional(
        LSTM(units=128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)
    )(inputs)
    y = BatchNormalization()(y)
    # The LSTM returns one vector per time step and Dense only acts on the
    # last axis, so without flattening the output would be 3-D
    # (batch, steps, outputs) and could not be compared with the 2-D
    # (batch, outputs) targets produced by split_sequences.
    y = Flatten()(y)
    y = Dense(predict_next_no_of_output, activation='sigmoid')(y)

    model = Model(inputs=inputs, outputs=y)
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    return model

def createModel():
    """Build and compile a bidirectional-LSTM regressor (Sequential API).

    The input shape is read from the module-level ``X_train`` array
    (its second and third dimensions) and the output width from the
    module-level ``predict_next_no_of_output``; both must exist before
    this function is called.

    Returns:
        A Keras ``Sequential`` model compiled with MSE loss, the Adam
        optimizer and MSE/MAE metrics, producing
        ``predict_next_no_of_output`` values per sample.
    """
    # Dropout is not part of the file-level keras imports, so bring it into
    # scope here.
    from keras.layers import Dropout

    model = Sequential()
    model.add(
        Bidirectional(
            LSTM(20, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
            input_shape=(X_train.shape[1], X_train.shape[2]),
        )
    )
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    # The LSTM returns one vector per time step and Dense only acts on the
    # last axis, so flatten first to get a 2-D (batch, outputs) prediction
    # that matches the targets produced by split_sequences.
    model.add(Flatten())
    # Sequential.add expects a layer instance; the layer must not be called
    # on a tensor here.
    model.add(Dense(predict_next_no_of_output, activation='sigmoid'))

    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    return model

# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    """Cut a 2-D array into overlapping (input window, target window) pairs.

    Every column except the last is treated as an input feature and the
    last column as the target. For a window starting at row ``s`` the input
    is rows ``s .. s + n_steps_in - 1`` and the target is the last column of
    the ``n_steps_out`` rows beginning at the final input row.

    Args:
        sequences: 2-D array-like supporting ``[rows, cols]`` slicing.
        n_steps_in: number of rows in each input window.
        n_steps_out: number of target values per window.

    Returns:
        Tuple ``(X, y)`` of arrays holding the stacked input windows and
        the stacked target windows.
    """
    n_rows = len(sequences)
    # Rows spanned by one complete input + target window
    span = n_steps_in + n_steps_out - 1
    # Keep only the start rows whose full window fits inside the data
    starts = [s for s in range(n_rows) if s + span <= n_rows]

    features = [sequences[s:s + n_steps_in, :-1] for s in starts]
    targets = [sequences[s + n_steps_in - 1:s + span, -1] for s in starts]
    return array(features), array(targets)

#### Reading from Preprocessed & Test sample dataset

In [None]:
# Read the 'TrafficMgmt' sheet of the test-sample workbook. The context
# manager closes the workbook handle once the sheet has been loaded.
with pd.ExcelFile('data/test_sample.xlsx') as xls:
    test_sample = pd.read_excel(xls, sheet_name='TrafficMgmt')

In [None]:
# Read the 'TrafficMgmt' sheet of the preprocessed feature workbook. The
# context manager closes the workbook handle once the sheet has been loaded.
with pd.ExcelFile('data/Dataset_feature.xlsx') as xls:
    data = pd.read_excel(xls, sheet_name='TrafficMgmt')

In [None]:
# Names of the feature columns (these are the columns passed to the scaler
# further down) and the name of the target column.
predictor_var = ['day', 'hour', 'min', 'dow', 'lat', 'long']
outcome_var   = 'demand'

#### Prepare training & test data
- Since this is a time series dataset, we can try using LSTM 

In [None]:
# Convert the whole frame to a NumPy array; split_sequences treats the last
# column as the target and every other column as an input feature.
# NOTE(review): assumes `data` holds the predictor columns followed by the
# target column — confirm against Dataset_feature.xlsx.
dataset = data.to_numpy()

In [None]:
# Convert the array into windowed inputs X and multi-step targets y
X, y = split_sequences(dataset, n_steps_in, predict_next_no_of_output)
print(X.shape, y.shape)

#### Train-test-val dataset split

In [None]:
# First cut: the leading `no_of_train` share of the windows is kept for
# training + validation, the remainder becomes the test set.
holdout_start = int(no_of_train * len(X))
X_train1, X_test = X[:holdout_start], X[holdout_start:]
y_train1, y_test = y[:holdout_start], y[holdout_start:]

# Second cut: the same ratio applied to the retained part separates the
# training set from the validation set. Order is preserved (no shuffling).
val_start = int(no_of_train * len(X_train1))
X_train, X_val = X_train1[:val_start], X_train1[val_start:]
y_train, y_val = y_train1[:val_start], y_train1[val_start:]

In [None]:
# Show the shapes of the train, test and validation partitions
print(X_train.shape, y_train.shape,X_test.shape, y_test.shape,X_val.shape, y_val.shape)

<h2><center>Create Model and test</center></h2>

<h3>LSTM</h3>

In [None]:
# Build the compiled network and print its layer summary.
# The factory defined in this notebook is `createModel`; no `createModel1`
# exists anywhere in this file, so calling it raised a NameError.
model = createModel()
model.summary()

In [None]:
# Compile with MSE loss, the Adam optimizer and MSE/MAE metrics.
# NOTE(review): createModel() already compiles with these same settings, so
# this call looks redundant — confirm before removing.
model.compile(loss='mse',optimizer='adam', metrics=['mse', 'mae'])

In [None]:
# Training callbacks:
#   * ModelCheckpoint writes the model to <modelname>.hdf5 whenever the
#     monitored validation loss improves (save_best_only=True)
#   * CSVLogger records the per-epoch metrics in <modelname>.csv
# `mode` has to agree with `monitor`: 'min' for a loss such as 'val_loss',
# 'max' for an accuracy-style metric such as 'val_acc'.
filepath = modelname + ".hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    mode='min',
)
csv_logger = CSVLogger(modelname + '.csv')
callbacks_list = [checkpoint, csv_logger]

In [None]:
# Train the model, validating on the held-out validation split after each
# epoch. shuffle=False leaves the samples in their original order, and the
# callbacks handle checkpointing and CSV logging.
# The result was previously bound to the misspelled name `hisgtory`, which
# nothing else in this file references.
history = model.fit(X_train,
                    y_train,
                    validation_data=(X_val, y_val),
                    epochs=no_of_epoch,
                    batch_size=batch_size,
                    shuffle=False,
                    callbacks=callbacks_list)

In [None]:
# Write a diagram of the network to <modelname>.pdf, with tensor shapes shown
# and layer names hidden.
# NOTE(review): rankdir='TB' presumably requests a top-to-bottom layout —
# confirm against the Keras plot_model documentation.
plot_model(model, 
             to_file=modelname + '.pdf', 
             show_shapes=True, 
             show_layer_names=False,
             rankdir='TB') 

### validation dataset test result

In [None]:
# Predict on the validation split and print the mean squared error
predicts = model.predict(X_val)
print(mean_squared_error(y_val,predicts))

### Test dataset test result

In [None]:
# Predict on the test split and print the mean squared error
predicted_value = model.predict(X_test)
print(mean_squared_error(y_test,predicted_value))

In [None]:
# Plot the per-epoch log that CSVLogger wrote to <modelname>.csv as two
# stacked panels.
records = pd.read_csv(modelname + '.csv')
plt.figure(figsize=(15, 10))

# Upper panel: validation MSE together with the validation/training loss
plt.subplot(211)
for column in ('val_mse', 'val_loss', 'loss'):
    plt.plot(records[column], label=column)
plt.title('MSE', fontsize=12)
plt.legend(loc="upper left", fontsize=15)

# Hide the x tick labels of the upper panel
ax = plt.gca()
ax.set_xticklabels([])

# Lower panel: validation MAE together with the validation/training loss
plt.subplot(212)
for column in ('val_mae', 'val_loss', 'loss'):
    plt.plot(records[column], label=column)
plt.title('MAE', fontsize=12)
plt.legend(loc="upper left", fontsize=15)

plt.show()



### Load the saved LSTM model

In [None]:
# Load your own trained model from the checkpoint file written during training.
# NOTE(review): compile=False presumably skips restoring the optimizer/loss,
# which is enough for predict() — confirm against the Keras load_model docs.
model = load_model(filepath, compile = False)

### Sample Testing 

In [None]:
# Load the previously saved feature scaler from disk.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# scaler files that come from a trusted source.
scaler_filename = "grab_msft_scaler.save"
scaler          = joblib.load(scaler_filename) 

#### First set of testdata

In [None]:
# Scale the sample's predictor columns with the scaler loaded from disk.
# Use transform(), not fit_transform(): refitting would replace the loaded
# scaling statistics with ones computed from this test sample, and would also
# change the scaler that the single-sample cells further down rely on.
test_sample[predictor_var] = scaler.transform(test_sample[predictor_var])

In [None]:
# Convert the scaled sample frame into windowed inputs and multi-step targets
test_sample_array = test_sample.to_numpy()
X_sample, y_sample = split_sequences(test_sample_array, n_steps_in, predict_next_no_of_output)
print(X_sample.shape, y_sample.shape)

In [None]:
# Predict on the sample windows and print predictions next to the actual targets
predicted_value = model.predict(X_sample)
print('Predicted value : {} \n Actual Value    : {}' .format(predicted_value,y_sample))

In [None]:
# Mean squared error between the sample targets and the predictions above
print(mean_squared_error(y_sample,predicted_value))

### single sample test

In [None]:
# Data input sequence format :
# Each test_dataN holds three rows of six values; the cells below reshape it
# to (1, n_steps_in, 6) before prediction.
# NOTE(review): the six values presumably follow predictor_var order
# (day, hour, min, dow, lat, long) — confirm.
test_data1  = [[18,20, 0, 4, -5.353088, 90.653687],
               [10,14,30, 3, -5.413513, 90.664673],
               [ 9, 6,15, 2, -5.325623, 90.906372]]
test_data2  = [[32, 5, 0, 4, -5.353088, 90.752563],
               [15, 4, 0, 1, -5.413513, 90.719604],
               [ 1,12,15, 1, -5.336609, 90.609741]]
test_data3  = [[25, 3,30, 4, -5.391541, 90.818481],
               [51,20,45, 2, -5.408020, 90.631714],
               [48, 6,15, 6, -5.364075, 90.763550]]
test_data4  = [[ 4,22,15, 4, -5.402527, 90.675659],
               [45, 9,15, 3, -5.402527, 90.917358],
               [52,11,45, 3, -5.364075, 90.664673]]
test_data5  = [[46,12,15, 4, -5.353088, 90.642700],
               [34,14,45, 6, -5.375061, 90.807495],
               [40, 2,30, 5, -5.424500, 90.785522]]
test_data6  = [[14,14,45, 0, -5.391541, 90.598755],
               [27, 3,30, 6, -5.320129, 90.785522],
               [ 6,23,45, 6, -5.358582, 90.752563]]
test_data7  = [[48,11,30, 6, -5.391541, 90.609741],
                [17,23,45, 3, -5.292664, 90.829468],
                [56,10, 0, 0, -5.413513, 90.774536]]

# Two reference values per sample, printed next to the model's prediction.
# NOTE(review): presumably the actual demand for the predicted steps — confirm.
test_target1 = [[0.102821],[0.088755]]
test_target2 = [[0.023843],[0.007460]]
test_target3 = [[0.054170],[0.123463]]
test_target4 = [[0.359406],[0.514136]]
test_target5 = [[0.026409],[0.013998]]
test_target6 = [[0.029400],[0.057255]]
test_target7 = [[0.008772],[0.119240]]

In [None]:
# Sample 1: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data1).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target1))

In [None]:
# Sample 2: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data2).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target2))

In [None]:
# Sample 3: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data3).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target3))

In [None]:
# Sample 4: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data4).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target4))

In [None]:
# Sample 5: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data5).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target5))

In [None]:
# Sample 6: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data6).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target6))

In [None]:
# Sample 7: scale the rows, add a leading batch axis, predict, and print the
# prediction next to the reference values.
Data_scaled = scaler.transform(test_data7).reshape(1, n_steps_in, 6)
predicted_value = model.predict(Data_scaled)
print('Predicted value : {}\nActual Value    : {}'.format(predicted_value, test_target7))