# Load Data

In [None]:
import pickle
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import sys
sys.path.append('../utils')
from utils import load_processed_data, cv, get_test_metrics

In [None]:
# Load the processed dataset: adjacency matrix, index/station mapper and the
# speed table that the rest of the notebook indexes by station id.
processed_dir = '../data/processed/rdp_ds'
adj_mat, ind_station_mapper, speed_df = load_processed_data(processed_dir)

In [None]:
# Restore the notebook configuration (read below via ENV['station_id']).
# NOTE: pickle can execute arbitrary code while loading -- only open an
# env.dat that was produced by this project.
with open('./env.dat', 'rb') as env_file:
    ENV = pickle.load(env_file)

**Choose Station**

In [None]:
# Pick the configured station's speed series, then keep only the
# May, June and July observations.
station_speed = speed_df[ENV['station_id']]
in_may_to_july = station_speed.index.month.isin([5, 6, 7])
station_speed = station_speed[in_may_to_july]
station_speed

In [None]:
# Line plot of the selected station's speed over time; the chained axis
# updates return the figure, so it is still the cell's displayed value.
fig = px.line(
    x=station_speed.index,
    y=station_speed,
    title='Time Series Plot',
)
fig.update_xaxes(title='Time').update_yaxes(title='Speed (mph)')

# Prepare Data for Model

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [None]:
# convert an array of values into a dataset matrix
def construct_dataset(dataset, look_back=1):
    """Build supervised (X, y) pairs from a series using a sliding window.

    Sample ``i`` uses rows ``i .. i + look_back - 1`` (flattened) as its
    input and row ``i + look_back`` (flattened) as its target.

    Args:
        dataset: NumPy array whose first axis indexes time steps.
        look_back: Number of consecutive rows that form one input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with one row per sample. A series
        of ``n`` rows yields ``n - look_back`` samples; if the series has
        ``look_back`` rows or fewer, both arrays are empty.
    """
    # The last usable window starts at n - look_back - 1, so there are
    # exactly n - look_back samples. (The previous bound of
    # n - look_back - 1 silently dropped the final sample.)
    n_samples = dataset.shape[0] - look_back
    X = [dataset[i:i + look_back].flatten() for i in range(n_samples)]
    y = [dataset[i + look_back].flatten() for i in range(n_samples)]
    return np.array(X), np.array(y)

def reshape_inp(inp):
    """Insert a length-1 middle axis: (rows, cols) -> (rows, 1, cols).

    The result is what the LSTM below consumes with
    ``input_shape=(1, num_lags)``.
    """
    n_rows, n_cols = inp.shape[0], inp.shape[1]
    target_shape = (n_rows, 1, n_cols)
    return inp.reshape(*target_shape)

In [None]:
# NOTE(review): in the visible source `scaled_speeds` is only assigned in the
# following cell, so on a fresh kernel this cell has to be run after it.
scaled_speeds.shape

In [None]:
# raw speeds as a single-feature column, the 2-D layout the scaler expects
raw_speeds = station_speed.values.reshape(-1, 1)

# create train-test data: May and June rows train, the remaining rows test
# NOTE(review): a positional cutoff assumes the rows are in chronological
# order -- confirm against the data loader.
cutoff = int(station_speed.index.month.isin([5, 6]).sum())

# normalize data
# Fit the scaler on the training rows only, then apply it to the whole
# series; fitting on everything would leak the test period's min/max into
# training. Test values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_speeds[:cutoff])
scaled_speeds = scaler.transform(raw_speeds)

train = scaled_speeds[:cutoff]
test = scaled_speeds[cutoff:]

# build dataset: each sample uses the previous `num_lags` values as input
num_lags = 2
X_train, y_train = construct_dataset(train, look_back=num_lags)
X_test, y_test = construct_dataset(test, look_back=num_lags)

# reshape inp to (samples, 1, num_lags) for the LSTM
X_train = reshape_inp(X_train)
X_test = reshape_inp(X_test)

# Build Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import keras
from time import time

In [None]:
# create and fit the LSTM network: one 4-unit LSTM layer feeding a single
# linear output, trained on mean squared error with Adam
model = Sequential([
    LSTM(4, input_shape=(1, num_lags)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# wall-clock timestamps bracketing the training run
start = time()
model.fit(X_train, y_train, epochs=25, batch_size=1, verbose=2)
end = time()

In [None]:
# model.save('./trained/LSTM')

# Evaluate

In [None]:
# make predictions and map them back from the scaled range to original units
train_preds = scaler.inverse_transform(model.predict(X_train))
test_preds = scaler.inverse_transform(model.predict(X_test))

# invert the targets as well so they are comparable with the predictions
# NOTE: this rebinds y_train / y_test in place; re-running the cell without
# rebuilding the dataset would inverse-transform them a second time.
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)

In [None]:
# Each buffer is NaN wherever no prediction exists and is sized to its own
# split, so it lines up one-to-one with the train / test index slice it is
# plotted against. (Sizing both like the full series left x and y with
# different lengths.) The first `num_lags` rows of a split have no
# prediction because they only serve as model inputs.

# shift train predictions for plotting
train_pred_plot = np.full((train.shape[0], 1), np.nan)
train_pred_plot[num_lags:(len(train_preds) + num_lags), :] = train_preds

# shift test predictions for plotting
test_pred_plot = np.full((test.shape[0], 1), np.nan)
test_pred_plot[num_lags:(len(test_preds) + num_lags), :] = test_preds

In [None]:
import plotly.graph_objects as go

In [None]:
# Overlay the observed series with the train and test predictions.
# go.Scatter with mode='lines' replaces the deprecated go.Line helper.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=station_speed.index,
    y=station_speed,
    mode='lines',
    name='True Values',
))
fig.add_trace(go.Scatter(
    x=station_speed[:cutoff].index,
    y=train_pred_plot.flatten(),
    mode='lines',
    name='Predicted Values (Train)',
))
fig.add_trace(go.Scatter(
    x=station_speed[cutoff:].index,
    y=test_pred_plot.flatten(),
    mode='lines',
    name='Predicted Values (Test)',
))
fig.update_layout(
    title="LSTM Forecast Results",
    xaxis_title="Time",
    yaxis_title="Forecast")

In [None]:
# fig.write_html('../plots/LSTM.html')

In [None]:
# Cross-validation metrics on the training data plus metrics on the test set.
# NOTE(review): y_train was inverse-transformed in the evaluation cell above
# while X_train is still scaled -- confirm that cv() expects this pairing.
metric_names = ['mse', 'mae', 'rmse', 'r2']
cv_metrics = cv(model, [X_train, y_train], metrics=metric_names, epochs=10)
test_metrics = get_test_metrics(y_test, test_preds)

# bundle both result sets under the keys 'cv' and 'test'
metrics = dict(cv=cv_metrics, test=test_metrics)

In [None]:
# with open('./trained/LSTM/metrics_LSTM.dat', 'wb') as f:
#     pickle.dump(metrics, f)