# Load Data

In [1]:
import pickle
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Pre-processed artifacts of the rdp_ds dataset.
adj_mat = _read_pickle('../data/processed/rdp_ds/adj_mat.dat')
ind_station_mapper = _read_pickle('../data/processed/rdp_ds/adj_mat_ind_station_mapper.dat')
speed_df = _read_pickle('../data/processed/rdp_ds/speeds.dat')

**Get Time Series for Station with Most Data**

In [None]:
# Pick the column of speed_df with the fewest missing readings.
station_id = speed_df.isna().sum().idxmin()

# Forward-fill the gaps. Series.ffill() replaces fillna(method='ffill'), which is
# deprecated in pandas >= 2.1. (Only 2 values are missing, so dropping them
# would work as well.)
station_speed = speed_df[station_id].ffill()

# Subset to June only; assumes speed_df has a datetime-like index.
station_speed = station_speed[station_speed.index.month == 6]
station_speed

In [None]:
# Line chart of the selected station's speed readings over time.
fig = px.line(
    x=station_speed.index,
    y=station_speed,
    title='Time Series Plot',
)
# The update_* methods return the figure itself, so the chained call is what the cell displays.
fig.update_xaxes(title='Time').update_yaxes(title='Speed (mph)')

# Prepare Data for Model

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [None]:
def construct_dataset(dataset, look_back=1):
    """Convert a time-ordered array into lagged-window inputs and next-step targets.

    Parameters
    ----------
    dataset : array-like
        Observations ordered in time along the first axis, e.g. shape ``(n, 1)``.
    look_back : int, default 1
        Number of consecutive observations that form one input window.

    Returns
    -------
    X : np.ndarray
        One row per sample: the flattened window ``dataset[i:i + look_back]``.
    y : np.ndarray
        One row per sample: the flattened observation ``dataset[i + look_back]``
        that immediately follows the window.
    """
    dataset = np.asarray(dataset)
    # A target exists for every i with i + look_back <= n - 1, i.e. for
    # n - look_back samples. Using n - look_back - 1 would silently drop the
    # final window/target pair. Clamp at 0 for inputs shorter than the window.
    n_samples = max(dataset.shape[0] - look_back, 0)
    X = [dataset[i:i + look_back].flatten() for i in range(n_samples)]
    y = [dataset[i + look_back].flatten() for i in range(n_samples)]
    return np.array(X), np.array(y)

def reshape_inp(inp):
    """Insert a length-1 middle axis: ``(rows, cols)`` becomes ``(rows, 1, cols)``."""
    n_rows, n_cols = inp.shape[0], inp.shape[1]
    return inp.reshape((n_rows, 1, n_cols))

In [None]:
# Raw speeds as a single-feature column vector of shape (n_samples, 1).
speed_values = station_speed.dropna().values.reshape(-1, 1)

# Chronological train-test split: first 75% for training, the rest for testing.
cutoff = int(speed_values.shape[0] * 0.75)

# normalize data
# Fit the scaler on the training portion only so the test set's min/max do not
# leak into preprocessing. The test portion is transformed with the training
# statistics and may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
train = scaler.fit_transform(speed_values[:cutoff])
test = scaler.transform(speed_values[cutoff:])

# Full scaled series, kept under its original name because later cells read it.
scaled_speeds = np.concatenate([train, test], axis=0)

# build dataset
num_lags = 1
X_train, y_train = construct_dataset(train, look_back=num_lags)
X_test, y_test = construct_dataset(test, look_back=num_lags)

# reshape inp: add the length-1 axis so inputs match LSTM(input_shape=(1, num_lags))
X_train = reshape_inp(X_train)
X_test = reshape_inp(X_test)

# Build Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import keras

In [None]:
# Define the network: a 4-unit LSTM layer followed by a single-unit Dense output.
model = Sequential([
    LSTM(4, input_shape=(1, num_lags)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit for 25 epochs with one sample per batch.
model.fit(X_train, y_train, epochs=25, batch_size=1, verbose=2)

In [None]:
# model.save('./trained/LSTM')

# Evaluate

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Predict in scaled space and map the outputs straight back to the original units.
pred_train = scaler.inverse_transform(model.predict(X_train))
pred_test = scaler.inverse_transform(model.predict(X_test))

# Undo the scaling on the targets too, so they are comparable with the predictions.
# NOTE: y_train / y_test are overwritten here, so re-running this cell without
# rebuilding the datasets first would invert the scaling a second time.
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)

In [None]:
# Each buffer is plotted against one segment of the series
# (station_speed[:cutoff] for train, station_speed[cutoff:] for test), so it is
# sized to that segment rather than to the full series; otherwise the x and y
# arrays passed to the plot have different lengths.
# The first `num_lags` slots stay NaN because those points have no prediction.

# shift train predictions for plotting
train_pred_plot = np.full((cutoff, 1), np.nan)
train_pred_plot[num_lags:(len(pred_train) + num_lags), :] = pred_train

# shift test predictions for plotting
test_pred_plot = np.full((scaled_speeds.shape[0] - cutoff, 1), np.nan)
test_pred_plot[num_lags:(len(pred_test) + num_lags), :] = pred_test

In [None]:
import plotly.graph_objects as go

In [None]:
# Overlay the observed series with the train and test predictions.
# go.Scatter(mode='lines') is used because go.Line is a deprecated alias that
# emits a deprecation warning on every call.
fig = go.Figure()
fig.add_trace(go.Scatter(x=station_speed.index, y=station_speed,
                         mode='lines', name='True Values'))
fig.add_trace(go.Scatter(x=station_speed[:cutoff].index, y=train_pred_plot.flatten(),
                         mode='lines', name='Predicted Values (Train)'))
fig.add_trace(go.Scatter(x=station_speed[cutoff:].index, y=test_pred_plot.flatten(),
                         mode='lines', name='Predicted Values (Test)'))
fig.update_layout(
    title="LSTM Forecast Results",
    xaxis_title="Time",
    yaxis_title="Forecast")

In [None]:
# calculate root mean squared error
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`, which newer
# scikit-learn releases no longer accept.
rmse_train = np.sqrt(mean_squared_error(y_train[:, 0], pred_train[:, 0]))
print('Train Score: %.2f RMSE' % rmse_train)
rmse_test = np.sqrt(mean_squared_error(y_test[:, 0], pred_test[:, 0]))
print('Test Score: %.2f RMSE' % rmse_test)

In [None]:
# with open('./trained/LSTM/metrics_LSTM.dat', 'wb') as f:
#     metrics = {'mse': mean_squared_error(y_test[:, 0], pred_test[:,0]), 'rmse': rmse_test, 'r2': r2_score(y_test[:, 0], pred_test[:,0])}
#     pickle.dump(metrics, f)