In [None]:
from sklearn.model_selection import GridSearchCV

## Load All Data

In [None]:
import pickle
import sys

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf
import yaml
from sklearn.model_selection import TimeSeriesSplit

sys.path.append('../utils')
from utils import load_processed_data, cv, get_test_metrics

# Read the preprocessed artefacts for the `rdp_ds` dataset: adjacency matrix,
# index -> station mapping and the speed table.
adj_mat, ind_station_mapper, speed_df = load_processed_data('../data/processed/rdp_ds')

# Shared experiment settings (e.g. `station_id`, `num_lags`) live in a YAML file.
# NOTE(review): FullLoader is more permissive than yaml.safe_load; if env.yaml only
# holds plain scalars/lists/dicts, safe_load would be the safer choice — confirm.
with open('../models/env.yaml') as f:
    ENV = yaml.load(f, Loader=yaml.FullLoader)

# Keep only the configured station's readings that fall in May, June or July.
station_speed = speed_df[ENV['station_id']]
may_to_july_mask = station_speed.index.month.isin([5, 6, 7])
station_speed = station_speed[may_to_july_mask]

In [None]:
class WindowGenerator():
    """Cut a time series into fixed-size sliding windows of (inputs, labels).

    A window spans ``input_width + shift`` consecutive time steps. The first
    ``input_width`` steps are the model inputs; the last ``label_width`` steps
    are the labels.

    Parameters
    ----------
    input_width : int
        Number of leading time steps used as inputs (must be >= 1).
    label_width : int
        Number of trailing time steps used as labels (must be >= 1 and no
        larger than the whole window).
    shift : int
        How many steps the window extends past the end of the inputs
        (must be >= 0).
    train, test :
        Raw data for each split, stored untouched. ``get_train`` /
        ``get_test`` call ``.to_frame()`` on them, so they must provide that
        method (e.g. a pandas Series).

    Raises
    ------
    ValueError
        If the window parameters cannot describe a valid window.
    """

    def __init__(self, input_width, label_width, shift,
                 train, test):
        # Fail fast: with an invalid combination the slices below would be
        # built without complaint but would select the wrong number of steps,
        # and `split_window` would then stamp an incorrect static shape on them.
        if input_width < 1:
            raise ValueError(f'input_width must be >= 1, got {input_width}')
        if label_width < 1:
            raise ValueError(f'label_width must be >= 1, got {label_width}')
        if shift < 0:
            raise ValueError(f'shift must be >= 0, got {shift}')
        if label_width > input_width + shift:
            raise ValueError(
                f'label_width ({label_width}) cannot exceed the total window '
                f'size input_width + shift ({input_width + shift})')

        # Store the raw data.
        self.train = train
        self.test = test

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        # Inputs are the first `input_width` positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels are the last `label_width` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def split_window(self, features):
        """Split a batch of windows into its input part and its label part.

        ``features`` is indexed as a rank-3 tensor and sliced along its second
        axis (the position inside the window). Returns ``(inputs, labels)``.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

    def make_dataset(self, data):
        """Turn ``data`` into a dataset of ``(inputs, labels)`` window pairs.

        ``data`` is converted to a float32 array, cut into windows of
        ``total_window_size`` consecutive rows (stride 1, original order,
        one window per batch) and each batch is passed through
        ``split_window``.
        """
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(data=data, targets=None,
                                                          sequence_length=self.total_window_size,
                                                          sequence_stride=1, shuffle=False, batch_size=1)
        ds = ds.map(self.split_window)
        return ds

    def get_train(self):
        """Return the windowed dataset built from the training split."""
        return self.make_dataset(self.train.to_frame())

    def get_test(self):
        """Return the windowed dataset built from the test split."""
        return self.make_dataset(self.test.to_frame())

    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}'])

In [None]:
# Initialize the sliding window: `num_lags` past observations predict the next one.
num_lags = ENV['num_lags']
# NOTE(review): `train` and `test` are not defined in any visible cell, so this
# cell fails on a fresh kernel. Presumably they are a chronological split of
# `station_speed` — TODO restore the cell that creates them.
window = WindowGenerator(train=train, test=test, input_width=num_lags,
                         label_width=1, shift=1)

mod_train = window.get_train()
mod_test = window.get_test()

# Every dataset element is a batch holding exactly one window, so index 0 drops
# the batch axis. Each tensor is converted with `.numpy()` before stacking:
# calling np.array on a list of eager tensors converts them element by element,
# which is far slower and yields the same values.
X_train = []
y_train = []
for X, y in mod_train:
    X_train.append(X[0, :, :].numpy())
    y_train.append(y[0, :, :].numpy())
X_train = np.array(X_train)
y_train = np.array(y_train)
# Collapse the (label_width=1, feature) axes into a single target column.
y_train = y_train.reshape(y_train.shape[0], 1)


In [None]:
import os
aman_path_to_capstone_dir = 'drive/MyDrive/School/Undergrad/Spring 2022/Capstone/Models/' 
os.chdir(f'{aman_path_to_capstone_dir}STGCN Training/models')
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
!pip3 install prophet
import sys
sys.path.append('/content/drive/MyDrive/School/Undergrad/Spring 2022/Capstone/Models/STGCN Training/utils/')
from utils import load_processed_data

adj_mat, ind_station_mapper, speeds = load_processed_data('../data/processed/fwy_405_n_ds')

import yaml

with open('../models/env.yaml') as f:
    ENV = yaml.load(f, Loader=yaml.Loader)
    
# subset and choose data in may-july
speeds = speeds[speeds.index.month.isin([5, 6, 7])] 

# write to file
outfile = '../data/processed/fwy_405_n_ds/speeds_form.csv'
speeds.to_csv(outfile, index=False, header=False)

# write to file
outfile = '../data/processed/fwy_405_n_ds/adj_mat_form.csv'
pd.DataFrame(adj_mat).to_csv(outfile, index=False, header=False)

## Grid Search LSTM

In [None]:
# define the grid search parameters
# NOTE(review): `init_mode` is not referenced by any visible cell; kept in case
# a later cell reads it — TODO remove if unused.
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero',
             'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']

# Candidate numbers of lagged observations for the LSTM.
grid_values = {'n_lags': [1, 3, 5, 10]}

# The samples are time-ordered sliding windows. Plain k-fold (`cv=3`) would train
# on data that comes after the validation fold and would stitch disjoint
# segments together; forward-chaining splits always validate on data that
# follows the training data.
# NOTE(review): `model_CV` is not defined in any visible cell — TODO confirm
# where it is created.
grid = GridSearchCV(estimator=model_CV, param_grid=grid_values, n_jobs=-1,
                    cv=TimeSeriesSplit(n_splits=3))

grid_result = grid.fit(X_train, y_train)

In [None]:
# print results
# `best_score_` is the mean cross-validated score of the best parameter setting,
# computed with the estimator's scorer, so it is labelled "score" rather than
# "accuracy".
print(f'Best score: {grid_result.best_score_} using {grid_result.best_params_}')

# One line per candidate: mean and spread of its score across the CV splits.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

## Grid Search STGCN

In [None]:
# define the grid search parameters
# NOTE(review): `init_mode` is not referenced by any visible cell; kept in case
# a later cell reads it — TODO remove if unused.
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero',
             'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']

# Candidate values for the STGCN's `k` and for the number of training epochs.
grid_values = {'k': [3, 5, 8, 10, 12], 'epochs': [5, 10, 20, 30]}

# `speeds` is indexed by time. Plain k-fold (`cv=3`) would train on rows that
# come after the validation fold and would stitch disjoint segments together;
# forward-chaining splits always validate on rows that follow the training rows.
# NOTE(review): `model_CV` is not defined in any visible cell — TODO confirm
# where it is created.
grid = GridSearchCV(estimator=model_CV, param_grid=grid_values, n_jobs=-1,
                    cv=TimeSeriesSplit(n_splits=3))

grid_result = grid.fit(speeds)

In [None]:
# print results
# `best_score_` is the mean cross-validated score of the best parameter setting,
# computed with the estimator's scorer, so it is labelled "score" rather than
# "accuracy".
print(f'Best score: {grid_result.best_score_} using {grid_result.best_params_}')

# One line per candidate: mean and spread of its score across the CV splits.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')