In [1]:
import numpy as np
from typing import Tuple, Dict
np.set_printoptions(edgeitems=30, linewidth=100000, 
    formatter=dict(float=lambda x: "%.3g" % x))
import pandas as pd
import tensorflow as tf
from tensorflow.math import logical_not
from einops import rearrange, repeat
from builderfuncs import restore_model
from tabulate import tabulate
from ktfuncs import *



df = pd.read_csv('data/scaled_U2_data.csv', index_col=0)

df.drop("UNNAMED: 0", axis=1, inplace=True)

devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(devices[0], True)

print("Num GPUs Available: ", len(devices))

Num GPUs Available:  1


# RNN Setup

In [2]:
def ttv_split(ds: tf.data.Dataset, ds_size: int, 
              train_split: float = 0.8, 
              val_split: float = 0.1, 
              test_split: float = 0.1) -> Tuple[tf.data.Dataset, 
                                                tf.data.Dataset, 
                                                tf.data.Dataset]:

    assert (train_split + test_split + val_split) == 1
   
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    
    train_ds = ds.take(train_size)    
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    
    return train_ds, val_ds, test_ds


def split_window(features: tf.Tensor) -> Tuple[Dict[tf.Tensor, tf.Tensor],
                                               tf.Tensor]:
    inputs = features[:12, 1:]
    state_labels = features[12, 75:]
    targ_labels = tf.expand_dims(features[11, 0], axis=0)
    labels = tf.concat([targ_labels, state_labels], axis=0)
    inputs.set_shape([12, 247])
    labels.set_shape([174])
    

    return inputs, labels, features

                        
def make_dataset(data: pd.DataFrame, length: int, 
                 batch_size: int = 64, multistep: bool = True) -> Tuple[tf.data.Dataset, 
                                                                        tf.data.Dataset, 
                                                                        tf.data.Dataset]:
    data = np.array(data.iloc[:, :], dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(data=data,
                                                        targets=None,
                                                        sequence_length=length,
                                                        sequence_stride=1,
                                                        shuffle=True,
                                                        seed=1,
                                                        batch_size=None)

    ds = ds.filter(lambda x: tf.reduce_all(logical_not(tf.math.is_nan(x))))
    ds = ds.map(split_window).batch(batch_size)
    # ds = ds.apply(tf.data.experimental.assert_cardinality(170680//batch_size + 1))
    train_ds, val_ds, test_ds = ttv_split(ds, 170680//batch_size, train_split=0.8, val_split=0.1, test_split=0.1)
    train_ds = train_ds
    return train_ds, val_ds, test_ds

In [3]:
bs = 64
train_data, val_data, test_data = make_dataset(df, 23, batch_size=bs)
test_iter = iter(test_data)


In [4]:
gru = restore_model("model_folder/gru_folder", modeltype="gru")
bigru = restore_model("model_folder/bigru_folder", modeltype="bigru")
lstm = restore_model("model_folder/lstm_folder", modeltype="lstm")
bilstm = restore_model("model_folder/bilstm_folder", modeltype="bilstm")

In [5]:
from time import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score

class predsandtimes():
    def __init__(self, model) -> None:
        self.name = model.name
        self.model = model
        self.preds    = []
        self.labels = []
        self.times    = []
        self.r2s   = []
        self.mses = []
        self.maes = []
        self.evs = []

    def multistep_pred(self, inpts, ftrs):
        preds = []
        s1 = time()
        for i in range(1, 13):
            pred = self.model.predict(inpts, verbose=0)
            preds.append(pred)
            try:
                inpts = inpts[:, 1:, :]
                next_step = ftrs[:, 11+i, 1:]
                next_step[:,  74:] = pred[:, 1:]
                next_step = rearrange(next_step, "b d -> b 1 d")
                inpts = np.concatenate([inpts, next_step], axis=1)
            except IndexError:
                break
        s2 = time()
        self.times.append(s2-s1)
        preds = rearrange(np.array(preds), "t b d -> b t d")
        self.preds.append(preds)
        self.labels.append(ftrs[:, 11:, 0])
        self.r2s.append(r2_score(ftrs[:, 11:, 0], preds[:, :, 0]))
        self.mses.append(mean_squared_error(ftrs[:, 11:, 0], preds[:, :, 0]))
        self.maes.append(mean_absolute_error(ftrs[:, 11:, 0], preds[:, :, 0]))
        self.evs.append(explained_variance_score(ftrs[:, 11:, 0], preds[:, :, 0]))




### Get Predictions from each RNN

In [6]:
iteration = 0
metric_list = [predsandtimes(gru), predsandtimes(bigru), predsandtimes(lstm), predsandtimes(bilstm)]

for iteration, test1 in enumerate(test_iter):
    print(f"{iteration}", end='\r')

    test1 = next(test_iter)
    inputs = test1[0]
    features = test1[2].numpy()

    for metric in metric_list:
        metric.multistep_pred(inputs, features)

8

In [7]:
table = [[metric.name, np.mean(metric.mses), np.mean(metric.maes), np.mean(metric.r2s), np.mean(metric.evs)] for metric in metric_list]
headers = ["name", "MSE", "MAE", "R2", "Exp. Variance"]
print(tabulate(table, headers=headers))


name                 MSE        MAE        R2    Exp. Variance
------------  ----------  ---------  --------  ---------------
gru_model     0.00349377  0.0415791  0.929307         0.931322
bigru_model   0.00276085  0.0362202  0.94416          0.944263
lstm_model    0.0023954   0.034826   0.951492         0.953017
bilstm_model  0.00235463  0.0339577  0.952249         0.952866
