In [1]:
import numpy as np
from typing import Tuple, Dict
np.set_printoptions(edgeitems=30, linewidth=100000, 
    formatter=dict(float=lambda x: "%.3g" % x))
import pandas as pd
import tensorflow as tf
from tensorflow.math import logical_not
from einops import rearrange, repeat
from builderfuncs import restore_model
from tabulate import tabulate
from ktfuncs import *



df = pd.read_csv('data/scaled_U2_data.csv', index_col=0)

df.drop("UNNAMED: 0", axis=1, inplace=True)

devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(devices[0], True)

print("Num GPUs Available: ", len(devices))

Num GPUs Available:  1


In [2]:
print(list(df.columns)[27])

FA_2_LR_DMP


# RNN Setup

In [3]:
def ttv_split(ds: tf.data.Dataset, ds_size: int, 
              train_split: float = 0.8, 
              val_split: float = 0.1, 
              test_split: float = 0.1) -> Tuple[tf.data.Dataset, 
                                                tf.data.Dataset, 
                                                tf.data.Dataset]:

    assert (train_split + test_split + val_split) == 1
   
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    
    train_ds = ds.take(train_size)    
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    
    return train_ds, val_ds, test_ds


def split_window(features: tf.Tensor) -> Tuple[Dict[tf.Tensor, tf.Tensor],
                                               tf.Tensor]:
    inputs = features[:12, 1:]
    state_labels = features[12, 75:]
    targ_labels = tf.expand_dims(features[11, 0], axis=0)
    labels = tf.concat([targ_labels, state_labels], axis=0)
    inputs.set_shape([12, 247])
    labels.set_shape([174])
    

    return inputs, labels, features

                        
def make_dataset(data: pd.DataFrame, length: int, 
                 batch_size: int = 64, multistep: bool = True) -> Tuple[tf.data.Dataset, 
                                                                        tf.data.Dataset, 
                                                                        tf.data.Dataset]:
    data = np.array(data.iloc[:, :], dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(data=data,
                                                        targets=None,
                                                        sequence_length=length,
                                                        sequence_stride=1,
                                                        shuffle=True,
                                                        seed=1,
                                                        batch_size=None)

    ds = ds.filter(lambda x: tf.reduce_all(logical_not(tf.math.is_nan(x))))
    ds = ds.map(split_window).batch(batch_size)
    # ds = ds.apply(tf.data.experimental.assert_cardinality(170680//batch_size + 1))
    train_ds, val_ds, test_ds = ttv_split(ds, 170680//batch_size, train_split=0.8, val_split=0.1, test_split=0.1)
    train_ds = train_ds
    return train_ds, val_ds, test_ds

In [4]:
bs = 64
train_data, val_data, test_data = make_dataset(df, 23, batch_size=bs)
test_iter = iter(test_data)


In [5]:
gru = restore_model("model_folder/gru_folder", modeltype="gru")
bigru = restore_model("model_folder/bigru_folder", modeltype="bigru")
lstm = restore_model("model_folder/lstm_folder", modeltype="lstm")
bilstm = restore_model("model_folder/bilstm_folder", modeltype="bilstm")

In [6]:
from time import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score

class predsandtimes():
    def __init__(self, model) -> None:
        self.name = model.name
        self.model = model
        self.preds    = []
        self.labels = []
        self.times    = []
        self.r2s   = []
        self.mses = []
        self.maes = []
        self.evs = []

    def multistep_pred(self, inpts, ftrs):
        preds = []
        s1 = time()
        for i in range(1, 13):
            pred = self.model.predict(inpts, verbose=0)
            preds.append(pred)
            try:
                inpts = inpts[:, 1:, :]
                next_step = ftrs[:, 11+i, 1:]
                next_step[:,  74:] = pred[:, 1:]
                next_step = rearrange(next_step, "b d -> b 1 d")
                inpts = np.concatenate([inpts, next_step], axis=1)
            except IndexError:
                break
        s2 = time()
        self.times.append(s2-s1)
        preds = rearrange(np.array(preds), "t b d -> b t d")
        self.preds.append(preds)
        self.labels.append(ftrs[:, 11:, 0])
        self.r2s.append(r2_score(ftrs[:, 11:, 0], preds[:, :, 0]))
        self.mses.append(mean_squared_error(ftrs[:, 11:, 0], preds[:, :, 0]))
        self.maes.append(mean_absolute_error(ftrs[:, 11:, 0], preds[:, :, 0]))
        self.evs.append(explained_variance_score(ftrs[:, 11:, 0], preds[:, :, 0]))




### Get Predictions from each RNN

In [7]:
iteration = 0
metric_list = [predsandtimes(gru), predsandtimes(bigru), predsandtimes(lstm), predsandtimes(bilstm)]

for iteration, test1 in enumerate(test_iter):
    print(f"{iteration}", end='\r')

    test1 = next(test_iter)
    inputs = test1[0]
    features = test1[2].numpy()

    for metric in metric_list:
        metric.multistep_pred(inputs, features)

140

In [8]:
table = [[metric.name, np.mean(metric.mses), np.mean(metric.maes), np.mean(metric.r2s), np.mean(metric.evs)] for metric in metric_list]
headers = ["name", "MSE", "MAE", "R2", "Exp. Variance"]
print(tabulate(table, headers=headers))


name                 MSE        MAE        R2    Exp. Variance
------------  ----------  ---------  --------  ---------------
gru_model     0.0040916   0.0417484  0.920992         0.9244
bigru_model   0.00331878  0.0364155  0.937081         0.938113
lstm_model    0.00285724  0.0350658  0.944131         0.946657
bilstm_model  0.00276496  0.0340436  0.945564         0.946977


In [11]:
for metric in metric_list:
    print(metric.name, np.mean(metric.times)/64)

gru_model 0.011847334761991568
bigru_model 0.014533195433253092
lstm_model 0.012506076128136182
bilstm_model 0.016080541179535238


In [12]:
gru.predict(test_data)
bigru.predict(test_data)
lstm.predict(test_data)
bilstm.predict(test_data)



array([[0.173, 0.367, 0.85, 0.703, 0.627, 0.609, 0.794, 0.778, 0.464, 0.619, 0.615, 0.783, 0.852, 0.261, 0.792, 0.874, 0.111, 0.00403, 0.123, 0.00431, 0.00586, 0.00354, 0.686, 0.735, 0.729, 0.032, 0.78, 0.485, 0.45, 0.266, ..., 0.572, 0.603, 0.718, 0.719, 0.0302, 0.607, 0.614, 0.63, 0.764, 0.747, 0.347, 0.783, 0.538, 0.757, 0.823, 0.704, 0.535, 0.676, 0.833, 0.607, 0.092, 0.0664, 0.116, 0.594, 0.378, 0.417, 0.4, 0.874, 0.646, 0.922],
       [0.241, 0.809, 0.259, 0.118, 0.712, 0.626, 0.884, 0.778, 0.507, 0.697, 0.694, 0.41, 0.847, 0.325, 0.792, 0.891, 0.0967, 0.000972, 0.111, 0.00227, 0.00395, 0.00489, 0.519, 0.696, 0.689, 0.701, 0.733, 0.411, 0.438, 0.282, ..., 0.574, 0.565, 0.783, 0.779, 0.0299, 0.594, 0.698, 0.693, 0.794, 0.724, 0.46, 0.589, 0.61, 0.853, 0.867, 0.776, 0.625, 0.671, 0.813, 0.686, 0.193, 0.105, 0.216, 0.603, 0.161, 0.42, 0.349, 0.887, 0.719, 0.935],
       [0.502, 0.806, 0.12, 0.562, 0.448, 0.627, 0.743, 0.716, 0.343, 0.456, 0.459, 0.72, 0.883, 0.171, 0.78, 0.773, 0.09