In [1]:
import keras
import math
import pandas as pd
import numpy as np
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time
from torch.utils.data import Dataset, DataLoader
import pickle
pd.set_option('display.max_rows', 500)
import os
import tensorflow as tf
import torch
import torch.nn as nn
from math import sqrt
# mean_squared_error from sklearn; RMSE is obtained by taking its square root
from sklearn.metrics import mean_squared_error


# define random seeds for Neural Networks
torch.manual_seed(0)
np.random.seed(0)
tf.random.set_seed(0)
# ignore warnings jupyter notebook
import warnings
warnings.filterwarnings('ignore')

# OWRI FRAMEWORK

In [2]:
# Where the pickled results of this experiment are written:
# <base_result_path>/<exp_name>
exp_name = 'multivariate_AE_weighted_vector_real_time_results.pkl'
base_result_path = '../results/METR-LA/LSTM'
results_save_path = os.path.join(base_result_path, exp_name)

In [3]:
# Unpickle the raw per-intersection data into `results`
raw_data_pickle = '../data/METR-LA/METR_OWRI/featured_fpds_raw.pickle'
with open(raw_data_pickle, 'rb') as f:
    results = pickle.load(f)

In [4]:
# Unpickle the stored outlier-score (AE) results into `correlated_results`
correlated_pickle = '../results/METR-LA/outlier_scores/AE/correlated_results.pickle'
with open(correlated_pickle, 'rb') as f:
    correlated_results = pickle.load(f)

In [5]:
def merge_trejectory_data(results, trajectory, direction):
    """Combine the raw frames of one trajectory/direction into a single table.

    Each entry of ``results[trajectory][direction]['raw']`` is re-indexed by a
    ``DatetimeIndex`` built from its ``timestamp`` column (the column itself is
    then dropped), its ``cars`` column is renamed to the entry's key, and the
    result is outer-merged on the index with everything merged so far.

    Returns:
        pandas.DataFrame: the merged frame with every row containing a NaN
        removed.
    """
    raw_frames = results[trajectory][direction]['raw']
    merged = pd.DataFrame()
    for name in raw_frames:
        renamed = raw_frames[name].rename(columns={"cars": name})
        by_time = renamed.set_index(pd.DatetimeIndex(renamed['timestamp']))
        by_time = by_time.drop(columns=['timestamp'])
        merged = pd.merge(merged, by_time, left_index=True, right_index=True, how='outer')
    # keep only the timestamps for which every merged column has a value
    return merged.dropna()

In [6]:
def merge_trejectory_data_metr(results):
    """Merge every frame stored under ``results['raw']`` into one table.

    For each key of ``results['raw']`` the frame's ``cars`` column is renamed
    to that key, the frame is indexed by a ``DatetimeIndex`` built from its
    ``timestamp`` column (which is then removed), and the frame is outer-merged
    on the index into the running result.

    Returns:
        pandas.DataFrame: the merged table without any row that contains NaN.
    """
    sensors = results['raw']
    combined = pd.DataFrame()
    for sensor_name in sensors:
        frame = sensors[sensor_name].rename(columns={"cars": sensor_name})
        time_index = pd.DatetimeIndex(frame['timestamp'])
        frame = frame.set_index(time_index).drop(columns=['timestamp'])
        combined = pd.merge(combined, frame, left_index=True, right_index=True, how='outer')
    # rows with a missing value in any column are discarded
    return combined.dropna()

In [7]:
def add_top_corr_features(data, top_corr_df):
    """Expand every feature value into its correlation-weighted copies.

    For each sample ``i`` and time step ``j`` the feature vector ``data[i][j]``
    is multiplied, as a column, with ``top_corr_df.values`` and the product is
    flattened row-major. With ``data`` of shape ``(n, t, f)`` and weights of
    shape ``(f, k)`` the result has shape ``(n, t, f * k)``.

    The multiplication is done with one broadcast operation instead of a Python
    loop over every sample and time step. Empty input keeps its rank rather
    than collapsing to a shape-``(0,)`` array.

    Args:
        data: array with at least two dimensions; everything after the first
            two axes is flattened into the feature axis.
        top_corr_df: object exposing the weight matrix as ``.values``.

    Returns:
        numpy.ndarray: the weighted features, one flattened block per
        (sample, time step).
    """
    data = np.asarray(data)
    weights = np.asarray(top_corr_df.values)
    n_samples, n_steps = data.shape[0], data.shape[1]
    # explicit size instead of -1 so that zero-sized input can still be reshaped
    n_values = int(np.prod(data.shape[2:]))
    # (n, t, f, 1) * (f, k) -> (n, t, f, k)
    weighted = data.reshape(n_samples, n_steps, n_values, 1) * weights
    return weighted.reshape(n_samples, n_steps, weighted.shape[2] * weighted.shape[3])

In [8]:
def preprocess_df(df,top_corr_df, n_obs, n_features, sequence_length, train_portion=0.8):
    """Split a supervised frame chronologically, scale the inputs and add weighted features.

    The first ``ceil(len(df) * train_portion)`` rows form the training set and
    the remainder the test set (no shuffling). Inputs are the first ``n_obs``
    columns, targets the last ``n_features`` columns. A ``StandardScaler`` is
    fitted on the training inputs only and applied to both input sets; the
    targets are returned unscaled. The scaled inputs are reshaped to
    ``(rows, sequence_length, n_features)`` and then expanded with
    ``add_top_corr_features``.

    (An earlier variant that appended the raw correlation rows as extra
    features, instead of weighting, is not used.)

    Args:
        df: supervised-learning frame (lag columns followed by target columns).
        top_corr_df: correlation weights passed to ``add_top_corr_features``.
        n_obs: number of leading input columns; must equal
            ``sequence_length * n_features`` for the reshape to succeed.
        n_features: number of variables per time step.
        sequence_length: number of lagged time steps per sample.
        train_portion: fraction of rows used for training (default 0.8, the
            value that used to be hard-coded).

    Returns:
        tuple: ``(train_X, train_y, test_X, test_y, scaler)`` where ``scaler``
        is the fitted ``StandardScaler``.

    Raises:
        ValueError: if ``train_portion`` is not strictly between 0 and 1.
    """
    if not 0 < train_portion < 1:
        raise ValueError("train_portion must be strictly between 0 and 1")

    values = df.values
    split_at = math.ceil(len(df) * train_portion)
    train_values, test_values = values[:split_at], values[split_at:]

    train_X, train_y = train_values[:, :n_obs], train_values[:, -n_features:]
    test_X, test_y = test_values[:, :n_obs], test_values[:, -n_features:]

    # fit on the training inputs only so no test statistics leak into scaling
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    test_X = scaler.transform(test_X)

    train_X = train_X.reshape((train_X.shape[0], sequence_length, n_features))
    test_X = test_X.reshape((test_X.shape[0], sequence_length, n_features))

    # expand every feature into its correlation-weighted copies
    train_X = add_top_corr_features(train_X, top_corr_df)
    test_X = add_top_corr_features(test_X, top_corr_df)

    return train_X, train_y, test_X, test_y, scaler

In [9]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged copies.

    The data is wrapped in a ``DataFrame`` and concatenated column-wise with
    shifted copies of itself: ``n_in`` past steps (``t-n_in`` ... ``t-1``)
    followed by ``n_out`` steps starting at ``t``. Columns are named
    ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.

    The number of variables is taken from the constructed frame. Previously
    every ``list`` input was assumed to have a single variable, so a list of
    multi-column rows failed when the column names were assigned.

    Args:
        data: anything ``pandas.DataFrame`` accepts (list, array, frame).
        n_in: number of lag steps used as input.
        n_out: number of steps (from ``t`` onward) used as output.
        dropnan: drop the rows made incomplete by shifting.

    Returns:
        pandas.DataFrame: the framed series.
    """
    df = pd.DataFrame(data)
    n_vars = df.shape[1]
    shifted, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        shifted.append(df.shift(lag))
        names.extend('var%d(t-%d)' % (j + 1, lag) for j in range(n_vars))
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        shifted.append(df.shift(-step))
        suffix = '(t)' if step == 0 else '(t+%d)' % step
        names.extend('var%d%s' % (j + 1, suffix) for j in range(n_vars))
    agg = pd.concat(shifted, axis=1)
    agg.columns = names
    if dropnan:
        agg = agg.dropna()
    return agg

In [10]:
def get_top_corr_features(top_corr_df, top_k_col):
    """Keep, for every column, its ``top_k_col`` largest values.

    Each column of ``top_corr_df`` is sorted in descending order and truncated
    to its first ``top_k_col`` entries. The truncated value arrays become the
    rows of the returned frame, which is indexed by the original column labels
    (the labels of the retained entries are not kept).
    """
    strongest_per_column = [
        top_corr_df[label].sort_values(ascending=False)[:top_k_col].values
        for label in top_corr_df.columns
    ]
    return pd.DataFrame(strongest_per_column, index=top_corr_df.columns)

In [11]:
class LSTM_uni(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: number of hidden units in the LSTM state.
        output_dim: size of the prediction produced by the linear layer.
        device: accepted for backward compatibility only. The initial states
            are created on the device of the input, so the model no longer
            depends on a global ``device`` variable.
        layer_dim: number of stacked LSTM layers.
        dropout_prob: dropout between stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, device = 'mps',layer_dim=1, dropout_prob = 0.2):
        super().__init__()
        self.hidden_dim = hidden_dim # number of hidden units in hidden state
        self.layer_dim = layer_dim # number of stacked lstm layers
        # nn.LSTM applies dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and only triggers a warning.
        lstm_dropout = dropout_prob if layer_dim > 1 else 0.0
        # batch_first=True -> tensors are (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_dim, output_dim) # fully connected layer

    def forward(self, x, future=False):
        """Return the prediction for the last time step of every sequence.

        Args:
            x: tensor of shape (batch_dim, seq_dim, feature_dim).
            future: unused; kept so existing call sites keep working.

        Returns:
            Tensor of shape (batch_dim, output_dim).
        """
        # zero initial hidden/cell states, (layer_dim, batch_size, hidden_dim),
        # allocated with the dtype and device of the input
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        # out: (batch_dim, seq_dim, hidden_dim)
        out, _ = self.lstm(x, (h0, c0))
        # only the output of the last time step feeds the linear layer
        return self.fc(out[:, -1, :])

In [12]:
def train_model(model, train_X,train_y, loss_fn, optimiser, device, batch_size, epochs=250):
    """Train ``model`` on ``(train_X, train_y)`` and return the loss history.

    Inputs and targets are batched by two unshuffled ``DataLoader``s that are
    iterated in lock-step, so batch ``k`` of ``train_X`` is always paired with
    batch ``k`` of ``train_y``.

    Changes with respect to the previous implementation:
      * the model is put into training mode first, so dropout is active even
        if the model was evaluated (``model.eval()``) beforehand;
      * the epoch loss is the sample-weighted mean of the batch losses. It used
        to be the sum of batch losses divided by the number of samples, which
        understates a mean-reduced loss by roughly the batch size.

    Args:
        model: module called as ``model(X)``.
        train_X, train_y: indexable datasets of equal length.
        loss_fn: callable applied to the flattened prediction and target; the
            epoch average assumes it returns the batch mean (as
            ``torch.nn.MSELoss()`` does by default).
        optimiser: optimiser updating ``model``'s parameters.
        device: device the batches are moved to.
        batch_size: number of samples per batch.
        epochs: number of passes over the data.

    Returns:
        dict: ``history[epoch]`` is the list of batch losses of that epoch and
        ``history['train_loss']`` the list of per-epoch average losses.
    """
    history = {'train_loss': []}

    train_X_loader = DataLoader(train_X, batch_size=batch_size, shuffle=False)
    train_y_loader = DataLoader(train_y, batch_size=batch_size, shuffle=False)
    n_samples = len(train_X)

    model.train()
    for epoch in range(epochs):
        batch_losses = []
        weighted_loss = 0.0
        for X, y in zip(train_X_loader, train_y_loader):
            X = X.to(device)
            y = y.to(device)
            prediction = model(X)
            loss = loss_fn(prediction.reshape(-1), y.reshape(-1))
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            loss_value = loss.item()
            batch_losses.append(loss_value)
            # weight by batch size so a smaller final batch is not over-counted
            weighted_loss += loss_value * X.size(0)
        history[epoch] = batch_losses
        history['train_loss'].append(weighted_loss / n_samples if n_samples else float('nan'))

    return history
    

In [13]:
def model_evaluation( model, test_X, device):
    """Run ``model`` over ``test_X`` and return the stacked predictions.

    The model is switched to evaluation mode, ``test_X`` is fed through it in
    unshuffled batches of 64 on ``device`` with gradients disabled, and the
    per-batch outputs are moved to the CPU and stacked vertically into one
    NumPy array.
    """
    model = model.eval()
    batches = DataLoader(test_X, batch_size=64, shuffle=False)
    with torch.no_grad():
        outputs = [model(batch.to(device)).cpu().numpy() for batch in batches]
    return np.vstack(outputs)

In [14]:
def preprocess_df_2(df,ae_score):
    """Replace every row of ``df`` by its correlation-weighted sums.

    With ``C = ae_score.corr().values``, output value ``i`` of a row ``r`` is
    ``sum_j r[j] * C[i, j]``. This is computed for all rows at once as the
    matrix product ``df.values @ C.T`` instead of a Python loop over
    ``df.iterrows()``.

    Args:
        df: frame whose number of columns matches the size of the correlation
            matrix of ``ae_score``.
        ae_score: frame whose column-wise correlation matrix supplies the
            weights.

    Returns:
        pandas.DataFrame: weighted values with the columns of ``df`` and a
        fresh default integer index (the index of ``df`` is not carried over).
    """
    correlated_AE = ae_score.corr().values
    weighted = df.values @ correlated_AE.T
    return pd.DataFrame(weighted, columns=df.columns)

In [15]:
# ---- experiment configuration ----
# thresholds to evaluate; the full sweep is [0,0.25,0.5,0.75,1]
thresholds = [0.25]

# windowing of the time series
sequence_length = 12
output_pred = 1 # number of time steps to predict

# network size
hidden_size = 32
num_layers = 1
dropout = 0.2

# optimisation
epoch = 100
batch_size = 64
learning_rate = 0.1

# use Apple's MPS backend when it is available, otherwise the CPU
if torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

In [16]:
# errors={}  # define a dictionary to store the errors
# dfs={} # Define a dictionary to store the dataframes
# errors = {} # Define a dictionary to store the errors
# intersection_arrays = [] # Define a list to store the intersection arrays
# for trajectory in results.keys(): # Loop over all trajectories
#     print("\nStarting trajectory: {}".format(trajectory))  
#     # Define a dictionary to store the errors for this trajectory
#     errors[trajectory]={}
#     # Loop over all directions
#     for direction in results[trajectory]:
#         print("\nStarting direction: {}".format(direction))
#         # Define a dictionary to store the errors for this trajectory and direction
#         errors[trajectory][direction]={}
#         # Loop over all thresholds
#         for threshold in thresholds:
#             print("Starting threshold: {}".format(threshold))
#             # Define a dictionary to store the errors for this trajectory, direction, and threshold
#             errors[trajectory][direction][threshold]={}
#             # ------------------------------------ data processing ---------------------------------------- #
#             df = merge_trejectory_data(results, trajectory, direction)# get raw data of the current trajectory and direction
#             ae_score = correlated_results[trajectory][direction] # AE scores of the current trajectory and direction
#             top_k_col = math.ceil(len(ae_score.columns)*threshold) # number of outlier weighted intersections to use
#             if top_k_col==0: # if threshold is 0, then use the target intersection only
#                 top_k_col=1
#             top_corr_df = ae_score.corr()[df.columns.to_list()] # rearrange the correlation matrix
#             top_corr_df = get_top_corr_features(top_corr_df, top_k_col) # get the top k correlated intersections
#             n_features = len(df.columns) # number of features (correlated intersections)
#             n_obs = sequence_length * n_features # number of columns in the input
#             # weighted_df = preprocess_df_2(df,ae_score)
#             reframed = series_to_supervised(df, sequence_length, output_pred)
#             train_X, train_y, test_X, test_y, scl = preprocess_df(reframed,top_corr_df, n_obs, n_features, sequence_length)
#             train_X, train_y, test_X, test_y = train_X.astype('float32'), train_y.astype('float32'), test_X.astype('float32'), test_y.astype('float32')


#             # # # ------------------------------------ modelling ---------------------------------------------- #
#             # define model, loss function and optimizer
#             model = LSTM_uni(input_dim = train_X.shape[-1], hidden_dim = hidden_size, layer_dim = num_layers, output_dim = train_y.shape[1], dropout_prob= dropout)
#             model = model.to(device)
#             loss_fn = torch.nn.MSELoss()
#             optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
#             start = time.time()
#             history = train_model(model, train_X,train_y, loss_fn, optimiser, device, batch_size = batch_size, epochs=epoch)
#             end = time.time()
#             print("Training time: {}".format(end-start))

#             # ------------------------------------ evaluation ---------------------------------------------- #
#             yhat = model_evaluation(model, test_X , device)
#             errors[trajectory][direction][threshold]['RMSE'] = sqrt(mean_squared_error(yhat,test_y))
#             errors[trajectory][direction][threshold]['MAE'] = mean_absolute_error(yhat,test_y)
#             errors[trajectory][direction][threshold]['history'] = history
#             errors[trajectory][direction][threshold]['df'] = {"Real":test_y,"Predicted":yhat}
#             errors[trajectory][direction][threshold]['train_time'] = end-start
#             print("RMSE: {}".format(errors[trajectory][direction][threshold]['RMSE']))


# # save errors in save path as pickle file
# with open(results_save_path, 'wb') as handle:
#     pickle.dump(errors, handle)

In [17]:
errors = {}  # per-threshold results, keyed by threshold
dfs = {}  # dictionary to store the dataframes
intersection_arrays = []  # list to store the intersection arrays

# ------------------------------------ data processing ---------------------------------------- #
# Everything below is independent of the threshold, so it is computed once
# instead of once per loop iteration.
df = merge_trejectory_data_metr(results)  # merged raw data, one column per intersection
ae_score = correlated_results['df']  # AE scores
corr_matrix = ae_score.corr()[df.columns.to_list()]  # correlation columns ordered like df
n_features = len(df.columns)  # number of features (intersections)
n_obs = sequence_length * n_features  # number of columns in the input
reframed = series_to_supervised(df, sequence_length, output_pred)

for threshold in thresholds:
    print("Starting threshold: {}".format(threshold))
    errors[threshold] = {}
    # number of correlation-weighted intersections to use; at least one, so a
    # threshold of 0 still keeps the target intersection itself
    top_k_col = max(1, math.ceil(len(ae_score.columns) * threshold))
    top_corr_df = get_top_corr_features(corr_matrix, top_k_col)  # top k correlations per intersection
    train_X, train_y, test_X, test_y, scl = preprocess_df(reframed, top_corr_df, n_obs, n_features, sequence_length)
    train_X, train_y, test_X, test_y = train_X.astype('float32'), train_y.astype('float32'), test_X.astype('float32'), test_y.astype('float32')
    # NOTE: only the first threshold is prepared while the modelling and
    # evaluation code below is commented out; remove this `break` when it is
    # re-enabled.
    break

#     # # # ------------------------------------ modelling ---------------------------------------------- #
#     # define model, loss function and optimizer
#     model = LSTM_uni(input_dim = train_X.shape[-1], hidden_dim = hidden_size, layer_dim = num_layers, output_dim = train_y.shape[1], dropout_prob= dropout)
#     model = model.to(device)
#     loss_fn = torch.nn.MSELoss()
#     optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
#     start = time.time()
#     history = train_model(model, train_X,train_y, loss_fn, optimiser, device, batch_size = batch_size, epochs=epoch)
#     end = time.time()
#     print("Training time: {}".format(end-start))

#     # ------------------------------------ evaluation ---------------------------------------------- #
#     yhat = model_evaluation(model, test_X , device)
#     errors[threshold]['RMSE'] = sqrt(mean_squared_error(yhat,test_y))
#     errors[threshold]['MAE'] = mean_absolute_error(yhat,test_y)
#     errors[threshold]['history'] = history
#     errors[threshold]['df'] = {"Real":test_y,"Predicted":yhat}
#     errors[threshold]['train_time'] = end-start
#     print("RMSE: {}".format(errors[threshold]['RMSE']))


# # save errors in save path as pickle file
# with open(results_save_path, 'wb') as handle:
#     pickle.dump(errors, handle)

Starting threshold: 0.25


: 

: 

In [165]:
# Load previously pickled results into `errors`.
# NOTE(review): the path below ends with '/', i.e. it names a directory rather
# than a pickle file, so open() is expected to fail on it — confirm the
# intended file name.
with open('../results/hauge/LSTM/', 'rb') as f:
    errors = pickle.load(f)

In [166]:
# Flatten errors[trajectory][direction][threshold]['MAE'] into a single dict
# keyed by "<trajectory>_<direction>_<threshold>"
AE_results = {}
for trajectory, per_direction in errors.items():
    for direction, per_threshold in per_direction.items():
        for threshold, metrics in per_threshold.items():
            flat_key = '_'.join((trajectory, direction, str(threshold)))
            AE_results[flat_key] = metrics['MAE']

In [161]:
AE_results

{'T1_North_1': 8.473589,
 'T1_South_1': 9.20982,
 'T2_South_1': 7.1821027,
 'T2_North_1': 7.388921}

In [164]:
AE_results

{'T1_North_0': 9.160221,
 'T1_North_0.25': 9.257724,
 'T1_North_0.5': 9.112778,
 'T1_North_0.75': 9.51287,
 'T1_North_1': 9.193073,
 'T1_South_0': 10.248888,
 'T1_South_0.25': 9.90335,
 'T1_South_0.5': 10.204169,
 'T1_South_0.75': 10.108185,
 'T1_South_1': 10.322937,
 'T2_South_0': 8.0753,
 'T2_South_0.25': 7.7263927,
 'T2_South_0.5': 7.820727,
 'T2_South_0.75': 7.7311473,
 'T2_South_1': 7.762887,
 'T2_North_0': 8.186727,
 'T2_North_0.25': 7.9963584,
 'T2_North_0.5': 8.1758,
 'T2_North_0.75': 8.200523,
 'T2_North_1': 8.174169}

In [167]:
AE_results

{'T1_North_1': 16.459387,
 'T1_South_1': 20.433876,
 'T2_South_1': 11.701148,
 'T2_North_1': 12.529651}

In [55]:
top_corr_df

Unnamed: 0,0,1,2,3,4,5,6,7
K704,1.0,0.364034,0.342004,0.32252,0.317705,0.289905,0.27422,0.269615
K702,1.0,0.381637,0.27422,0.223593,0.195563,0.193108,0.165949,0.165231
K703,1.0,0.381637,0.364034,0.301384,0.282673,0.281325,0.280306,0.265226
K159,1.0,0.342004,0.282673,0.261087,0.259515,0.251675,0.237229,0.235254
K182,1.0,0.289905,0.281325,0.264719,0.259515,0.259074,0.247584,0.242283
K183,1.0,0.286915,0.264719,0.255097,0.251795,0.251675,0.24502,0.234203
K128,1.0,0.183039,0.164001,0.147611,0.138978,0.135067,0.134432,0.132459
K139,1.0,0.269615,0.249463,0.247584,0.24502,0.235254,0.234357,0.231896
K104,1.0,0.234357,0.202606,0.200443,0.195063,0.193549,0.176737,0.176212
K101,1.0,0.292145,0.265226,0.245818,0.231896,0.227966,0.225558,0.21268


In [68]:
top_corr_df.shape

(15, 8)

In [69]:
temp = np.reshape(top_corr_df.values, (1, 1, 15, 8))

In [71]:
temp.shape

(1, 1, 15, 8)

In [85]:
train_X.shape

(78327, 12, 15)

In [108]:
np.array(ls_big).shape

(2, 12, 120)

In [82]:
np.dot(train_X[0][0].reshape(-1,1), top_corr_df.values)

ValueError: shapes (15,1) and (15,8) not aligned: 1 (dim 1) != 15 (dim 0)

In [75]:
top_corr_df.values

array([[1.        , 0.36403377, 0.342004  , 0.32252012, 0.31770541,
        0.2899053 , 0.27421988, 0.2696154 ],
       [1.        , 0.38163735, 0.27421988, 0.2235932 , 0.19556313,
        0.19310824, 0.1659489 , 0.16523105],
       [1.        , 0.38163735, 0.36403377, 0.30138352, 0.28267323,
        0.28132489, 0.28030614, 0.26522622],
       [1.        , 0.342004  , 0.28267323, 0.26108656, 0.25951506,
        0.25167514, 0.23722889, 0.23525413],
       [1.        , 0.2899053 , 0.28132489, 0.26471929, 0.25951506,
        0.25907424, 0.24758407, 0.24228294],
       [1.        , 0.28691516, 0.26471929, 0.25509719, 0.25179464,
        0.25167514, 0.24502044, 0.23420319],
       [1.        , 0.18303868, 0.16400144, 0.14761058, 0.13897756,
        0.13506668, 0.13443234, 0.1324586 ],
       [1.        , 0.2696154 , 0.24946282, 0.24758407, 0.24502044,
        0.23525413, 0.23435709, 0.23189594],
       [1.        , 0.23435709, 0.20260551, 0.20044315, 0.19506288,
        0.19354933, 0.176737

In [64]:
train_X

array([[-0.31507635, -0.11469843, -0.10775737, -0.10161846, -0.10010146,
        -0.09134231, -0.0864002 , -0.08494944],
       [-0.44912729, -0.17140375, -0.12315963, -0.10042181, -0.08783274,
        -0.08673018, -0.07453218, -0.07420977],
       [-0.28740495, -0.10968446, -0.10462511, -0.08661912, -0.08124169,
        -0.08085417, -0.08056137, -0.07622733],
       [-0.09352473, -0.03198583, -0.02643694, -0.02441805, -0.02427108,
        -0.02353785, -0.02218677, -0.02200208],
       [-0.14520952, -0.04209701, -0.04085105, -0.03843976, -0.03768406,
        -0.03762005, -0.03595156, -0.03518179],
       [-0.17551826, -0.05035885, -0.04646307, -0.04477422, -0.04419456,
        -0.04417358, -0.04300556, -0.04110694],
       [-0.12826431, -0.02347733, -0.02103553, -0.01893317, -0.01782586,
        -0.01732423, -0.01724287, -0.01698971],
       [-0.19457111, -0.05245937, -0.04853826, -0.04817271, -0.0476739 ,
        -0.04577366, -0.04559912, -0.04512025],
       [-0.02802156, -0.00656705

In [None]:
# multiply along the 3rd dimension of the whole train_X, which has shape (n_samples, n_timesteps, n_features)

In [66]:
train_X.shape

(78327, 12, 15)

In [65]:
train_X[0][0]

array([-0.31507635, -0.4491273 , -0.28740495, -0.09352473, -0.14520952,
       -0.17551826, -0.12826431, -0.19457111, -0.02802156, -0.10759474,
       -0.21954085, -0.22892836, -0.204619  , -0.3299557 , -0.351749  ],
      dtype=float32)

In [54]:
ae_score

Unnamed: 0_level_0,K704,K702,K703,K159,K182,K183,K128,K139,K104,K101,K206,K074,K414,K415,K250
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-01-01 01:00:00,0.437718,0.515155,0.229290,0.221861,0.592250,0.340062,0.687709,0.414231,0.579028,0.247488,0.564301,0.811996,0.584248,0.727704,0.482999
2018-01-01 02:00:00,1.096224,0.688840,0.509010,0.402139,0.607743,0.596373,1.146240,0.351957,0.544668,0.555715,0.554146,1.048120,0.651002,0.712690,0.670355
2018-01-01 03:00:00,1.017256,0.746809,0.622872,0.680986,0.691022,0.631199,0.928740,0.570491,0.534584,0.471159,0.392499,1.115597,0.840674,0.857182,0.806166
2018-01-01 04:00:00,0.861961,0.839790,0.705627,0.858127,0.825192,0.694095,0.848465,0.927224,0.881316,0.791312,0.933026,0.844556,1.068071,0.782274,0.811864
2018-01-01 05:00:00,0.824932,0.959399,0.863709,0.797068,0.780820,0.831422,0.869303,0.883617,0.893689,0.735172,0.968681,0.880942,0.909545,0.736027,0.866506
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-30 20:00:00,0.973106,0.976283,0.956759,0.898046,0.936732,0.921183,1.001214,0.918698,1.032083,1.096951,0.942169,1.038304,1.063547,0.987005,1.063492
2020-03-30 21:00:00,1.046644,0.956518,1.155419,1.114104,1.024698,1.006259,0.981027,0.975505,0.957298,1.084331,0.995062,1.163347,0.995454,1.025594,1.085228
2020-03-30 22:00:00,1.159791,0.940150,0.912818,1.012481,1.034591,1.092172,0.967101,0.984440,0.929981,1.025913,0.903882,1.098595,0.905515,0.980438,0.926042
2020-03-30 23:00:00,1.101063,1.606521,0.988224,0.904493,0.924702,0.834446,0.942835,1.089642,1.132883,0.865337,0.782019,1.021487,0.934080,0.783546,0.978447


In [36]:
# Unpickle the stored results of the AE-weighted run into `errors`
weighted_results_pickle = '../results/hauge/LSTM/multivariate_AE_weighted_real_time_results.pkl'
with open(weighted_results_pickle, 'rb') as f:
    errors = pickle.load(f)

In [37]:
# Collect the RMSE of every trajectory/direction/threshold combination under
# the key "<trajectory>_<direction>_<threshold>"
AE_results = {}
for trajectory, directions in errors.items():
    for direction, thresholds_found in directions.items():
        for threshold, scores in thresholds_found.items():
            label = '_'.join((trajectory, direction, str(threshold)))
            AE_results[label] = scores['RMSE']

In [38]:
AE_results

{'T1_North_1': 36.55789567696868,
 'T1_South_1': 45.831808686762514,
 'T2_South_1': 24.06641100521378,
 'T2_North_1': 27.554807112725925}

In [60]:
temp1 = df.iloc[0:1].values
temp1

array([[17., 13., 16., 14., 27., 27., 28., 24., 28.]])

In [7]:
top_corr_df

NameError: name 'top_corr_df' is not defined

In [74]:
# Scratch check of the row weighting that preprocess_df_2 performs,
# applied to the first row of df only.
weighted_ls = []
for idx, row in df.iterrows():
    # multiply the row with the correlation matrix of ae_score (broadcast over
    # the matrix rows) and sum along axis 1
    weighted_ls.append(np.sum(row.values*ae_score.corr().values, axis=1))
    break  # stop after the first row
weighted_df = pd.DataFrame(weighted_ls, columns=df.columns)

In [75]:
weighted_df

Unnamed: 0,K502,K504,K503,K263,K556,K557,K559,K561,K198
0,80.138779,84.507618,51.752208,66.469568,88.39996,87.834296,85.691173,88.867228,82.782186


In [58]:
# multiply top_corr_df and temp1 on axis 1
temp2 = np.multiply(top_corr_df, temp1.T)

In [59]:
temp2

Unnamed: 0,0,1,2
K502,17.0,7.953941,6.581471
K504,13.0,6.082425,5.794025
K503,16.0,3.674517,3.587646
K263,14.0,5.088919,4.870812
K556,27.0,12.033744,11.60796
K557,27.0,12.413916,11.60796
K559,28.0,13.200926,11.503046
K561,24.0,11.315079,11.034592
K198,28.0,11.614921,10.840069


In [66]:
temp = df.iloc[0:1].values* ae_score.values
temp

array([[ 5.88451228,  7.98631253,  8.76416242, ..., 19.52396743,
         8.93451764,  8.11603002],
       [13.03872917,  8.53337788, 16.27987158, ..., 16.60562073,
        14.52707614, 15.79520361],
       [12.25591678,  9.87049421, 14.31073331, ..., 25.74764848,
        17.02123224, 28.91502877],
       ...,
       [16.56042983, 13.29413395, 15.29007718, ..., 31.08993165,
        28.30310678, 29.9121107 ],
       [14.5622421 , 13.00756001, 15.98760939, ..., 27.69862685,
        33.12919221, 42.64442389],
       [22.0166774 , 14.47573141, 14.1927761 , ..., 36.42481365,
        32.75264748, 30.01020415]])

In [39]:
# get sum of each row in temp
temp.sum(axis=1)

array([80.13877944, 84.50761822, 51.75220818, 66.4695678 , 88.39995972,
       87.83429633, 85.69117257, 88.86722821, 82.78218594])

In [None]:
AE_results

{'T1_North_0.5': 13.207560391278088,
 'T1_South_0.5': 15.087792905225621,
 'T2_South_0.5': 13.155181768909424,
 'T2_North_0.5': 15.316885931184128}

# EXTRA

In [292]:
c = [a.cpu().detach().numpy(),b.cpu().detach().numpy()]

In [293]:
d = np.vstack(c)

In [300]:
a.shape

torch.Size([64, 9])

In [304]:
temp = test_y[:64,:].shape

In [251]:
test_y

torch.Size([64, 9])

In [264]:
mean_squared_error(bt.cpu().detach().numpy(),y.cpu().detach().numpy())

35.592785

In [266]:
loss_fn(bt.reshape(-1),y.reshape(-1))

tensor(35.5928, device='mps:0', grad_fn=<MseLossBackward0>)

torch.Size([576])

In [239]:
train_X.shape

(107021, 12, 90)

In [241]:
model

LSTM_uni(
  (lstm): LSTM(9, 64, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=64, out_features=9, bias=True)
)

In [91]:
b = ae_score.corr().values
b.shape

(15, 15)

In [144]:
b_tiled = np.tile(b, (12, 1, 1))

In [151]:
b_tiled.shape

(12, 15, 15)

In [145]:
b_tiled_temp = np.tile(b, (78327, 12, 1, 1))

In [146]:
b_tiled_temp.shape

(78327, 12, 15, 15)

In [147]:
b_tiled_temp

array([[[[1.        , 0.52009281, 0.59531289, ..., 0.47456496,
          0.50346994, 0.55122234],
         [0.52009281, 1.        , 0.51728952, ..., 0.44649454,
          0.54981047, 0.59161937],
         [0.59531289, 0.51728952, 1.        , ..., 0.46227573,
          0.48162972, 0.52886381],
         ...,
         [0.47456496, 0.44649454, 0.46227573, ..., 1.        ,
          0.44934213, 0.51765878],
         [0.50346994, 0.54981047, 0.48162972, ..., 0.44934213,
          1.        , 0.58109163],
         [0.55122234, 0.59161937, 0.52886381, ..., 0.51765878,
          0.58109163, 1.        ]],

        [[1.        , 0.52009281, 0.59531289, ..., 0.47456496,
          0.50346994, 0.55122234],
         [0.52009281, 1.        , 0.51728952, ..., 0.44649454,
          0.54981047, 0.59161937],
         [0.59531289, 0.51728952, 1.        , ..., 0.46227573,
          0.48162972, 0.52886381],
         ...,
         [0.47456496, 0.44649454, 0.46227573, ..., 1.        ,
          0.44934213, 0.5

In [133]:
b_tiled.shape

(12, 15, 15)

In [153]:
c = np.concatenate([train_X[:, :, :, np.newaxis], b_tiled_temp], axis=3)

In [152]:
train_X.shape

(78327, 12, 15)

In [149]:
a.shape

(12, 15)

In [139]:
c[1]

array([[24.        ,  1.        ,  0.52009281,  0.59531289,  0.4905096 ,
         0.59030765,  0.53083411,  0.42160134,  0.44977888,  0.41419196,
         0.43560344,  0.54621173,  0.5127304 ,  0.47456496,  0.50346994,
         0.55122234],
       [42.        ,  0.52009281,  1.        ,  0.51728952,  0.45122571,
         0.49783694,  0.3569048 ,  0.40421719,  0.3800037 ,  0.30118805,
         0.41944514,  0.36872095,  0.51981267,  0.44649454,  0.54981047,
         0.59161937],
       [55.        ,  0.59531289,  0.51728952,  1.        ,  0.48578304,
         0.5503211 ,  0.56126153,  0.41188402,  0.47546262,  0.42028723,
         0.40471204,  0.55828677,  0.49244704,  0.46227573,  0.48162972,
         0.52886381],
       [26.        ,  0.4905096 ,  0.45122571,  0.48578304,  1.        ,
         0.49171092,  0.41857639,  0.38567624,  0.39845473,  0.33828968,
         0.37756179,  0.43709072,  0.42578968,  0.40196755,  0.42904218,
         0.48198089],
       [14.        ,  0.59030765,  0

In [140]:
train_X[0]

array([[28., 24., 54., 44., 26., 34., 31., 30., 56., 59., 36.,  6., 30.,
         9., 15.],
       [24., 42., 55., 26., 14., 38., 29., 31., 57., 29., 21.,  4., 21.,
         6.,  6.],
       [40., 41., 52., 30., 17., 27., 25., 22., 48., 33., 30.,  6., 20.,
         3.,  7.],
       [46., 43., 53., 41., 18., 42., 20., 13., 51., 55., 22.,  0., 19.,
         5.,  8.],
       [ 6., 46., 60., 32.,  5., 27., 34., 34.,  9., 57., 21.,  0., 27.,
        12., 19.],
       [41., 63., 84., 38., 22., 28., 18., 22., 48., 55., 26.,  3., 22.,
         0., 14.],
       [34., 51., 69., 30., 10., 27., 25., 11., 42., 54., 30.,  7., 22.,
        10., 13.],
       [27., 39., 45., 33.,  3., 38., 26., 36., 34., 36., 34.,  0., 20.,
         5.,  5.],
       [28., 63., 74., 27., 11., 37., 37., 26., 44., 54., 35.,  7., 10.,
         6., 19.],
       [35., 51., 52., 34., 19., 18., 23., 38., 47., 40., 24.,  5., 23.,
        13., 15.],
       [31., 40., 36., 30., 21., 31., 36., 29., 50., 52., 31.,  3., 22.,
       

In [128]:
c = np.concatenate([a[:, :, np.newaxis], b_reshaped], axis=2)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 12 and the array at index 1 has size 1

In [104]:
# stack each row of b with each element of a
a[0].shape

(15,)

In [119]:
np.hstack((a[0].reshape(-1,1),b))

array([[28.        ,  1.        ,  0.52009281,  0.59531289,  0.4905096 ,
         0.59030765,  0.53083411,  0.42160134,  0.44977888,  0.41419196,
         0.43560344,  0.54621173,  0.5127304 ,  0.47456496,  0.50346994,
         0.55122234],
       [24.        ,  0.52009281,  1.        ,  0.51728952,  0.45122571,
         0.49783694,  0.3569048 ,  0.40421719,  0.3800037 ,  0.30118805,
         0.41944514,  0.36872095,  0.51981267,  0.44649454,  0.54981047,
         0.59161937],
       [54.        ,  0.59531289,  0.51728952,  1.        ,  0.48578304,
         0.5503211 ,  0.56126153,  0.41188402,  0.47546262,  0.42028723,
         0.40471204,  0.55828677,  0.49244704,  0.46227573,  0.48162972,
         0.52886381],
       [44.        ,  0.4905096 ,  0.45122571,  0.48578304,  1.        ,
         0.49171092,  0.41857639,  0.38567624,  0.39845473,  0.33828968,
         0.37756179,  0.43709072,  0.42578968,  0.40196755,  0.42904218,
         0.48198089],
       [26.        ,  0.59030765,  0

In [126]:
b

array([[1.        , 0.52009281, 0.59531289, 0.4905096 , 0.59030765,
        0.53083411, 0.42160134, 0.44977888, 0.41419196, 0.43560344,
        0.54621173, 0.5127304 , 0.47456496, 0.50346994, 0.55122234],
       [0.52009281, 1.        , 0.51728952, 0.45122571, 0.49783694,
        0.3569048 , 0.40421719, 0.3800037 , 0.30118805, 0.41944514,
        0.36872095, 0.51981267, 0.44649454, 0.54981047, 0.59161937],
       [0.59531289, 0.51728952, 1.        , 0.48578304, 0.5503211 ,
        0.56126153, 0.41188402, 0.47546262, 0.42028723, 0.40471204,
        0.55828677, 0.49244704, 0.46227573, 0.48162972, 0.52886381],
       [0.4905096 , 0.45122571, 0.48578304, 1.        , 0.49171092,
        0.41857639, 0.38567624, 0.39845473, 0.33828968, 0.37756179,
        0.43709072, 0.42578968, 0.40196755, 0.42904218, 0.48198089],
       [0.59030765, 0.49783694, 0.5503211 , 0.49171092, 1.        ,
        0.5057194 , 0.41738819, 0.45516972, 0.40246901, 0.42746746,
        0.49505185, 0.50251473, 0.47448538, 

In [88]:
# merge a and b to make it 12 x 15 x 15 array
# NOTE(review): in the recorded run this call raised ValueError. Concatenating
# along axis 0 requires the inputs to agree on every other dimension, but a has
# size 1 and b has size 15 along dimension 1.
# TODO: confirm the intended shapes of a and b (and the target axis) before
# relying on c.
c = np.concatenate((a,b), axis=0)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 1 and the array at index 1 has size 15

In [89]:
# Display c for inspection.
# NOTE(review): the np.concatenate cell that assigns c raised ValueError in the
# recorded run, so the value shown here presumably comes from an earlier
# assignment of c — confirm after a Restart & Run All.
c

array([[28.        , 24.        , 54.        , 44.        , 26.        ,
        34.        , 31.        , 30.        , 56.        , 59.        ,
        36.        ,  6.        , 30.        ,  9.        , 15.        ],
       [24.        , 42.        , 55.        , 26.        , 14.        ,
        38.        , 29.        , 31.        , 57.        , 29.        ,
        21.        ,  4.        , 21.        ,  6.        ,  6.        ],
       [40.        , 41.        , 52.        , 30.        , 17.        ,
        27.        , 25.        , 22.        , 48.        , 33.        ,
        30.        ,  6.        , 20.        ,  3.        ,  7.        ],
       [46.        , 43.        , 53.        , 41.        , 18.        ,
        42.        , 20.        , 13.        , 51.        , 55.        ,
        22.        ,  0.        , 19.        ,  5.        ,  8.        ],
       [ 6.        , 46.        , 60.        , 32.        ,  5.        ,
        27.        , 34.        , 34.        , 

In [67]:
# List the column labels of ae_score's correlation matrix, in order.
ae_score.corr().columns.to_list()

['K704',
 'K702',
 'K703',
 'K159',
 'K182',
 'K183',
 'K128',
 'K139',
 'K104',
 'K101',
 'K206',
 'K074',
 'K414',
 'K415',
 'K250']

In [29]:
# Keep only the isct_inc columns of ae_score's correlation matrix; all rows are
# retained. (isct_inc is defined in another cell — presumably a list of column
# labels; verify.)
ae_score.corr()[isct_inc]

Unnamed: 0,K703,K704,K183,K206,K182,K250,K702,K074
K704,0.595313,1.0,0.530834,0.546212,0.590308,0.551222,0.520093,0.51273
K702,0.51729,0.520093,0.356905,0.368721,0.497837,0.591619,1.0,0.519813
K703,1.0,0.595313,0.561262,0.558287,0.550321,0.528864,0.51729,0.492447
K159,0.485783,0.49051,0.418576,0.437091,0.491711,0.481981,0.451226,0.42579
K182,0.550321,0.590308,0.505719,0.495052,1.0,0.548973,0.497837,0.502515
K183,0.561262,0.530834,1.0,0.556446,0.505719,0.452424,0.356905,0.460386
K128,0.411884,0.421601,0.415147,0.392648,0.417388,0.433686,0.404217,0.425242
K139,0.475463,0.449779,0.402664,0.42349,0.45517,0.424486,0.380004,0.39795
K104,0.420287,0.414192,0.424533,0.414931,0.402469,0.368167,0.301188,0.361701
K101,0.404712,0.435603,0.365576,0.425814,0.427467,0.446986,0.419445,0.461534


In [59]:
# List the column labels of data, in order.
data.columns.to_list()

['K704',
 'K702',
 'K703',
 'K159',
 'K182',
 'K183',
 'K128',
 'K139',
 'K104',
 'K101',
 'K206',
 'K074',
 'K414',
 'K415',
 'K250']

In [58]:
# Display data for inspection. The recorded output is indexed by timestamp and
# has 15 columns (K704 ... K250).
data

Unnamed: 0_level_0,K704,K702,K703,K159,K182,K183,K128,K139,K104,K101,K206,K074,K414,K415,K250
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-01-01 01:00:00,28.0,24.0,54.0,44.0,26.0,34.0,31.0,30.0,56.0,59.0,36.0,6.0,30.0,9.0,15.0
2018-01-01 01:05:00,24.0,42.0,55.0,26.0,14.0,38.0,29.0,31.0,57.0,29.0,21.0,4.0,21.0,6.0,6.0
2018-01-01 01:10:00,40.0,41.0,52.0,30.0,17.0,27.0,25.0,22.0,48.0,33.0,30.0,6.0,20.0,3.0,7.0
2018-01-01 01:15:00,46.0,43.0,53.0,41.0,18.0,42.0,20.0,13.0,51.0,55.0,22.0,0.0,19.0,5.0,8.0
2018-01-01 01:20:00,6.0,46.0,60.0,32.0,5.0,27.0,34.0,34.0,9.0,57.0,21.0,0.0,27.0,12.0,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-31 23:35:00,4.0,4.0,9.0,10.0,13.0,4.0,10.0,19.0,14.0,21.0,13.0,3.0,4.0,1.0,2.0
2020-03-31 23:40:00,1.0,5.0,5.0,6.0,5.0,7.0,12.0,5.0,14.0,13.0,12.0,3.0,7.0,3.0,3.0
2020-03-31 23:45:00,8.0,7.0,4.0,7.0,4.0,12.0,6.0,8.0,17.0,12.0,13.0,3.0,19.0,5.0,3.0
2020-03-31 23:50:00,5.0,1.0,2.0,5.0,8.0,13.0,9.0,3.0,27.0,14.0,8.0,0.0,4.0,4.0,5.0


In [None]:
# Flatten the nested errors[trajectory][direction][threshold] mapping into one
# dict keyed by "<trajectory>_<direction>_<threshold>", keeping only the 'RMSE'
# entry of each leaf.
AE_results = {}
for trajectory, direction_errors in errors.items():
    for direction, threshold_errors in direction_errors.items():
        for threshold, threshold_metrics in threshold_errors.items():
            # threshold may be numeric, so stringify it before joining the key parts
            result_key = '_'.join((trajectory, direction, str(threshold)))
            AE_results[result_key] = threshold_metrics['RMSE']

In [None]:
# Display AE_results for inspection. The recorded output maps
# "<trajectory>_<direction>_<threshold>" keys to float values.
AE_results

{'T1_North_0': 10.59151633354107,
 'T1_North_0.25': 10.196899412467983,
 'T1_North_0.5': 9.955075986889941,
 'T1_North_0.75': 10.05299665551894,
 'T1_North_1': 9.830371060706703,
 'T1_South_0': 18.641919656471167,
 'T1_South_0.25': 17.971930531215893,
 'T1_South_0.5': 17.78031504964693,
 'T1_South_0.75': 17.8761927808514,
 'T1_South_1': 17.471410650736853,
 'T2_South_0': 15.783835842420112,
 'T2_South_0.25': 14.955686164952139,
 'T2_South_0.5': 14.87429694295857,
 'T2_South_0.75': 14.916430099779108,
 'T2_South_1': 14.930474792897037,
 'T2_North_0': 15.473913778461938,
 'T2_North_0.25': 14.940598792684167,
 'T2_North_0.5': 15.136461083171628,
 'T2_North_0.75': 15.01907330839573,
 'T2_North_1': 15.349926838139215}