In [1]:
import os
import random

# GPU-related environment variables, set in one shot before TensorFlow is
# imported in the next cell: enumerate devices by PCI bus id, expose only
# device "0", and ask TensorFlow to grow GPU memory on demand.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: fix the seed of every random number generator used here
# (TensorFlow's global seed, NumPy's global RNG and Python's `random` module).
tf.random.set_seed(0)
np.random.seed(0)
random.seed(0)

In [3]:
def GRU_AE(X_train, epochs=50, batch_size=128, learning_rate=0.001,
           validation_split=0.3, patience=5):
    """Build and fit a bidirectional-GRU autoencoder that reconstructs ``X_train``.

    Architecture (in order): Bi-GRU(128, sequences) -> Dropout(0.2) ->
    Bi-GRU(64) -> RepeatVector(timesteps) -> Bi-GRU(64, sequences) ->
    Dropout(0.2) -> Bi-GRU(128) -> Dense(timesteps * features) ->
    Reshape((timesteps, features)).

    The model is compiled with Adam and an MSE loss, then trained to map
    ``X_train`` onto itself. Training stops early when ``val_loss`` has not
    improved for ``patience`` epochs, and the best weights are restored.

    Args:
        X_train: array-like with a 3-element ``shape``; axes 1 and 2 are used
            as the (timesteps, features) input shape of the network.
        epochs: maximum number of training epochs.
        batch_size: mini-batch size passed to ``model.fit``.
        learning_rate: learning rate of the Adam optimizer.
        validation_split: fraction of ``X_train`` held out by Keras for the
            ``val_loss`` that drives early stopping.
        patience: number of non-improving epochs tolerated before stopping.

    Returns:
        The fitted ``keras.Sequential`` model.

    Raises:
        ValueError: if ``X_train`` does not have exactly three dimensions.
    """
    # Fail early with a readable message instead of an IndexError (or a Keras
    # shape error) further down when the input is not a stack of windows.
    if len(X_train.shape) != 3:
        raise ValueError(
            f"X_train must be 3-dimensional, got shape {tuple(X_train.shape)}"
        )

    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    Bi = layers.Bidirectional
    GRU = layers.GRU
    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(timesteps, n_features)),
            # Encoder: compress each window into a single vector.
            Bi(GRU(128, return_sequences=True)),
            layers.Dropout(rate=0.2),
            Bi(GRU(64)),
            # Decoder: repeat the code once per timestep and decode it.
            layers.RepeatVector(timesteps),
            Bi(GRU(64, return_sequences=True)),
            layers.Dropout(rate=0.2),
            Bi(GRU(128)),
            # Emit the whole window as one flat vector, then restore its shape.
            layers.Dense(timesteps * n_features),
            layers.Reshape([timesteps, n_features])
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=patience, mode="min", restore_best_weights=True
    )
    # The returned History object is not needed by callers, so it is not kept.
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Column-wise accumulator for the Yahoo S5 runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GRU autoencoder per Yahoo S5 series and record its scores.
#
# The accumulator is (re)created in this cell so that re-running the cell
# starts from a clean slate instead of appending duplicate rows, which would
# skew the per-dataset means computed afterwards.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # Release the global Keras state left behind by the previous model;
        # building hundreds of models in one kernel otherwise keeps growing
        # memory use.
        keras.backend.clear_session()
        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with max -- presumably one F1 value per
        # candidate threshold; confirm against `evaluator.evaluate`.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.5714285142857172 0.24999997875000163 0.33333328888889363
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.33333329444444565 0.09999998800000123 0.49999993750000654
yahoo_A1 0.7999999200000041 0.4999999416666725 0.8333332638888935
yahoo_A1 0.666666577777783 0.37499995687500437 0.4999999583333361
yahoo_A1 0.33333329444444565 0.09999998800000123 0.12499998437500164
yahoo_A1 0.8888888197530895 0.25000000993055255 -0.0
yahoo_A1 0.5714285142857172 0.1666666606944441 0.33333328888889363
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.33333329444444565 0.09999998800000123 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.7499999343750033 0.37499997479166813 0.24999997500000196
yahoo_A1 0.7499999343750033 0.16666667444444216 -0.0
yahoo_A1 0.7499999343750033 0.24999999249999968 -0.0
yahoo_A1 0.7499999343750033 9.166665633334498e-09 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yah

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.46153841893491365 1.3032405600333905e-08 0.0
yahoo_A2 0.46153841893491365 4.351851412631487e-09 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.09999999971717134 0.49999994375000567
yahoo_A2 0.28571425306122533 0.08333332361111209 0.1666666472222242
yahoo_A2 0.46153841893491365 0.21527776702546342 0.4880952106859423
yahoo_A2 0.4285713887755125 0.10714285482941653 0.24999997187500275
yahoo_A2 0.24999997187500062 0.07142856326530694 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 1.1717170107356334e-08 0.0
yahoo_A2 0.24999997187500062 0.07142856326530694 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 0.0
yahoo_A2 0.4285713887755125 1.1717170107356334e-08 0.0
yahoo_A2 0.4285713887755125 0.08680555636898456 0.0624999960937502
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.0 0.0
yahoo_A2 0.4285713887755125 0.07142857498247705 0.24999997187500275
yahoo_A2 0.249

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.4999999500000025 1.3333331148236856e-08 0.0
yahoo_A3 0.33333329444444565 0.09999998300000205 0.12499999062500056
yahoo_A3 0.7999999360000031 1.4444442978045904e-08 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A3 0.9999999250000032 2.4999997023184203e-08 0.9999999250000045
yahoo_A3 0.6666666074074105 0.0 0.0
yahoo_A3 0.7999999360000031 5.277777344711406e-09 0.0
yahoo_A3 0.9999999333333361 0.33333331944444494 0.0
yahoo_A3 0.9090908429752097 4.083333052458349e-09 0.0
yahoo_A3 0.9090908429752097 5.1513881203937667e-08 0.0
yahoo_A3 0.39999995200000177 0.0 0.39999995200000493
yahoo_A3 0.7999999360000031 0.0 0.0
yahoo_A3 0.28571425306122533 0.0 0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.4999999500000025 1.3333331148236856e-08 0.0
yahoo_A3 0.4999999500000025 0.10000000133333238 0.0
yahoo_A3 0.6666666074074105 0.1250000081249981 0.0
yahoo_A3 0.28571425306122533 0.08333332361111209 -0.0
y

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.6666666074074105 0.0 0.0
yahoo_A4 0.6666666000000033 0.16666666527777654 0.2499999812500011
yahoo_A4 0.6666666074074105 2.374999645729026e-08 0.0
yahoo_A4 0.6666666000000033 0.2499999833333341 0.24999996875000327
yahoo_A4 0.9090908429752097 0.4374999812013897 -0.0
yahoo_A4 0.7999999360000031 0.5138888513657436 0.4166666340277798
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 4.083333052458349e-09 0.0
yahoo_A4 0.7999999360000031 0.16666667972221952 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.6666666074074105 0.1250000081249981 0.0
yahoo_A4 0.6666666074074105 0.12500000187499916 0.0
yahoo_A4 0.28571425306122533 0.0 0.0
yahoo_A4 0.33333329444444565 0.09999998800000123 0.19999997600000247
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.7999999360000031 5.277777344711406e-09 0.0
yahoo_A4 0.9999999333333361 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.4999999500000025 0.0 0.0
y

In [6]:
# Tabulate the per-series scores, then display the mean of every metric
# for each Yahoo S5 subset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.489768,0.160714,0.195685
yahoo_A2,0.381367,0.06436,0.054315
yahoo_A3,0.700941,0.064553,0.049789
yahoo_A4,0.685002,0.081877,0.062263


### NASA

In [7]:
# Column-wise accumulator for the NASA runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GRU autoencoder per NASA dataset and record its scores.
#
# The accumulator is (re)created in this cell so that re-running the cell
# starts from a clean slate instead of appending duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_nasa]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Entries are labelled by their 1-based position in the loader output.
        dataset_name = f'D{i+1}'

        # Release the global Keras state left behind by the previous model so
        # memory use does not keep growing across iterations.
        keras.backend.clear_session()
        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with max -- presumably one F1 value per
        # candidate threshold; confirm against `evaluator.evaluate`.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.27054187472880764 0.05547500453211374 0.20591594521826848
D2 0.2987124192342488 0.19086556201111204 0.5297422235491536


In [9]:
# Tabulate the NASA scores and display them grouped by dataset label.
# Each label (D1, D2, ...) is appended once per run of the loop, so the
# per-group mean simply reproduces that row.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.270542,0.055475,0.205916
D2,0.298712,0.190866,0.529742


### SMD

In [10]:
# Column-wise accumulator for the SMD runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Train one GRU autoencoder per SMD entity and record its scores.
#
# The accumulator is (re)created in this cell so that re-running the cell
# starts from a clean slate instead of appending duplicate rows, which would
# skew the dataset mean computed afterwards.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_smd]:
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # Release the global Keras state left behind by the previous model so
        # memory use does not keep growing across iterations.
        keras.backend.clear_session()
        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with max -- presumably one F1 value per
        # candidate threshold; confirm against `evaluator.evaluate`.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.23153690558205123 0.06546275382269463 0.49999999900806086
smd 0.1424936253701881 0.0 0.011730205233258859
smd 0.18378376568444288 0.09075988285121814 0.47930148029837544
smd 0.2095237660226846 0.15294130455310678 0.5557853335620123
smd 0.08823528509688669 0.029248531875446893 0.42340651277802754
smd 0.4861406875073337 0.30040744259703983 0.5226071277427815
smd 0.3040379788243149 0.1505521070381119 0.4480047505374323
smd 0.24864861156465118 0.15083085963432236 0.5758023125783926
smd 0.21359221382788363 0.10090537830700243 0.43771647202857
smd 0.43609018948499356 0.1483420370642837 0.7307937167703097
smd 0.12690354134994566 0.08636809236937637 0.4029927998668187
smd 0.29953914490114675 0.19525366670407357 0.256056914279959
smd 0.3577981294588032 0.21318835392937732 0.6747564121009431
smd 0.08547007725180877 0.022321428454838966 0.4999999973831775
smd 0.227390160607337 0.115308871948264 0.5383303553623384
smd 0.026737965262089488 0.006775067613340092 0.4999999898626375
smd 0.1902833

In [12]:
# Tabulate the per-entity SMD scores, then display the mean of every metric.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.206213,0.089485,0.442575


### ECG

In [13]:
# Column-wise accumulator for the ECG runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Train one GRU autoencoder per ECG dataset and record its scores.
#
# The accumulator is (re)created in this cell so that re-running the cell
# starts from a clean slate instead of appending duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_ecg]:
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Entries are labelled by their 1-based position in the loader output.
        dataset_name = f'D{i+1}'

        # Release the global Keras state left behind by the previous model so
        # memory use does not keep growing across iterations.
        keras.backend.clear_session()
        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with max -- presumably one F1 value per
        # candidate threshold; confirm against `evaluator.evaluate`.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.32786882397205264 0.21949724434983772 0.5318840513227964
D2 0.3043477989603044 0.02316602278801385 0.03124999938151046
D3 0.11009173251409898 0.0 0.0
D4 0.2999999730000019 0.0 0.0
D5 0.27450977916186264 0.08888880619771528 0.18918918584646421
D6 0.36363631074380753 0.2242243328136513 0.6624423857622446
D7 0.05079364581103594 0.0 0.0
D8 0.21276593798098847 0.0 0.0
D9 0.417910413455115 0.0 0.0


In [15]:
# Tabulate the ECG scores and display them grouped by dataset label.
# Each label (D1, D2, ...) is appended once per run of the loop, so the
# per-group mean simply reproduces that row.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.327869,0.219497,0.531884
D2,0.304348,0.023166,0.03125
D3,0.110092,0.0,0.0
D4,0.3,0.0,0.0
D5,0.27451,0.088889,0.189189
D6,0.363636,0.224224,0.662442
D7,0.050794,0.0,0.0
D8,0.212766,0.0,0.0
D9,0.41791,0.0,0.0


### Power Demand

In [16]:
# Column-wise accumulator for the Power Demand run: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Train a GRU autoencoder on the Power Demand data and record its scores.
#
# The accumulator is (re)created in this cell so that re-running the cell
# starts from a clean slate instead of appending duplicate rows, which would
# skew the dataset mean computed afterwards.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_power_demand]:
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(512, 256)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # Release the global Keras state left behind by previously built
        # models so memory use does not keep growing across iterations.
        keras.backend.clear_session()
        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with max -- presumably one F1 value per
        # candidate threshold; confirm against `evaluator.evaluate`.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.7499999343750033 0.1666666744444422 -0.0


In [18]:
# Tabulate the Power Demand scores, then display the mean of every metric.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.75,0.166667,0.0


### 2D Gesture

In [19]:
# Column-wise accumulator for the 2D Gesture run: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Train a GRU autoencoder on the 2D Gesture data and record its scores.
#
# The accumulator is (re)created in this cell so that re-running the cell
# starts from a clean slate instead of appending duplicate rows, which would
# skew the dataset mean computed afterwards.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_gesture]:
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # Release the global Keras state left behind by previously built
        # models so memory use does not keep growing across iterations.
        keras.backend.clear_session()
        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with max -- presumably one F1 value per
        # candidate threshold; confirm against `evaluator.evaluate`.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.42201831455264965 0.0 0.0


In [21]:
# Tabulate the 2D Gesture scores, then display the mean of every metric.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.422018,0.0,0.0
