In [1]:
import os
import random

# GPU-related environment variables, set here before TensorFlow is imported
# in the following cell.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "1",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed Python's `random`, NumPy and TensorFlow with one shared
# value so the seed only has to be changed in a single place.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def GRU_AE(X_train, epochs=50, batch_size=128, learning_rate=0.001,
           validation_split=0.3, patience=5):
    """Build a GRU autoencoder sized to ``X_train`` and fit it to reconstruct ``X_train``.

    The encoder stacks GRU(64) -> GRU(32); the 32-unit code is repeated once per
    step of axis 1, decoded with GRU(32) -> GRU(64), and the final state is mapped
    through a Dense layer whose output is reshaped back to the input window shape.

    Args:
        X_train: 3-D array; axes 1 and 2 define the model's input and output shape
            (presumably (windows, time steps, features) -- confirm against data_loader).
        epochs: Upper bound on the number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        learning_rate: Learning rate of the Adam optimizer.
        validation_split: Fraction of ``X_train`` that ``fit`` holds out to compute ``val_loss``.
        patience: Number of epochs without ``val_loss`` improvement tolerated by early stopping.

    Returns:
        The fitted ``keras.Sequential`` model. Early stopping is configured with
        ``restore_best_weights=True``.

    Raises:
        ValueError: If ``X_train`` is not 3-D.
    """
    if len(X_train.shape) != 3:
        raise ValueError(
            f"GRU_AE expects a 3-D array, got shape {tuple(X_train.shape)}"
        )
    window_size, n_features = X_train.shape[1], X_train.shape[2]

    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(window_size, n_features)),
            # Encoder: compress each window into a single 32-unit vector.
            layers.GRU(64, return_sequences=True),
            layers.GRU(32),
            # Decoder: feed the code to every step, then collapse to one state.
            layers.RepeatVector(window_size),
            layers.GRU(32, return_sequences=True),
            layers.GRU(64),
            # Emit the whole window at once and restore its 2-D layout.
            layers.Dense(window_size * n_features),
            layers.Reshape([window_size, n_features]),
        ]
    )
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="mse",
    )

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience,
        mode="min",
        restore_best_weights=True,
    )
    # Autoencoder objective: the input is also the target.
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Fresh accumulator for the Yahoo S5 runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Fit one GRU autoencoder per series of every Yahoo S5 subset and collect its scores.
for loader in (load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4):
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): (8, 4) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for series_idx in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[series_idx], x_tests[series_idx]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[series_idx], is_reconstructed=True)

        # Keep the largest F1 value reported by `evaluate`.
        best_f1 = np.max(scores['f1'])
        pr_auc, roc_auc = scores['pr_auc'], scores['roc_auc']

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(pr_auc)
        total_scores['roc_auc'].append(roc_auc)
        print(dataset_name, best_f1, pr_auc, roc_auc)

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291291
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957942
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823333336
yahoo_A1 0.9999998500000123 0.0 0.9999998990476291
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384642
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9999999466666691 0.9666666517589368 0.9999999953333334
yahoo_A1 0.8749999474609403 0.9163424156043661 0.9788199188017602
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099013
yahoo_A1 0.9999999333333361 0.8333332883333358 0.9999999823232327
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000003
yahoo_A1 0.9999999437500025 0.9374999763313819 0.9999999926264045
yahoo_A1 0.7999999200000041 0.48333328950000365 0.9901960444540574
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790291265
yahoo_A1 0.9999999409090937 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0707964532226491 0.019686398878357714 0.6389639461992842
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.85714281602041 0.9999999851907257
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.8571428160204102 0.9999999851907255
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.8571428159523828 0.9999999851907255
yahoo_A2 0.9999998500000123 0.0 0.9999998992248161
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.6153845562130219 0.5086446602434829 0.9734729316659291
yahoo_A2 0.9230768591716004 0.8119047226103571 0.9977561557549948
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.041450773099949295 0.003246753698535932 0.17297295558217857

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.12121210964187425 0.034715266414461605 0.24486863351711038
yahoo_A3 0.9999999333333361 0.8333332883333356 0.9999999824858761
yahoo_A3 0.04761904274376419 0.01219512132871385 0.6721310814162924
yahoo_A3 0.8888888296296326 0.8255023611360244 0.9889376107305726
yahoo_A3 0.6666666000000033 0.27826172014306466 0.8208333055659732
yahoo_A3 0.17647057188581453 0.04433406881099009 0.2502940728194697
yahoo_A3 0.18181812892563337 0.04471171837656128 0.4961864289672152
yahoo_A3 0.3333332777777854 0.10430571437753522 0.7080508253742985
yahoo_A3 0.18181813181819464 0.08056669893339746 0.45047862014651885
yahoo_A3 0.37037032318244695 0.20333595893114625 0.802875234197987
yahoo_A3 0.4285713816326568 0.23824988182449552 0.5995004796604153
yahoo_A3 0.04444444000000022 0.013272361850006846 0.43442620743079935
yahoo_A3 0.17647057188581453 0.05316441349587983 0.3179687464828395
yahoo_A3 0.06349205714285747 0.01696832555625222 0.6528925273529

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.28571425306122533 0.047182766239004185 0.6057909469199986
yahoo_A4 0.1538461254437907 0.041637526801354685 0.8032786477022329
yahoo_A4 0.0624999938476567 0.007990475284488499 0.2354166444913216
yahoo_A4 0.06299211978424002 0.013842663923441885 0.41458331260590386
yahoo_A4 0.9411764096885841 0.8602430182337842 0.9989224004454333
yahoo_A4 0.666666611111114 0.5293923683354409 0.8636083613961316
yahoo_A4 0.9999999250000032 0.7499999447916703 0.9999999741666674
yahoo_A4 0.5714285173469424 0.4004726445550825 0.9411764509738018
yahoo_A4 0.33333328518519106 0.12346299138724215 0.7682203254040509
yahoo_A4 0.06557376404192469 0.0233107724380989 0.45659720829716477
yahoo_A4 0.363636317355376 0.195548837310188 0.7059728962911435
yahoo_A4 0.0624999938476567 0.006172840073614333 0.3416666322152812
yahoo_A4 0.39999995200000177 0.041396026109016555 0.6447916476137159
yahoo_A4 0.32258061602497595 0.13465144529691986 0.8697739956762747
yahoo_A4 0.03174602857142873 0.004065040847075319 -0.0
ya

In [6]:
# One row per trained series; the displayed table averages each metric per Yahoo subset.
yahoo_results = pd.DataFrame.from_dict(total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.808435,0.550664,0.893323
yahoo_A2,0.913639,0.46687,0.964559
yahoo_A3,0.461718,0.299427,0.697396
yahoo_A4,0.419905,0.231099,0.686079


### NASA

In [7]:
# Fresh accumulator for the NASA runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Fit one GRU autoencoder per NASA dataset; rows are labelled D1, D2, ... by position.
for loader in (load_nasa,):
    # NOTE(review): (8, 4) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for series_idx in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[series_idx], x_tests[series_idx]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[series_idx], is_reconstructed=True)

        dataset_label = f'D{series_idx+1}'
        # Keep the largest F1 value reported by `evaluate`.
        best_f1 = np.max(scores['f1'])
        pr_auc, roc_auc = scores['pr_auc'], scores['roc_auc']

        total_scores['dataset'].append(dataset_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(pr_auc)
        total_scores['roc_auc'].append(roc_auc)
        print(dataset_label, best_f1, pr_auc, roc_auc)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2295534087061448 0.11934646332439376 0.47490455707919876
D2 0.19498578202833744 0.508836416181533 0.5108962808018799


In [9]:
# Tabulate the NASA scores and display the per-label mean of each metric.
nasa_results = pd.DataFrame.from_dict(total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229553,0.119346,0.474905
D2,0.194986,0.508836,0.510896


### SMD

In [10]:
# Fresh accumulator for the SMD runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Fit one GRU autoencoder per SMD series and collect its scores.
for loader in (load_smd,):
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): (8, 4) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for series_idx in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[series_idx], x_tests[series_idx]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[series_idx], is_reconstructed=True)

        # Keep the largest F1 value reported by `evaluate`.
        best_f1 = np.max(scores['f1'])
        pr_auc, roc_auc = scores['pr_auc'], scores['roc_auc']

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(pr_auc)
        total_scores['roc_auc'].append(roc_auc)
        print(dataset_name, best_f1, pr_auc, roc_auc)

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.5532991977145021 0.5087209301507045
smd 0.3406113090673386 0.25450221625538183 0.8497082875017561
smd 0.10071937465397049 0.10586872631085434 0.5321338477198665
smd 0.10256405257596121 0.1120499487385144 0.5348809345451752
smd 0.6551723644470901 0.6378265802435517 0.8728160765025966
smd 0.867773276317471 0.8611645098785048 0.9147817824290806
smd 0.23600969573173824 0.3719933792883684 0.5689858632201376
smd 0.35346751136536103 0.3657115733052223 0.6670048399956172
smd 0.37366543094249705 0.33006695120485974 0.6984909128425297
smd 0.21837868410447125 0.20121439643378974 0.5078380141168557
smd 0.24242421765126243 0.48851743500970074 0.5704330114547569
smd 0.3260072976244171 0.35180008122547524 0.6610091175896096
smd 0.5172413293242953 0.4706337089368943 0.7338668264247119
smd 0.45751630398564924 0.6134643730051927 0.651061384089655
smd 0.8072726767021519 0.8176820145337635 0.9152050799168687
smd 0.9499999477500025 0.8948278943841095 0.9800480862494921
smd 0.50563

In [12]:
# One row per trained SMD series; the displayed table averages each metric over all of them.
smd_results = pd.DataFrame.from_dict(total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.44692,0.477986,0.721481


### ECG

In [13]:
# Fresh accumulator for the ECG runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Fit one GRU autoencoder per ECG dataset; rows are labelled D1, D2, ... by position.
for loader in (load_ecg,):
    # NOTE(review): (4, 2) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(4, 2)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for series_idx in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[series_idx], x_tests[series_idx]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[series_idx], is_reconstructed=True)

        dataset_label = f'D{series_idx+1}'
        # Keep the largest F1 value reported by `evaluate`.
        best_f1 = np.max(scores['f1'])
        pr_auc, roc_auc = scores['pr_auc'], scores['roc_auc']

        total_scores['dataset'].append(dataset_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(pr_auc)
        total_scores['roc_auc'].append(roc_auc)
        print(dataset_label, best_f1, pr_auc, roc_auc)

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.44696964667700284 0.5045910586012083 0.7908697060743594
D2 0.320987609612362 0.29396711365181183 0.6187224543614985
D3 0.2795698619956095 0.2101974164265049 0.5997106244206601
D4 0.2599999655066712 0.1519374901776919 0.5873508834004918
D5 0.4481327381484517 0.28146042339925625 0.8528652701734805
D6 0.1571194617650775 0.08186611915452571 0.40146961715481255
D7 0.09090904564852975 0.03809818014548835 0.5999543623371137
D8 0.17583891011296932 0.07543425836044337 0.3907810461464036
D9 0.3591212692948176 0.22068245618562568 0.44380112001998473


In [15]:
# Tabulate the ECG scores and display the per-label mean of each metric.
ecg_results = pd.DataFrame.from_dict(total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.44697,0.504591,0.79087
D2,0.320988,0.293967,0.618722
D3,0.27957,0.210197,0.599711
D4,0.26,0.151937,0.587351
D5,0.448133,0.28146,0.852865
D6,0.157119,0.081866,0.40147
D7,0.090909,0.038098,0.599954
D8,0.175839,0.075434,0.390781
D9,0.359121,0.220682,0.443801


### Power Demand

In [16]:
# Fresh accumulator for the power-demand run: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Fit a GRU autoencoder on the power-demand data and collect its scores.
for loader in (load_power_demand,):
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): (16, 8) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(16, 8)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for series_idx in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[series_idx], x_tests[series_idx]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[series_idx], is_reconstructed=True)

        # Keep the largest F1 value reported by `evaluate`.
        best_f1 = np.max(scores['f1'])
        pr_auc, roc_auc = scores['pr_auc'], scores['roc_auc']

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(pr_auc)
        total_scores['roc_auc'].append(roc_auc)
        print(dataset_name, best_f1, pr_auc, roc_auc)

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.28155337290036975 0.10123706160306813 0.3259057521135378


In [18]:
# Tabulate the power-demand scores and display the mean of each metric.
power_results = pd.DataFrame.from_dict(total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.281553,0.101237,0.325906


### 2D Gesture

In [19]:
# Fresh accumulator for the 2D gesture run: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Fit a GRU autoencoder on the 2D gesture data and collect its scores.
for loader in (load_gesture,):
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): (4, 2) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(4, 2)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for series_idx in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[series_idx], x_tests[series_idx]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[series_idx], is_reconstructed=True)

        # Keep the largest F1 value reported by `evaluate`.
        best_f1 = np.max(scores['f1'])
        pr_auc, roc_auc = scores['pr_auc'], scores['roc_auc']

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(pr_auc)
        total_scores['roc_auc'].append(roc_auc)
        print(dataset_name, best_f1, pr_auc, roc_auc)

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.4434844825662533 0.363963611501472 0.5665589368381779


In [21]:
# Tabulate the gesture scores and display the mean of each metric.
gesture_results = pd.DataFrame.from_dict(total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.443484,0.363964,0.566559
