In [1]:
import os
import random

# GPU selection for this notebook; this cell runs before TensorFlow is
# imported in the next cell.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "1",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed Python's, NumPy's and TensorFlow's random generators
# with one shared value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def GRU_AE(X_train, epochs=50, batch_size=128, learning_rate=0.001, validation_split=0.3, patience=5):
    """Build and fit a GRU autoencoder that reconstructs the windows in ``X_train``.

    Layer stack: two GRU layers (64 then 32 units) compress each window into a
    32-unit code, ``RepeatVector`` repeats that code ``X_train.shape[1]``
    times, two further GRU layers (32 then 64 units) process the repeated
    code, and a dense layer followed by a reshape produces an output with the
    same per-sample shape as the input.

    Args:
        X_train: Array whose axes 1 and 2 give the per-sample input shape
            ``(X_train.shape[1], X_train.shape[2])``. It is used both as the
            model input and as the reconstruction target.
        epochs: Maximum number of training epochs.
        batch_size: Batch size passed to ``model.fit``.
        learning_rate: Learning rate of the Adam optimizer.
        validation_split: Fraction of ``X_train`` that ``model.fit`` holds out
            to compute ``val_loss``.
        patience: Number of epochs without ``val_loss`` improvement tolerated
            by the early-stopping callback.

    Returns:
        The compiled, fitted ``keras.Sequential`` model. Early stopping is
        configured with ``restore_best_weights=True``.
    """
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(n_steps, n_features)),
            # Encoder: sequence -> single 32-unit code.
            layers.GRU(64, return_sequences=True),
            layers.GRU(32),
            # Feed the same code to the decoder at every step.
            layers.RepeatVector(n_steps),
            # Decoder: only the last 64-unit state is kept, then projected to
            # a flat vector and reshaped to the input window shape.
            layers.GRU(32, return_sequences=True),
            layers.GRU(64),
            layers.Dense(n_steps * n_features),
            layers.Reshape([n_steps, n_features]),
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience,
        mode="min",
        restore_best_weights=True,
    )
    # The training history is not needed by callers, so it is not kept.
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# One empty list per result column; filled by the Yahoo S5 evaluation loop.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Fit one GRU autoencoder per series of each Yahoo S5 subset and record its scores.
for loader in (load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4):
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])  # largest value in the F1 array
        total_scores['f1'].append(best_f1)
        for metric in ('pr_auc', 'roc_auc'):
            total_scores[metric].append(scores[metric])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.999999948484851 0.9848484768885952 0.9999999981470108
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849765
yahoo_A1 0.9777777234567926 0.9361449885721481 0.9782608650275311
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999797237571
yahoo_A1 0.9230768591716004 0.7516233384924756 0.9932539521948067
yahoo_A1 0.9230768591716004 0.7321428197320127 0.983333317279762
yahoo_A1 0.999999948484851 0.9848484767468211 0.9999999981601732
yahoo_A1 0.9999999492063519 0.9920634873702614 0.9999999987844084
yahoo_A1 0.9918032278789328 0.9898985611929751 0.9987135935114584
yahoo_A1 0.9999999487179513 0.9871794801696128 0.9999999983719279
yahoo_A1 0.9999999492592618 0.9925925881638828 0.9999999988187306
yahoo_A1 0.9999999487654349 0.9876543140316955 0.9999999984095602
yahoo_A1 0.9999999483870992 0.9838709593429846 0.9999999980537634
yahoo_A1 0.999999944444447 0.9444444227098194 0.9999999941537467
yahoo_A1 0.9999999487013013 0.9870129795396632 0.9999999983645984
yahoo_A1 0.9999

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999485074653 0.9850746189969276 0.999999998349235
yahoo_A2 0.9999999485074653 0.9850746190043685 0.9999999983492349
yahoo_A2 0.9999999485074653 0.9701492469344892 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098922338 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840273
yahoo_A2 0.9999999485074653 0.9850746190044062 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9711538417923347 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840274
yahoo_A2 0.8170731214083907 0.7505540954466747 0.9782731893920913
yahoo_A2 0.9999999490384642 0.9903846098914316 0.9999999988797316
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9999999485074653 0.9850746190044576 0.9999999983492349
yahoo_A2 0.9999999490099035 0.9903846098613955 0.9999999988780987
yahoo_A2 0.8952380443900254 0.9562416467326671 0.9922771661655505
yahoo_A2 0.9999998500000123 0.0 0.999999899784027
yahoo_A2 0.9481480967462305 0.9688524800831924 0.9977918933084817
yahoo_

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.2857142606053028 0.2853253122011201 0.6911858062733192
yahoo_A3 0.442477841491114 0.09462400950176422 0.4172518480714761
yahoo_A3 0.999999949275365 0.9927536188422551 0.9999999989442364
yahoo_A3 0.25396823169564314 0.0013262609102632883 -0.0
yahoo_A3 0.9999999495073918 0.9950738884784666 0.9999999990854482
yahoo_A3 0.4822694657590469 0.4271693452992741 0.66942722936744
yahoo_A3 0.5852089616298454 0.58962170373274 0.6564034696207315
yahoo_A3 0.45614031550631234 0.18976059016986113 0.4408918753686403
yahoo_A3 0.5210083646578659 0.43479112237821094 0.6194415426334163
yahoo_A3 0.7134502462945892 0.45954201756558466 0.4300327059027479
yahoo_A3 0.637770854195864 0.5064712119082856 0.45756002008940716
yahoo_A3 0.5783521396394755 0.5180157283557912 0.5226381737902993
yahoo_A3 0.3825136226104132 0.2324021067763571 0.7922960297297912
yahoo_A3 0.6766916843462069 0.3231957555916105 0.18435418694243086
yahoo_A3 0.11563168070466742 0.0012077301619393863 -0.0
yahoo_A3 0.9625667944107665 0.

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.7022900259046712 0.7997385661751326 0.8575840840662985
yahoo_A4 0.25396823169564314 0.025347731404241944 0.0925539049112061
yahoo_A4 0.41726615387920135 0.02747053381213145 0.10080649284502538
yahoo_A4 0.2945736181779962 0.07608773247962217 0.199214990012626
yahoo_A4 0.7412140102195622 0.8240662618828989 0.817185660310877
yahoo_A4 0.7681158935307738 0.8102458670646077 0.7927764579527029
yahoo_A4 0.9999999491869945 0.991869913911148 0.9999999988715343
yahoo_A4 0.947368370144047 0.9748876634464334 0.9945792220214215
yahoo_A4 0.5942491593197877 0.29515811825462984 0.49662327059518807
yahoo_A4 0.45070419027970904 0.23890491939324238 0.4306700995387937
yahoo_A4 0.637770854195864 0.4491972595352334 0.5171506678590306
yahoo_A4 0.3882783568463308 0.11514425719072209 0.5390907921890572
yahoo_A4 0.5688072904469362 0.5937244552376824 0.6698280733496706
yahoo_A4 0.5187499584433627 0.27605479602710115 0.6890782283322308
yahoo_A4 0.07439824225732532 0.0011792458495933056 -0.0
yahoo_A4 0.9

In [6]:
# Tabulate the per-series scores, then show the mean of each metric per dataset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.889975,0.814288,0.875743
yahoo_A2,0.911386,0.581598,0.965196
yahoo_A3,0.69894,0.637631,0.712421
yahoo_A4,0.630361,0.51809,0.615914


### NASA

In [7]:
# One empty list per result column; filled by the NASA evaluation loop.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Fit one GRU autoencoder per NASA series and record its scores under D1, D2, ...
for loader in (load_nasa,):
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        series_label = f'D{i+1}'
        total_scores['dataset'].append(series_label)
        best_f1 = np.max(scores['f1'])  # largest value in the F1 array
        total_scores['f1'].append(best_f1)
        for metric in ('pr_auc', 'roc_auc'):
            total_scores[metric].append(scores[metric])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.1331675359666748 0.3951811331720413
D2 0.2699530282282525 0.5840009300457375 0.5173913038147389


In [9]:
# Tabulate the per-series scores, then show the mean of each metric per label.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.133168,0.395181
D2,0.269953,0.584001,0.517391


### SMD

In [10]:
# One empty list per result column; filled by the SMD evaluation loop.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Fit one GRU autoencoder per SMD series and record its scores.
for loader in (load_smd,):
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])  # largest value in the F1 array
        total_scores['f1'].append(best_f1)
        for metric in ('pr_auc', 'roc_auc'):
            total_scores[metric].append(scores[metric])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.20232181144029696 0.5556857798256555 0.521888680406877
smd 0.4799153862067548 0.3704587461464109 0.8456969709069924
smd 0.19218984487755322 0.20555036782792857 0.5618144099619207
smd 0.21862697650832094 0.2743825490324633 0.5725448528211273
smd 0.689655126355723 0.7242866534928226 0.9248570588492223
smd 0.8011374686615652 0.8809964340353864 0.9323269717783683
smd 0.48169238762015126 0.5144352410117876 0.7142559717939176
smd 0.5171568153533427 0.5196653098021569 0.804418165614772
smd 0.5523752827780825 0.6017513820426599 0.8639486913749549
smd 0.2596273980668859 0.29149455938001556 0.5349337042184247
smd 0.6330730552116334 0.6320983417488386 0.7468003746834629
smd 0.3405362718141473 0.37586539247074835 0.7311994784467944
smd 0.7878912186491072 0.8065689774444614 0.9503663207539063
smd 0.5660660206263733 0.6505157485618916 0.7159167705976891
smd 0.8598943271553358 0.8948122680627708 0.9702345590871243
smd 0.9677418850899386 0.6737817434280359 0.9840600305195426
smd 0.76438214290235

In [12]:
# Tabulate the per-series scores, then show the mean of each metric per dataset.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.561803,0.556532,0.789272


### ECG

In [13]:
# One empty list per result column; filled by the ECG evaluation loop.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Fit one GRU autoencoder per ECG series and record its scores under D1, D2, ...
for loader in (load_ecg,):
    datasets = loader(32, 16)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        series_label = f'D{i+1}'
        total_scores['dataset'].append(series_label)
        best_f1 = np.max(scores['f1'])  # largest value in the F1 array
        total_scores['f1'].append(best_f1)
        for metric in ('pr_auc', 'roc_auc'):
            total_scores[metric].append(scores[metric])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.5624999472656292 0.4881127743697098 0.6301232256483805
D2 0.3870967242455834 0.26260816430923245 0.6140886409731234
D3 0.15584413094957364 0.05302856202949947 0.5106368198669071
D4 0.41379305541023087 0.2066308962194268 0.6658192012465715
D5 0.42105258822715075 0.22531546054122592 0.7581585002941601
D6 0.26666661577778683 0.12357711793549797 0.5669006870844847
D7 0.19999995600000867 0.06704303780946932 0.599111017268
D8 0.19251335137979503 0.08312357755157318 0.4403973480889677
D9 0.3909774115665127 0.17500871871334933 0.3539079093660659


In [15]:
# Tabulate the per-series scores, then show the mean of each metric per label.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.5625,0.488113,0.630123
D2,0.387097,0.262608,0.614089
D3,0.155844,0.053029,0.510637
D4,0.413793,0.206631,0.665819
D5,0.421053,0.225315,0.758159
D6,0.266667,0.123577,0.566901
D7,0.2,0.067043,0.599111
D8,0.192513,0.083124,0.440397
D9,0.390977,0.175009,0.353908


### Power Demand

In [16]:
# One empty list per result column; filled by the Power Demand evaluation loop.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Fit one GRU autoencoder per Power Demand series and record its scores.
for loader in (load_power_demand,):
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])  # largest value in the F1 array
        total_scores['f1'].append(best_f1)
        for metric in ('pr_auc', 'roc_auc'):
            total_scores[metric].append(scores[metric])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.3606206231571944 0.26062952329393857 0.557951891126357


In [18]:
# Tabulate the per-series scores, then show the mean of each metric per dataset.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.360621,0.26063,0.557952


### 2D Gesture

In [19]:
# One empty list per result column; filled by the 2D Gesture evaluation loop.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Fit one GRU autoencoder per 2D Gesture series and record its scores.
for loader in (load_gesture,):
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])  # largest value in the F1 array
        total_scores['f1'].append(best_f1)
        for metric in ('pr_auc', 'roc_auc'):
            total_scores[metric].append(scores[metric])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.5982367260887745 0.5472411889228564 0.754268999220557


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.598237,0.547241,0.754269
