In [1]:
import os
import random

# GPU configuration, set in the cell that runs before TensorFlow is imported:
# enumerate devices by PCI bus id, expose only device "1", and let TensorFlow
# grow its GPU memory allocation on demand.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "1",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: pin the Python, NumPy and TensorFlow random number generators
# to the same fixed seed.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def GRU_AE(X_train, epochs=50, batch_size=128, learning_rate=0.001,
           validation_split=0.3, patience=5, dropout_rate=0.2):
    """Build a bidirectional-GRU autoencoder and fit it to reconstruct ``X_train``.

    The encoder stacks two bidirectional GRU layers (128 then 64 units). The
    encoder's final output vector is repeated once per timestep and decoded by
    two more bidirectional GRU layers (64 then 128 units). A dense layer
    followed by a reshape maps the decoder output back to the shape of one
    input window.

    The default keyword values reproduce the hyperparameters that used to be
    hard-coded, so ``GRU_AE(X_train)`` behaves as before.

    Args:
        X_train: 3-D array indexed as (samples, timesteps, features). It is
            used as both the input and the target of the fit.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        learning_rate: Learning rate of the Adam optimizer.
        validation_split: Fraction of ``X_train`` passed to ``fit`` as
            ``validation_split``; it provides the monitored ``val_loss``.
        patience: ``patience`` of the early-stopping callback on ``val_loss``.
        dropout_rate: Rate of the two dropout layers.

    Returns:
        The fitted ``keras.Sequential`` model, trained with MSE loss and early
        stopping on ``val_loss`` (``restore_best_weights=True``).

    Raises:
        ValueError: If ``X_train`` is not 3-dimensional.
    """
    # Validate up front so a wrongly shaped array fails with a clear message
    # rather than an IndexError or an obscure Keras shape error later on.
    input_shape = tuple(X_train.shape)
    if len(input_shape) != 3:
        raise ValueError(
            "GRU_AE expects a 3-D array shaped (samples, timesteps, features), "
            f"got shape {input_shape}"
        )
    _, timesteps, n_features = input_shape

    Bi = layers.Bidirectional
    GRU = layers.GRU
    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(timesteps, n_features)),
            # Encoder
            Bi(GRU(128, return_sequences=True)),
            layers.Dropout(rate=dropout_rate),
            Bi(GRU(64)),
            # Bridge: feed the encoding to the decoder at every timestep.
            layers.RepeatVector(timesteps),
            # Decoder
            Bi(GRU(64, return_sequences=True)),
            layers.Dropout(rate=dropout_rate),
            Bi(GRU(128)),
            # Project to a flat window, then restore (timesteps, features).
            layers.Dense(timesteps * n_features),
            layers.Reshape([timesteps, n_features]),
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=patience, mode="min", restore_best_weights=True
    )
    # Autoencoder objective: the input is also the reconstruction target.
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Score table for the Yahoo S5 runs: one independent list per column, filled row by row.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GRU autoencoder per Yahoo S5 series and record its scores.
# The score table is rebuilt here so that re-running this cell (for example
# after an interrupted run) cannot append duplicate rows and skew the
# per-dataset means computed from it.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # Label derived from the loader's function name, e.g. load_yahoo_A1 -> 'yahoo_A1'.
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window length
    # and stride — confirm against data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # Largest F1 among the values returned by evaluate; computed once and reused.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9846153331124285 0.9718082504501817 0.9916564273176011
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849766
yahoo_A1 0.9777777234567926 0.8926667311517403 0.9782608650275311
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999797237572
yahoo_A1 0.666666577777783 0.29143647564645464 0.9216203524732386
yahoo_A1 0.666666577777783 0.27050687729346407 0.9185184986669245
yahoo_A1 0.999999948484851 0.9848484765594476 0.9999999981601732
yahoo_A1 0.9999999492063519 0.9920634873689427 0.9999999987844084
yahoo_A1 0.9960158854621382 0.9918788950663415 0.999899536658752
yahoo_A1 0.9999999487179513 0.9871794801841867 0.9999999983719279
yahoo_A1 0.9999999492248087 0.9925925881308424 0.9999999988166335
yahoo_A1 0.9937887686431877 0.9745889783097315 0.9997803244061312
yahoo_A1 0.9999999483870992 0.9838709592881969 0.9999999980537634
yahoo_A1 0.999999944444447 0.9444444227123633 0.9999999941537467
yahoo_A1 0.9999999487013013 0.987012979772212 0.9999999983645984
yahoo_A1 0.9999

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999485074653 0.9850746183359884 0.999999998349235
yahoo_A2 0.9999999485074653 0.9850746190034947 0.9999999983492351
yahoo_A2 0.9999999485074653 0.9850746189752191 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098910081 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9999999485074653 0.9850746190000539 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098749978 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840273
yahoo_A2 0.9999999485074653 0.9850746190044222 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098713929 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840273
yahoo_A2 0.9999999485074653 0.9850746189410708 0.999999998349235
yahoo_A2 0.9999999490384642 0.9903846098770788 0.9999999988797315
yahoo_A2 0.9951690311652572 0.990202323624711 0.9999694738492407
yahoo_A2 0.9999998500000123 0.0 0.9999998997840273
yahoo_A2 0.9999999484127009 0.9850746189840089 0.999999998347312
yahoo_A2

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.7874015235662503 0.8537045008484805 0.9739058799320568
yahoo_A3 0.442477841491114 0.28783516453443037 0.536505347542783
yahoo_A3 0.999999949275365 0.9927536188420294 0.9999999989442364
yahoo_A3 0.25396823169564314 0.0013262608992346487 -0.0
yahoo_A3 0.9999999495073918 0.995073888458195 0.9999999990854482
yahoo_A3 0.9999999491379338 0.991379305325064 0.9999999988292892
yahoo_A3 0.5852089616298454 0.5937230049322031 0.6146810729869403
yahoo_A3 0.6641508947468885 0.6858259747229091 0.8515259375379873
yahoo_A3 0.8581080575374391 0.938587201375048 0.965313374558301
yahoo_A3 0.8380566305118946 0.7042990714232287 0.7909420817153593
yahoo_A3 0.9473683707138142 0.9870088372678111 0.9929502135231524
yahoo_A3 0.9832401729143935 0.9902126244642298 0.9960713590033418
yahoo_A3 0.39790572217867165 0.3589552623071707 0.804019315957822
yahoo_A3 0.6766916843462069 0.4271609537068065 0.24130254995582107
yahoo_A3 0.9999999460000024 0.9629629466749489 0.9999999960295938
yahoo_A3 0.99999994947090

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.9070631464352372 0.9683910615932863 0.9869664022226041
yahoo_A4 0.25396823169564314 0.029666356606897447 0.3136376234234649
yahoo_A4 0.41726615387920135 0.14148535571982507 0.37630910472072054
yahoo_A4 0.2945736181779962 0.10643937594313047 0.21485010439716581
yahoo_A4 0.8159644744539141 0.8722949519266805 0.9051307647926903
yahoo_A4 0.9691357518880533 0.9884110856722146 0.9956510838004777
yahoo_A4 0.9959183165347797 0.9916755662587066 0.9999230580391214
yahoo_A4 0.8148147644890291 0.8999382579986701 0.964454525098302
yahoo_A4 0.8307691802963344 0.9147450760197999 0.9457800306084796
yahoo_A4 0.720930182002287 0.8248246455852262 0.8995517818614713
yahoo_A4 0.7028423269541796 0.7308825408450634 0.7728363064573144
yahoo_A4 0.5636363143181861 0.47444532946065665 0.7657247146340683
yahoo_A4 0.728110549860905 0.8299426432467397 0.8941133718960126
yahoo_A4 0.7782426294497675 0.4970241071168126 0.8890760654762111
yahoo_A4 0.08762885755526698 0.028648647540590995 0.35982477876280555


In [6]:
# Turn the collected per-series scores into a table and average each metric per dataset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.868594,0.80835,0.893488
yahoo_A2,0.94911,0.641197,0.992418
yahoo_A3,0.842826,0.824581,0.885721
yahoo_A4,0.766106,0.71657,0.784689


### NASA

In [7]:
# Score table for the NASA runs: one independent list per column, filled row by row.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GRU autoencoder per NASA sub-dataset and record its scores.
# The score table is rebuilt here so that re-running this cell (for example
# after an interrupted run) cannot append duplicate rows to it.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_nasa]:
    # NOTE(review): the two positional arguments are presumably window length
    # and stride — confirm against data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # Sub-datasets are labelled D1, D2, ... by their position in the loader output.
        dataset_name = f'D{i+1}'
        # Largest F1 among the values returned by evaluate; computed once and reused.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.13609743867087887 0.38085808500936236
D2 0.2699530282282525 0.58033152609849 0.513043477732262


In [9]:
# Turn the collected NASA scores into a table and average each metric per dataset label.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.136097,0.380858
D2,0.269953,0.580332,0.513043


### SMD

In [10]:
# Score table for the SMD runs: one independent list per column, filled row by row.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Train one GRU autoencoder per SMD series and record its scores.
# The score table is rebuilt here so that re-running this cell (for example
# after an interrupted run) cannot append duplicate rows and skew the
# per-dataset means computed from it.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_smd]:
    # Label derived from the loader's function name: load_smd -> 'smd'.
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window length
    # and stride — confirm against data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # Largest F1 among the values returned by evaluate; computed once and reused.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.20232181144029696 0.5556857798257815 0.521888680406877
smd 0.4944121092119041 0.3553964203737251 0.8194081084534198
smd 0.2167562934448035 0.22192045593377918 0.5722941098857486
smd 0.21315784906164412 0.2700091014456399 0.570233794556264
smd 0.689655126355723 0.7124870584612133 0.9004998621636707
smd 0.7582545434747453 0.8015753978118575 0.8660813721496888
smd 0.47728597388506266 0.5060263954880917 0.708917755565759
smd 0.6640393588431327 0.6527044210874728 0.8230047530233363
smd 0.5387138642047921 0.5722119920914801 0.8563585236919473
smd 0.2596273980668859 0.29469613108104226 0.5351486262393684
smd 0.5409090473837271 0.6176962032808778 0.6973927291068522
smd 0.33720843027623354 0.42183613786373186 0.7418653756769388
smd 0.7634387971760838 0.785165831120561 0.9306629731160502
smd 0.5519820037794383 0.6455285404327471 0.7111586175998494
smd 0.826223826370211 0.8773562432363131 0.9466348868876385
smd 0.9677418850899386 0.8337593496757939 0.9809176805306573
smd 0.7643821429023572 

In [12]:
# Turn the collected SMD scores into a table and average each metric per dataset.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.561876,0.568983,0.782024


### ECG

In [13]:
# Score table for the ECG runs: one independent list per column, filled row by row.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Train one GRU autoencoder per ECG sub-dataset and record its scores.
# The score table is rebuilt here so that re-running this cell (for example
# after an interrupted run) cannot append duplicate rows to it.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_ecg]:
    # NOTE(review): the two positional arguments are presumably window length
    # and stride — confirm against data_loader.
    datasets = loader(32, 16)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # Sub-datasets are labelled D1, D2, ... by their position in the loader output.
        dataset_name = f'D{i+1}'
        # Largest F1 among the values returned by evaluate; computed once and reused.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.533333281777782 0.4617297346798932 0.6403356439237116
D2 0.4444443950617328 0.33794124485934746 0.6729986359986803
D3 0.4705881840830494 0.2802485102837967 0.5935625227671435
D4 0.42857137959184166 0.36800538464353744 0.6587570543690245
D5 0.391304309735353 0.23766126789709818 0.7779720198671901
D6 0.26666661577778683 0.10675097404460569 0.4752238461864569
D7 0.12499997656250315 0.03942227426544456 0.558061950284714
D8 0.19251335137979503 0.07751874963788538 0.38409378400269567
D9 0.38805966963689265 0.1876006752040226 0.4018620992416828


In [15]:
# Turn the collected ECG scores into a table and average each metric per dataset label.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.533333,0.46173,0.640336
D2,0.444444,0.337941,0.672999
D3,0.470588,0.280249,0.593563
D4,0.428571,0.368005,0.658757
D5,0.391304,0.237661,0.777972
D6,0.266667,0.106751,0.475224
D7,0.125,0.039422,0.558062
D8,0.192513,0.077519,0.384094
D9,0.38806,0.187601,0.401862


### Power Demand

In [16]:
# Score table for the Power Demand run: one independent list per column, filled row by row.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Train a GRU autoencoder on each Power Demand series and record its scores.
# The score table is rebuilt here so that re-running this cell (for example
# after an interrupted run) cannot append duplicate rows and skew the
# per-dataset means computed from it.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_power_demand]:
    # Label derived from the loader's function name: load_power_demand -> 'power_demand'.
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window length
    # and stride — confirm against data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # Largest F1 among the values returned by evaluate; computed once and reused.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.4455610890712692 0.45380721089517917 0.6264662288427515


In [18]:
# Turn the collected Power Demand scores into a table and average each metric per dataset.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.445561,0.453807,0.626466


### 2D Gesture

In [19]:
# Score table for the 2D Gesture run: one independent list per column, filled row by row.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Train a GRU autoencoder on each 2D Gesture series and record its scores.
# The score table is rebuilt here so that re-running this cell (for example
# after an interrupted run) cannot append duplicate rows and skew the
# per-dataset means computed from it.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_gesture]:
    # Label derived from the loader's function name: load_gesture -> 'gesture'.
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window length
    # and stride — confirm against data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # Largest F1 among the values returned by evaluate; computed once and reused.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.5904152856393391 0.533700383799436 0.748199452187156


In [21]:
# Turn the collected 2D Gesture scores into a table and average each metric per dataset.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.590415,0.5337,0.748199
