In [1]:
import os
import random

# GPU configuration: these variables are set before TensorFlow is imported in the next cell.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",      # order devices by PCI bus id
        "CUDA_VISIBLE_DEVICES": "1",            # expose only the device with index 1
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",    # request on-demand GPU memory growth
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed Python's `random`, NumPy and TensorFlow with the same value.
SEED = 0
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

In [3]:
def GRU_AE(X_train, *, epochs=50, batch_size=128, learning_rate=0.001,
           validation_split=0.3, patience=5):
    """Build, compile and fit a bidirectional-GRU autoencoder on ``X_train``.

    The network encodes each input window with two bidirectional GRU layers
    (128 then 64 units), repeats the encoding ``X_train.shape[1]`` times,
    decodes it with two bidirectional GRU layers (64 then 128 units) and maps
    the result back to the input window shape with a Dense + Reshape head.
    The model is trained to reproduce its own input (``x == y``) with MSE loss.

    Parameters
    ----------
    X_train : array-like with at least three dimensions
        Training windows; ``shape[1]`` and ``shape[2]`` define the model's
        input/output shape. Presumably (samples, timesteps, features) --
        confirm against ``data_loader``.
    epochs : int, default 50
        Maximum number of training epochs.
    batch_size : int, default 128
        Mini-batch size passed to ``fit``.
    learning_rate : float, default 0.001
        Learning rate of the Adam optimizer.
    validation_split : float, default 0.3
        Fraction of ``X_train`` that ``fit`` holds out for ``val_loss``.
    patience : int, default 5
        ``EarlyStopping`` patience on ``val_loss``.

    Returns
    -------
    keras.Sequential
        The fitted model.

    Notes
    -----
    NOTE(review): every call builds a brand-new model. When this is called many
    times in one kernel (as the evaluation loops do), Keras global state may
    accumulate; consider ``keras.backend.clear_session()`` between calls after
    checking that it does not affect seeding on the TensorFlow version in use.
    """
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    Bi = layers.Bidirectional
    GRU = layers.GRU
    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(n_steps, n_features)),
            # Encoder
            Bi(GRU(128, return_sequences=True)),
            layers.Dropout(rate=0.2),
            Bi(GRU(64)),
            # Bridge: feed the fixed-size encoding to the decoder at every step
            layers.RepeatVector(n_steps),
            # Decoder
            Bi(GRU(64, return_sequences=True)),
            layers.Dropout(rate=0.2),
            Bi(GRU(128)),
            # Project to a flat window, then restore the input window shape
            layers.Dense(n_steps * n_features),
            layers.Reshape([n_steps, n_features]),
        ]
    )
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="mse",
    )

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience,
        mode="min",
        restore_best_weights=True,
    )
    # The History object returned by fit() is not needed by any caller, so it is discarded.
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Fresh accumulator for the Yahoo S5 runs: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # NOTE(review): the meaning of (8, 4) is defined in data_loader --
    # presumably window length and stride; confirm there.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    # Fit one autoencoder per training set and score its reconstruction of the matching test set.
    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        model = GRU_AE(X_train)
        X_pred = model.predict(X_test)
        scores = evaluate(X_test, X_pred, y_tests[i], is_reconstructed=True)

        # Keep the maximum of scores['f1'] together with the two AUC values.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for column, value in row.items():
            total_scores[column].append(value)
        print(*row.values())

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.49999992500001 0.9999999490291289
yahoo_A1 0.7999999200000041 0.4285713862310345 0.983818735506542
yahoo_A1 0.9090908429752097 0.7547169388267891 0.9166666504722225
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9999999466666691 0.9666666517740905 0.9999999953333334
yahoo_A1 0.8923076398579909 0.9163000251722415 0.979149420717094
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099013
yahoo_A1 0.9999999333333361 0.8333332883333358 0.9999999823232326
yahoo_A1 0.999999930000003 0.7999999498333362 0.9999999790000005
yahoo_A1 0.9999999437500025 0.9374999763313819 0.9999999926264045
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656862758
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790291265
yahoo_A1 0.9999999409090937 0.909

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.047619042913832545 0.00632911441451456 0.5783783450252035
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.8571428160204101 0.9999999851907257
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.99999997445946
yahoo_A2 0.9999999357142885 0.8571428160204101 0.9999999851907255
yahoo_A2 0.9999998500000123 0.0 0.9999998992248164
yahoo_A2 0.9999999250000032 0.7499999447916703 0.99999997445946
yahoo_A2 0.9999999357142885 0.85714281602041 0.9999999851907255
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.7272726677685981 0.4732905412004781 0.8945400017113746
yahoo_A2 0.9999999357142885 0.8571428160204102 0.9999999851907255
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.041450773099949295 0.007352941246102919 0.6405404800591732
yah

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.034188030769230944 0.004807692476381103 0.15573769290513406
yahoo_A3 0.19512191885782762 0.06984898831661553 0.5452586134038719
yahoo_A3 0.9999999333333361 0.833333288055558 0.9999999824858761
yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.9999999400000027 0.8999999672103189 0.9999999891228071
yahoo_A3 0.9999999250000032 0.7499999447916703 0.9999999741666674
yahoo_A3 0.17647057188581453 0.051349112449322026 0.350971444591802
yahoo_A3 0.27272723512397123 0.10428722987881031 0.8135593061890745
yahoo_A3 0.5999999400000039 0.34369491177776956 0.879378514327269
yahoo_A3 0.33333330000000166 0.1915617133884161 0.7197002828363563
yahoo_A3 0.7777777191358058 0.7178867235525681 0.9598927770563537
yahoo_A3 0.9999999400000027 0.8999999672103188 0.9999999891228071
yahoo_A3 0.7999999200000041 0.29166664027777967 0.9959015887328702
yahoo_A3 0.17647057188581453 0.029301225261482135 0.22547219291671622
yahoo_A3 0.07407405377229562 0.015878789235824798 0.5309916914000109
ya

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.4999999500000025 0.19225520002188026 0.5429378392296199
yahoo_A4 0.9999999000000052 0.4999999375000063 0.9999999491803307
yahoo_A4 0.0624999938476567 0.004504505242342159 0.08333332493055642
yahoo_A4 0.08510637464916311 0.01746588053792946 0.39305554410532445
yahoo_A4 0.666666611111114 0.5050104287200837 0.889008608810661
yahoo_A4 0.9999999375000026 0.8749999621205374 0.9999999866379312
yahoo_A4 0.9999999250000032 0.7499999447916703 0.9999999741666674
yahoo_A4 0.999999930000003 0.7999999503333362 0.9999999791596641
yahoo_A4 0.6666666074074105 0.4644025401978338 0.8305084592396824
yahoo_A4 0.7499999312500037 0.5028408719912216 0.9812499746510424
yahoo_A4 0.666666611111114 0.46249194174588726 0.8658405056026853
yahoo_A4 0.0624999938476567 0.01651956271929248 0.3652777627974544
yahoo_A4 0.6666666000000033 0.27703755643897165 0.8027777528495378
yahoo_A4 0.9090908429752097 0.7886904330251582 0.9971751237782888
yahoo_A4 0.036363629223141535 0.010036129638135067 0.4057376734379226


In [6]:
# Tabulate the collected scores and show the mean of every metric per Yahoo subset.
yahoo_results = pd.DataFrame.from_dict(total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.823036,0.582004,0.907337
yahoo_A2,0.939426,0.481707,0.981659
yahoo_A3,0.677855,0.464058,0.822208
yahoo_A4,0.631576,0.419154,0.782896


### NASA

In [7]:
# Fresh accumulator for the NASA runs: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
for loader in [load_nasa]:
    # NOTE(review): the meaning of (8, 4) is defined in data_loader --
    # presumably window length and stride; confirm there.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    # Fit one autoencoder per training set and score its reconstruction of the matching test set.
    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        model = GRU_AE(X_train)
        X_pred = model.predict(X_test)
        scores = evaluate(X_test, X_pred, y_tests[i], is_reconstructed=True)

        # Each entry is labelled by its 1-based position: D1, D2, ...
        row = {
            'dataset': f'D{i+1}',
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for column, value in row.items():
            total_scores[column].append(value)
        print(*row.values())

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2295534087061448 0.11435055786134503 0.47960692111781805
D2 0.19498578202833744 0.5127251721427412 0.5108992765450084


In [9]:
# Tabulate the collected scores; rows are grouped by their D<n> label before averaging.
nasa_results = pd.DataFrame.from_dict(total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229553,0.114351,0.479607
D2,0.194986,0.512725,0.510899


### SMD

In [10]:
# Fresh accumulator for the SMD runs: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
for loader in [load_smd]:
    # NOTE(review): the meaning of (8, 4) is defined in data_loader --
    # presumably window length and stride; confirm there.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    # Fit one autoencoder per training set and score its reconstruction of the matching test set.
    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        model = GRU_AE(X_train)
        X_pred = model.predict(X_test)
        scores = evaluate(X_test, X_pred, y_tests[i], is_reconstructed=True)

        # Keep the maximum of scores['f1'] together with the two AUC values.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for column, value in row.items():
            total_scores[column].append(value)
        print(*row.values())

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.553299197714649 0.5087209301507045
smd 0.31884053006984603 0.2181310244189012 0.8501856509048835
smd 0.10071937465397049 0.10456384955978396 0.5321224883395712
smd 0.10256405257596121 0.11212365172076746 0.5348969847078495
smd 0.6551723644470901 0.6464934261558403 0.8730589964468244
smd 0.8617511027774563 0.8565912384226471 0.9084199663714048
smd 0.21938772255961542 0.386003492584139 0.5624395143698094
smd 0.3944443969475366 0.43717323953682546 0.6547037738062541
smd 0.2591875730119921 0.1948514295751734 0.6279543498372929
smd 0.21837868410447125 0.1865266999881764 0.5055019126926952
smd 0.20618554490381766 0.4739804419890659 0.5586602420457552
smd 0.3882352495263892 0.49698586346414353 0.638648948335108
smd 0.49999995017395504 0.4621296907517041 0.7215701611023275
smd 0.4585986888393067 0.6291734486983018 0.6511558333270409
smd 0.7941175965181692 0.8084104998955339 0.8978814626980185
smd 0.9629629106538662 0.940729386530256 0.9642857119734021
smd 0.5008077157

In [12]:
# Tabulate the collected scores and show the mean of every metric over all SMD runs.
smd_results = pd.DataFrame.from_dict(total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.442977,0.481682,0.711049


### ECG

In [13]:
# Fresh accumulator for the ECG runs: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
for loader in [load_ecg]:
    # NOTE(review): the meaning of (4, 2) is defined in data_loader --
    # presumably window length and stride; confirm there.
    datasets = loader(4, 2)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    # Fit one autoencoder per training set and score its reconstruction of the matching test set.
    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        model = GRU_AE(X_train)
        X_pred = model.predict(X_test)
        scores = evaluate(X_test, X_pred, y_tests[i], is_reconstructed=True)

        # Each entry is labelled by its 1-based position: D1, D2, ...
        row = {
            'dataset': f'D{i+1}',
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for column, value in row.items():
            total_scores[column].append(value)
        print(*row.values())

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.4666666163237365 0.42827502869676753 0.7652634475573094
D2 0.3550295354644516 0.3391944874834814 0.6216770088676814
D3 0.2857142575534381 0.20870592076866495 0.6152340747288508
D4 0.26446277002937557 0.15136613761783205 0.5691518073220097
D5 0.35555552076190805 0.2214000863472983 0.7998066728860694
D6 0.16049377867704548 0.08728293615392999 0.44964997393862366
D7 0.06432746761758143 0.02748126541818672 0.5740746108892747
D8 0.17583891011296932 0.07643059895451278 0.39501425884864594
D9 0.3580130914399724 0.21307437816814523 0.40177145554168625


In [15]:
# Tabulate the collected scores; rows are grouped by their D<n> label before averaging.
ecg_results = pd.DataFrame.from_dict(total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.466667,0.428275,0.765263
D2,0.35503,0.339194,0.621677
D3,0.285714,0.208706,0.615234
D4,0.264463,0.151366,0.569152
D5,0.355556,0.2214,0.799807
D6,0.160494,0.087283,0.44965
D7,0.064327,0.027481,0.574075
D8,0.175839,0.076431,0.395014
D9,0.358013,0.213074,0.401771


### Power Demand

In [16]:
# Fresh accumulator for the Power Demand run: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
for loader in [load_power_demand]:
    # NOTE(review): the meaning of (16, 8) is defined in data_loader --
    # presumably window length and stride; confirm there.
    datasets = loader(16, 8)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    # Fit one autoencoder per training set and score its reconstruction of the matching test set.
    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        model = GRU_AE(X_train)
        X_pred = model.predict(X_test)
        scores = evaluate(X_test, X_pred, y_tests[i], is_reconstructed=True)

        # Keep the maximum of scores['f1'] together with the two AUC values.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for column, value in row.items():
            total_scores[column].append(value)
        print(*row.values())

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.26431715744532397 0.1205063101143548 0.3998493280389599


In [18]:
# Tabulate the collected scores and show the per-dataset mean of every metric.
power_results = pd.DataFrame.from_dict(total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.264317,0.120506,0.399849


### 2D Gesture

In [19]:
# Fresh accumulator for the 2D Gesture run: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
for loader in [load_gesture]:
    # NOTE(review): the meaning of (4, 2) is defined in data_loader --
    # presumably window length and stride; confirm there.
    datasets = loader(4, 2)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    # Fit one autoencoder per training set and score its reconstruction of the matching test set.
    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        model = GRU_AE(X_train)
        X_pred = model.predict(X_test)
        scores = evaluate(X_test, X_pred, y_tests[i], is_reconstructed=True)

        # Keep the maximum of scores['f1'] together with the two AUC values.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for column, value in row.items():
            total_scores[column].append(value)
        print(*row.values())

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.44010763455218954 0.3259276804719209 0.5665224514234662


In [21]:
# Tabulate the collected scores and show the per-dataset mean of every metric.
gesture_results = pd.DataFrame.from_dict(total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.440108,0.325928,0.566522
