In [1]:
import os
import random

# GPU selection is placed in the environment before TensorFlow is imported in the next cell.
# NOTE(review): device index "1" is specific to the machine this was run on — adjust as needed.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "1",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed the Python, NumPy and TensorFlow random generators with the same value.
for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
    set_seed(0)

In [3]:
def LSTM_AE(X_train, epochs=50, batch_size=128, learning_rate=0.001, validation_split=0.3, patience=5):
    """Build, compile and fit an LSTM autoencoder that reconstructs its own input.

    The network encodes each window with two LSTM layers (64 -> 32 units),
    repeats the encoding once per timestep, decodes with two LSTM layers
    (32 -> 64 units) and maps the result back to the input window shape with
    a Dense + Reshape head. It is trained with Adam on mean squared error,
    using ``X_train`` as both input and target.

    Args:
        X_train: 3-D array-like of training windows. Axes 1 and 2 define the
            model's input shape (assumed to be timesteps and features, with
            axis 0 indexing windows — confirm against the data loaders).
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        learning_rate: Learning rate of the Adam optimizer.
        validation_split: Fraction of ``X_train`` held out by ``fit`` to
            compute ``val_loss``.
        patience: Epochs without ``val_loss`` improvement before early
            stopping; the best weights are restored afterwards.

    Returns:
        The fitted ``keras.Sequential`` model.

    Raises:
        ValueError: If ``X_train`` is not 3-dimensional.
    """
    # Fail early with a readable message instead of an index error on shape[2].
    if len(X_train.shape) != 3:
        raise ValueError(
            f"X_train must be 3-dimensional, got shape {tuple(X_train.shape)}"
        )

    n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(n_timesteps, n_features)),
            # Encoder: compress the whole window into a single 32-unit vector.
            layers.LSTM(64, return_sequences=True),
            layers.LSTM(32),
            # Feed the encoding to the decoder once per timestep.
            layers.RepeatVector(n_timesteps),
            # Decoder: the final LSTM returns only its last state, so the
            # Dense + Reshape head produces the full reconstructed window.
            layers.LSTM(32, return_sequences=True),
            layers.LSTM(64),
            layers.Dense(n_timesteps * n_features),
            layers.Reshape([n_timesteps, n_features]),
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience,
        mode="min",
        restore_best_weights=True,
    )
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Fresh accumulator for this section: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # Rows are labelled with the loader's name minus its "load_" prefix (e.g. "yahoo_A1").
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window size and stride — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        # Fit a fresh autoencoder on this series, then score its reconstruction of the test data.
        model = LSTM_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # Record the largest F1 value returned by evaluate() together with both AUC scores.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for metric, value in row.items():
            total_scores[metric].append(value)
        print(*row.values())

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.999999948484851 0.9848484768887302 0.9999999981470108
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849765
yahoo_A1 0.9777777234567926 0.9361449885721481 0.9782608650275311
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999797237573
yahoo_A1 0.9230768591716004 0.7593984577788779 0.9948412530910025
yahoo_A1 0.9230768591716004 0.738168914161105 0.988095221750756
yahoo_A1 0.999999948484851 0.9696969627530068 0.9999999981601732
yahoo_A1 0.9999999492063519 0.9920634873729 0.9999999987844085
yahoo_A1 0.9999999492063519 0.9920634873739651 0.9999999987844084
yahoo_A1 0.9999999487179513 0.9871794802217732 0.9999999983719279
yahoo_A1 0.9999999492592618 0.9925925880007378 0.9999999988187308
yahoo_A1 0.9999999487654349 0.9753086360122195 0.9999999984095602
yahoo_A1 0.9999999483870992 0.9838709593737203 0.9999999980537634
yahoo_A1 0.999999944444447 0.9444444227013403 0.9999999941537466
yahoo_A1 0.9999999487013013 0.98701297998387 0.9999999983645982
yahoo_A1 0.9999999476

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999485074653 0.9850746189943591 0.9999999983492349
yahoo_A2 0.9999999485074653 0.985074619003482 0.9999999983492349
yahoo_A2 0.9999999485074653 0.9701492469322562 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098727093 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9999999485074653 0.9850746190042557 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9711538417415614 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.8333332832593717 0.7911154921918093 0.9819809165728212
yahoo_A2 0.9999999490384642 0.9903846098920692 0.9999999988797317
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9999999485074653 0.9850746190043822 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098876954 0.9999999988797316
yahoo_A2 0.5878135728883274 0.6572891379528537 0.9179334544051235
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9781021383771138 0.9820830772969945 0.9996929890922319
yahoo

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.25396823169564314 0.0013262608999013877 -0.0
yahoo_A3 0.442477841491114 0.17299010715559546 0.32321842108632104
yahoo_A3 0.999999949275365 0.9927536188425328 0.9999999989442363
yahoo_A3 0.25396823169564314 0.001326260899756491 -0.0
yahoo_A3 0.8111110610555586 0.9041987555929426 0.9057466465900406
yahoo_A3 0.41726615387920135 0.2314893221977642 0.5052726497732679
yahoo_A3 0.5852089616298454 0.2455146031560686 0.13277793151382344
yahoo_A3 0.4977375065621147 0.35785461996387935 0.6905714110088635
yahoo_A3 0.5210083646578659 0.46793066962709245 0.6247275625516848
yahoo_A3 0.7134502462945892 0.5248729330995161 0.5705160202954571
yahoo_A3 0.7239818505343905 0.8308587104691132 0.8210357010431738
yahoo_A3 0.5783521396394755 0.5402835981060761 0.5144388844614598
yahoo_A3 0.19301846297450495 0.014077045869936486 0.8689567228251338
yahoo_A3 0.6766916843462069 0.4248365342065774 0.2411483301148734
yahoo_A3 0.11563168070466742 0.00120773016973904 -0.0
yahoo_A3 0.9999999494709021 0.994708

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.9857142350127577 0.9917015987786933 0.9994070057645126
yahoo_A4 0.30046945789415885 0.0025395109248887594 0.21742021220497085
yahoo_A4 0.41726615387920135 0.05588949133176129 0.21242666034263516
yahoo_A4 0.3394495129450406 0.08399765020018418 0.2260219083105766
yahoo_A4 0.7601077663051014 0.8745798784452463 0.8706726462778589
yahoo_A4 0.787233993566725 0.8583349741952166 0.8648633010769219
yahoo_A4 0.7063829281665949 0.7793592934214006 0.8940597232502914
yahoo_A4 0.7176470091487923 0.7367406128838452 0.8024299438007285
yahoo_A4 0.5942491593197877 0.28437223703329084 0.33802973699510486
yahoo_A4 0.4514990830491894 0.22591585259667527 0.3937234466004427
yahoo_A4 0.637770854195864 0.3203459855388856 0.3984048938019873
yahoo_A4 0.3882783568463308 0.1462748538369584 0.5254104996615654
yahoo_A4 0.45070419027970904 0.005591333124443434 0.11338865409017528
yahoo_A4 0.3551401575753364 0.08004726422413729 0.27811871530037546
yahoo_A4 0.07439824225732532 0.0023529415769828785 -0.0
yaho

In [6]:
# Tabulate the per-series scores, then average every metric within each Yahoo subset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.891573,0.806011,0.870127
yahoo_A2,0.918077,0.588413,0.975948
yahoo_A3,0.669662,0.548156,0.612705
yahoo_A4,0.586962,0.429032,0.530933


### NASA

In [7]:
# Fresh accumulator for this section: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
for loader in [load_nasa]:
    # NOTE(review): the two positional arguments are presumably window size and stride — confirm in data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        # Fit a fresh autoencoder on this sub-dataset, then score its reconstruction of the test data.
        model = LSTM_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # Sub-datasets are labelled D1, D2, ... in load order; keep the largest F1 and both AUC scores.
        row = {
            'dataset': f'D{i+1}',
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for metric, value in row.items():
            total_scores[metric].append(value)
        print(*row.values())

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2656747906047378 0.13942926544414166 0.40202036749755604
D2 0.2699530282282525 0.5840009300457375 0.5173913038147389


In [9]:
# Tabulate the scores and show the per-dataset mean of every metric.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.265675,0.139429,0.40202
D2,0.269953,0.584001,0.517391


### SMD

In [10]:
# Fresh accumulator for this section: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
for loader in [load_smd]:
    # Rows are labelled with the loader's name minus its "load_" prefix ("smd").
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window size and stride — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        # Fit a fresh autoencoder on this entity, then score its reconstruction of the test data.
        model = LSTM_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # Record the largest F1 value returned by evaluate() together with both AUC scores.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for metric, value in row.items():
            total_scores[metric].append(value)
        print(*row.values())

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.20232181144029696 0.5556857798256208 0.521888680406877
smd 0.4884695542368271 0.3952305149924574 0.8583806707095707
smd 0.1932549726720643 0.20515748267090567 0.5625838056472208
smd 0.21862697650832094 0.2697912236319896 0.5725466538793574
smd 0.6895734135281695 0.6764737582044151 0.9031650051289469
smd 0.7246244414787831 0.8286646457356777 0.9061605101649386
smd 0.4878721681463277 0.5107325834696655 0.7280720453715078
smd 0.46572167528145164 0.4675493308539854 0.79431395379695
smd 0.5390030479798098 0.5999410418162284 0.8688727064654109
smd 0.2596273980668859 0.2906094023612728 0.5348854307462712
smd 0.7123918808195437 0.7075520094751071 0.8411063677102815
smd 0.28882915693676936 0.3427217546307107 0.6964020720283004
smd 0.799010741383387 0.8013886587523206 0.9522882557532154
smd 0.522184261586189 0.519626718700251 0.7110344377867217
smd 0.8596218898924686 0.8848318295911204 0.9628033823709572
smd 0.9677418850899386 0.6730995648259787 0.9809677285777375
smd 0.7643821429023572 0.

In [12]:
# Tabulate the per-entity scores and average every metric over the whole SMD collection.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.550323,0.537839,0.788543


### ECG

In [13]:
# Fresh accumulator for this section: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
for loader in [load_ecg]:
    # NOTE(review): the two positional arguments are presumably window size and stride — confirm in data_loader.
    datasets = loader(32, 16)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        # Fit a fresh autoencoder on this recording, then score its reconstruction of the test data.
        model = LSTM_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # Sub-datasets are labelled D1, D2, ... in load order; keep the largest F1 and both AUC scores.
        row = {
            'dataset': f'D{i+1}',
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for metric, value in row.items():
            total_scores[metric].append(value)
        print(*row.values())

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.5599999552000028 0.4615818986261202 0.6562840370168257
D2 0.3870967242455834 0.2628370549147405 0.6306648503888881
D3 0.13333329580247924 0.05020114640982156 0.4879326208064151
D4 0.4117646619377204 0.21541268050975737 0.6792372801443013
D5 0.4090908697314083 0.23429865038944878 0.7701631621533193
D6 0.26666661577778683 0.11078050651544834 0.5299700254641283
D7 0.23999994880001013 0.07232347804945813 0.6231608876362014
D8 0.2077921878816007 0.08365903648905956 0.46594110847483333
D9 0.3909774115665127 0.17438881570305717 0.34967635839082867


In [15]:
# Tabulate the scores and show the per-dataset mean of every metric.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.56,0.461582,0.656284
D2,0.387097,0.262837,0.630665
D3,0.133333,0.050201,0.487933
D4,0.411765,0.215413,0.679237
D5,0.409091,0.234299,0.770163
D6,0.266667,0.110781,0.52997
D7,0.24,0.072323,0.623161
D8,0.207792,0.083659,0.465941
D9,0.390977,0.174389,0.349676


### Power Demand

In [16]:
# Fresh accumulator for this section: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
for loader in [load_power_demand]:
    # Rows are labelled with the loader's name minus its "load_" prefix ("power_demand").
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window size and stride — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        # Fit a fresh autoencoder on this series, then score its reconstruction of the test data.
        model = LSTM_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # Record the largest F1 value returned by evaluate() together with both AUC scores.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for metric, value in row.items():
            total_scores[metric].append(value)
        print(*row.values())

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.44159994989568563 0.39663546677344463 0.6057407593716193


In [18]:
# Tabulate the scores and show the per-dataset mean of every metric.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.4416,0.396635,0.605741


### 2D Gesture

In [19]:
# Fresh accumulator for this section: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
for loader in [load_gesture]:
    # Rows are labelled with the loader's name minus its "load_" prefix ("gesture").
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two positional arguments are presumably window size and stride — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = (datasets[key] for key in ('x_train', 'x_test', 'y_test'))

    for i in tqdm(range(len(x_trains))):
        X_train, X_test = x_trains[i], x_tests[i]

        # Fit a fresh autoencoder on this series, then score its reconstruction of the test data.
        model = LSTM_AE(X_train)
        reconstruction = model.predict(X_test)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # Record the largest F1 value returned by evaluate() together with both AUC scores.
        row = {
            'dataset': dataset_name,
            'f1': np.max(scores['f1']),
            'pr_auc': scores['pr_auc'],
            'roc_auc': scores['roc_auc'],
        }
        for metric, value in row.items():
            total_scores[metric].append(value)
        print(*row.values())

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.6258889969158837 0.5829129570521231 0.7694850127452466


In [21]:
# Tabulate the scores and show the per-dataset mean of every metric.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.625889,0.582913,0.769485
