In [1]:
import os
import random

# GPU configuration; this cell runs before the cell that imports TensorFlow.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",    # enumerate GPUs in PCI bus order
        "CUDA_VISIBLE_DEVICES": "1",          # expose only the device with index 1
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",  # let TensorFlow grow GPU memory on demand
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed the Python, NumPy and TensorFlow random number
# generators with the same fixed value.
SEED = 0
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

In [3]:
def LSTM_AE(X_train, epochs=50, batch_size=128, validation_split=0.3,
            patience=5, learning_rate=0.001, verbose=0):
    """Build, compile and fit an LSTM autoencoder that reconstructs ``X_train``.

    The network encodes each input window with two stacked LSTMs (64 -> 32
    units), repeats the encoding once per time step, decodes it with two more
    LSTMs (32 -> 64 units) and maps the result back to the input window shape
    through a dense layer followed by a reshape.

    Parameters
    ----------
    X_train : array-like with a 3-element ``shape``
        Training windows. Axes 1 and 2 are used as the sequence length and the
        number of features of the model input; axis 0 is presumably the sample
        axis (TODO confirm against ``data_loader``).
    epochs : int, default 50
        Maximum number of training epochs.
    batch_size : int, default 128
        Mini-batch size passed to ``fit``.
    validation_split : float, default 0.3
        Fraction of ``X_train`` held out by Keras for the early-stopping monitor.
    patience : int, default 5
        Epochs without ``val_loss`` improvement before training stops.
    learning_rate : float, default 0.001
        Learning rate of the Adam optimizer.
    verbose : int, default 0
        Verbosity forwarded to ``fit``.

    Returns
    -------
    keras.Sequential
        The fitted model, holding the weights of the best ``val_loss`` epoch
        (``restore_best_weights=True``).

    Raises
    ------
    ValueError
        If ``X_train`` does not have exactly three dimensions.
    """
    # Fail early with a clear message instead of an IndexError on shape[2].
    if len(X_train.shape) != 3:
        raise ValueError(
            f"X_train must be 3-dimensional, got shape {tuple(X_train.shape)}"
        )
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    model = keras.Sequential(
        [
            # keras.Input avoids the `input_shape` argument that newer Keras
            # releases deprecate on InputLayer.
            keras.Input(shape=(n_steps, n_features)),
            layers.LSTM(64, return_sequences=True),
            layers.LSTM(32),
            layers.RepeatVector(n_steps),
            layers.LSTM(32, return_sequences=True),
            layers.LSTM(64),
            layers.Dense(n_steps * n_features),
            layers.Reshape([n_steps, n_features]),
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=patience, mode="min", restore_best_weights=True
    )
    # Autoencoder training: the input is also the reconstruction target.
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=verbose,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Empty accumulator for the Yahoo S5 runs: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one LSTM autoencoder per series of every Yahoo subset and record its scores.
# Start from an empty accumulator so re-running this cell on its own does not
# append duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    dataset_name = loader.__name__.replace('load_', '')  # load_yahoo_A1 -> yahoo_A1
    # NOTE(review): (8, 4) is passed straight to the loader; presumably window
    # length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    assert len(x_trains) == len(x_tests) == len(y_tests), 'train/test/label lists must be aligned'

    # zip has no len(), so the progress bar needs an explicit total.
    for X_train, X_test, y_test in tqdm(zip(x_trains, x_tests, y_tests), total=len(x_trains)):
        model = LSTM_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_test, is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # keep only the largest of the returned F1 values

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291289
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957943
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823333336
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9999999466666691 0.9666666518500432 0.9999999953333334
yahoo_A1 0.9830507941396176 0.9293523348325543 0.9982222175638519
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099012
yahoo_A1 0.9999999333333361 0.8333332876388914 0.9999999823232326
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000004
yahoo_A1 0.9999999437500025 0.937499976331382 0.9999999926264045
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656862758
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790291265
yahoo_A1 0.9999999409090937 0.

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999250000032 0.7499999298611175 0.99999997445946
yahoo_A2 0.39999995200000177 0.029599565065717537 0.7813062480555302
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.85714281602041 0.9999999851907259
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594599
yahoo_A2 0.9999999357142885 0.85714281602041 0.9999999851907256
yahoo_A2 0.9999998500000123 0.0 0.9999998992248161
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.85714281602041 0.9999999851907256
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.7272726677685981 0.5005457823469166 0.9234480036252029
yahoo_A2 0.9230768591716004 0.778231254291871 0.9940164400287768
yahoo_A2 0.9999998500000123 0.0 0.9999998992248165
yahoo_A2 0.04166666254340308 0.003546099794540212 0.2486486316223535
yahoo_A

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.12121210964187425 0.04002614840652498 0.29833743435733734
yahoo_A3 0.7272726644628134 0.5003482816356032 0.8884180635361809
yahoo_A3 0.03174602857142873 0.004347826254870989 0.06557376388067784
yahoo_A3 0.6956521187145592 0.633780198813267 0.9701754274540374
yahoo_A3 0.0833333251736117 0.02895018585278312 0.5142360973174194
yahoo_A3 0.17647057188581453 0.0417867362717166 0.2083362296791469
yahoo_A3 0.12903224571626876 0.053499313985300376 0.5980225881448021
yahoo_A3 0.4444443901234612 0.1274467828200118 0.7295197450632613
yahoo_A3 0.18181813057852592 0.07823224994420575 0.41027568362990496
yahoo_A3 0.6363635809917395 0.46366641536332465 0.8033138309597855
yahoo_A3 0.3749999484375059 0.20717200445068773 0.6151211279127355
yahoo_A3 0.04444444000000022 0.013797127548793673 0.4569671898918313
yahoo_A3 0.17647057188581453 0.06111990430794169 0.35328749400220244
yahoo_A3 0.04724408980097991 0.004545455035313141 0.1074380056963

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.28571425306122533 0.030805088408439683 0.45953388828182185
yahoo_A4 0.1333333084444478 0.04069541345117895 0.8155737290446139
yahoo_A4 0.0624999938476567 0.008771929954168886 0.06666666327777794
yahoo_A4 0.0624999938476567 0.013793759380120599 0.40833331257639
yahoo_A4 0.8571427959183704 0.7204665101496157 0.9482758493980383
yahoo_A4 0.8571427959183704 0.6478770913165992 0.8716132876295546
yahoo_A4 0.6666666000000033 0.29371903542106964 0.8708333108368062
yahoo_A4 0.7499999343750033 0.45707553879336094 0.9050419977982844
yahoo_A4 0.18181812892563337 0.06727245086001755 0.6703389703259841
yahoo_A4 0.137931009750301 0.03244298716416189 0.5222222042407414
yahoo_A4 0.23076918639054037 0.08524146352033943 0.6099137847844034
yahoo_A4 0.0624999938476567 0.012910048946176967 0.4152777487106506
yahoo_A4 0.6666666000000033 0.26932775745143295 0.7086805342155678
yahoo_A4 0.15686272679738736 0.05451583700953531 0.649435015693511
yahoo_A4 0.03361344201680689 0.00887460133137247 0.1147540

In [6]:
# Tabulate the per-series Yahoo scores, then display the mean of each metric per subset.
yahoo_results = pd.DataFrame.from_dict(total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.812298,0.544178,0.878852
yahoo_A2,0.886172,0.454716,0.959843
yahoo_A3,0.421276,0.257755,0.662874
yahoo_A4,0.402644,0.221432,0.633644


### NASA

In [7]:
# Empty accumulator for the NASA runs: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one LSTM autoencoder per NASA sub-dataset and record its scores.
# Start from an empty accumulator so re-running this cell on its own does not
# append duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_nasa]:
    # NOTE(review): (8, 4) is passed straight to the loader; presumably window
    # length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    assert len(x_trains) == len(x_tests) == len(y_tests), 'train/test/label lists must be aligned'

    # zip has no len(), so the progress bar needs an explicit total.
    triples = tqdm(zip(x_trains, x_tests, y_tests), total=len(x_trains))
    for i, (X_train, X_test, y_test) in enumerate(triples):
        dataset_name = f'D{i+1}'  # sub-datasets are labelled D1, D2, ... in load order

        model = LSTM_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_test, is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # keep only the largest of the returned F1 values

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2295534087061448 0.11746109984775643 0.4724353761620009
D2 0.19498578202833744 0.4930315708118209 0.5108385086618026


In [9]:
# Tabulate the NASA scores and display them grouped by sub-dataset label.
nasa_results = pd.DataFrame.from_dict(total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229553,0.117461,0.472435
D2,0.194986,0.493032,0.510839


### SMD

In [10]:
# Empty accumulator for the SMD runs: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Train one LSTM autoencoder per SMD series and record its scores.
# Start from an empty accumulator so re-running this cell on its own does not
# append duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_smd]:
    dataset_name = loader.__name__.replace('load_', '')  # load_smd -> smd
    # NOTE(review): (8, 4) is passed straight to the loader; presumably window
    # length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    assert len(x_trains) == len(x_tests) == len(y_tests), 'train/test/label lists must be aligned'

    # zip has no len(), so the progress bar needs an explicit total.
    for X_train, X_test, y_test in tqdm(zip(x_trains, x_tests, y_tests), total=len(x_trains)):
        model = LSTM_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_test, is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # keep only the largest of the returned F1 values

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.5532991977145021 0.5087209301507045
smd 0.33480171787537694 0.2582434067543807 0.8571194526108868
smd 0.10071937465397049 0.10587473788646473 0.5321369940485321
smd 0.10256405257596121 0.11216633425001105 0.5349034508211135
smd 0.6551723644470901 0.6112281840919805 0.8728396609632012
smd 0.868571379151611 0.8651388441858463 0.9169443490605668
smd 0.23970940609196853 0.3728321726955125 0.5704151362805946
smd 0.34468080107017474 0.35749122477234674 0.670095810283878
smd 0.38938048145728565 0.3629696470615241 0.7205920945958447
smd 0.21837868410447125 0.20433404829605753 0.5084261368470311
smd 0.3106795822037918 0.5267417459590247 0.5939644390765506
smd 0.3260072976244171 0.3489103341057478 0.6533127555560582
smd 0.5225224723343931 0.46902046859840285 0.7502328939696092
smd 0.45751630398564924 0.42363604221093437 0.6598333746517392
smd 0.8102189275347678 0.8229602834990505 0.9188197370389928
smd 0.9499999477500025 0.8942184364363217 0.9760487986827799
smd 0.50563

In [12]:
# Tabulate the per-series SMD scores, then display the mean of each metric.
smd_results = pd.DataFrame.from_dict(total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.448328,0.467363,0.727736


### ECG

In [13]:
# Empty accumulator for the ECG runs: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Train one LSTM autoencoder per ECG sub-dataset and record its scores.
# Start from an empty accumulator so re-running this cell on its own does not
# append duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_ecg]:
    # NOTE(review): (4, 2) is passed straight to the loader; presumably window
    # length and stride -- confirm in data_loader.
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    assert len(x_trains) == len(x_tests) == len(y_tests), 'train/test/label lists must be aligned'

    # zip has no len(), so the progress bar needs an explicit total.
    triples = tqdm(zip(x_trains, x_tests, y_tests), total=len(x_trains))
    for i, (X_train, X_test, y_test) in enumerate(triples):
        dataset_name = f'D{i+1}'  # sub-datasets are labelled D1, D2, ... in load order

        model = LSTM_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_test, is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # keep only the largest of the returned F1 values

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.4466018964464179 0.4709263636309237 0.6872088867933055
D2 0.3874345057207924 0.3191630311938948 0.6670535685183272
D3 0.2795698619956095 0.21377744837978835 0.6276422956656296
D4 0.2714681140138614 0.15157386041765725 0.5964651084076671
D5 0.48780483229506666 0.34059125022025316 0.8629874986531321
D6 0.1571194617650775 0.07548490550353279 0.3402590057442746
D7 0.14598536250200825 0.08676010045097446 0.525775727103335
D8 0.17583891011296932 0.07358699564169491 0.3716105748159256
D9 0.3580130914399724 0.18434008598993182 0.39316131380570934


In [15]:
# Tabulate the ECG scores and display them grouped by sub-dataset label.
ecg_results = pd.DataFrame.from_dict(total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.446602,0.470926,0.687209
D2,0.387435,0.319163,0.667054
D3,0.27957,0.213777,0.627642
D4,0.271468,0.151574,0.596465
D5,0.487805,0.340591,0.862987
D6,0.157119,0.075485,0.340259
D7,0.145985,0.08676,0.525776
D8,0.175839,0.073587,0.371611
D9,0.358013,0.18434,0.393161


### Power Demand

In [16]:
# Empty accumulator for the Power Demand run: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Train one LSTM autoencoder per Power Demand series and record its scores.
# Start from an empty accumulator so re-running this cell on its own does not
# append duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_power_demand]:
    dataset_name = loader.__name__.replace('load_', '')  # load_power_demand -> power_demand
    # NOTE(review): (16, 8) is passed straight to the loader; presumably window
    # length and stride -- confirm in data_loader.
    datasets = loader(16, 8)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    assert len(x_trains) == len(x_tests) == len(y_tests), 'train/test/label lists must be aligned'

    # zip has no len(), so the progress bar needs an explicit total.
    for X_train, X_test, y_test in tqdm(zip(x_trains, x_tests, y_tests), total=len(x_trains)):
        model = LSTM_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_test, is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # keep only the largest of the returned F1 values

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.2631578716528182 0.10457807202807193 0.3477392667102059


In [18]:
# Tabulate the Power Demand scores, then display the mean of each metric.
power_results = pd.DataFrame.from_dict(total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.263158,0.104578,0.347739


### 2D Gesture

In [19]:
# Empty accumulator for the 2D Gesture run: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Train one LSTM autoencoder per 2D Gesture series and record its scores.
# Start from an empty accumulator so re-running this cell on its own does not
# append duplicate rows.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_gesture]:
    dataset_name = loader.__name__.replace('load_', '')  # load_gesture -> gesture
    # NOTE(review): (4, 2) is passed straight to the loader; presumably window
    # length and stride -- confirm in data_loader.
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    assert len(x_trains) == len(x_tests) == len(y_tests), 'train/test/label lists must be aligned'

    # zip has no len(), so the progress bar needs an explicit total.
    for X_train, X_test, y_test in tqdm(zip(x_trains, x_tests, y_tests), total=len(x_trains)):
        model = LSTM_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_test, is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # keep only the largest of the returned F1 values

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.46991400149588575 0.3260373391614557 0.5948317805942174


In [21]:
# Tabulate the 2D Gesture scores, then display the mean of each metric.
gesture_results = pd.DataFrame.from_dict(total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.469914,0.326037,0.594832
