In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
def CNN_AE(X_train):
    Conv1D = layers.Conv1D
    Conv1DT = layers.Conv1DTranspose
    Dropout = layers.Dropout

    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
            Conv1D(32, 7, padding='same', strides=2, activation='relu'),
            Dropout(0.2),
            Conv1D(16, 7, padding='same', strides=2, activation='relu'),
            Conv1DT(16, 7, padding='same', strides=2, activation='relu'),
            Dropout(0.2),
            Conv1DT(32, 7, padding='same', strides=2, activation='relu'),
            Conv1DT(1, 7, padding='same')
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model.fit(X_train, X_train, epochs=50, batch_size=128, validation_split=0.3, verbose=0, callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)])    
    return model

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = CNN_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.7716534937317905 0.7303390703570484 0.953197036517849
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849764
yahoo_A1 0.9999999456521764 0.956521721004026 0.9999999953614762
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999797237572
yahoo_A1 0.9999999357142885 0.85714281602041 0.999999985436508
yahoo_A1 0.9999999357142885 0.85714281602041 0.999999985436508
yahoo_A1 0.999999948484851 0.9848484768185841 0.9999999981601732
yahoo_A1 0.9999999492063519 0.9920634873748226 0.9999999987844086
yahoo_A1 0.9763779020150066 0.9821007745918511 0.9954306617391253
yahoo_A1 0.9999999487179513 0.9871794801786323 0.9999999983719279
yahoo_A1 0.9999999492592618 0.9925925880873757 0.9999999988187307
yahoo_A1 0.9999999487654349 0.9876543141718079 0.9999999984095602
yahoo_A1 0.9999999483870992 0.9838709593726699 0.9999999980537634
yahoo_A1 0.999999944444447 0.9444444227123632 0.9999999941537467
yahoo_A1 0.9999999487013013 0.9870129799871572 0.9999999983645984
yahoo_A1 0.9999999476

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999485074653 0.9850746188895331 0.999999998349235
yahoo_A2 0.8590603520381993 0.8528450589617738 0.9878120253586382
yahoo_A2 0.9999999485074653 0.9850746189780145 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098848412 0.9999999988797316
yahoo_A2 0.9999998500000123 0.0 0.999999899784027
yahoo_A2 0.9999999485074653 0.9701492469763784 0.999999998349235
yahoo_A2 0.9999999490384642 0.9903846098817337 0.9999999988797316
yahoo_A2 0.9999998500000123 0.0 0.9999998997840273
yahoo_A2 0.9999999485074653 0.985074618960681 0.999999998349235
yahoo_A2 0.9999999490384642 0.990384609835426 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9999999485074653 0.9850746188455715 0.9999999983492351
yahoo_A2 0.9999999490384642 0.9903846098810309 0.9999999988797315
yahoo_A2 0.9999999490384642 0.9903846098857108 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.298200486150636 0.1414150221011443 0.7019193213971073
yahoo_A2 0.

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.25396823169564314 0.0015873026417745047 0.16489361206808148
yahoo_A3 0.4718162453563256 0.22209870636002088 0.42188740448584366
yahoo_A3 0.999999949275365 0.992753618826158 0.9999999989442363
yahoo_A3 0.8571428059882805 0.8775992369265865 0.9641904331241491
yahoo_A3 0.9999999495073918 0.9950738884708803 0.9999999990854482
yahoo_A3 0.9785407216931633 0.9882099406251559 0.9988159843564312
yahoo_A3 0.5852089616298454 0.24717162769523937 0.11736262264879466
yahoo_A3 0.4742646695278736 0.24535580179246758 0.4626097375177597
yahoo_A3 0.5210083646578659 0.4394802039189864 0.6235123293388442
yahoo_A3 0.7134502462945892 0.5807715535252748 0.5295804679787006
yahoo_A3 0.7771202675863386 0.7453082512081167 0.7836141062225166
yahoo_A3 0.9456521234360258 0.984770787649156 0.9927331483190937
yahoo_A3 0.6241134298475963 0.2801648470208238 0.8404796688320105
yahoo_A3 0.6766916843462069 0.030347291715273852 0.02028680451929492
yahoo_A3 0.20895520041583385 0.07295269649418028 0.655904919633133

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.8101265333974286 0.8189026420900345 0.8159884347383042
yahoo_A4 0.2757111358464748 0.0810605536988216 0.1955534108692667
yahoo_A4 0.41726615387920135 0.02436990568235497 0.289129429825014
yahoo_A4 0.2970296775316166 0.08466546601403058 0.22520565550479696
yahoo_A4 0.9974554202306287 0.9948719911884788 0.9999790890766338
yahoo_A4 0.906148817004433 0.9498275969111786 0.9646029363635006
yahoo_A4 0.9909090400082671 0.9913119437163782 0.9997987270416937
yahoo_A4 0.9743589233609493 0.983626906114527 0.9977756485488377
yahoo_A4 0.6835442570421438 0.465952224699498 0.6670674626884339
yahoo_A4 0.9644268267149958 0.9882112632014334 0.9983473546692888
yahoo_A4 0.7627627151187253 0.877755625573554 0.8719532279582499
yahoo_A4 0.3882783568463308 0.009799171746010625 0.13214818059807854
yahoo_A4 0.624390196387868 0.5955139846368949 0.7343365936046167
yahoo_A4 0.46931403573095315 0.2891306410073324 0.6934711015939846
yahoo_A4 0.07439824225732532 0.0035933378337895576 0.06533406297355433
yah

In [6]:
yahoo_results = pd.DataFrame(total_scores)
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.888742,0.829842,0.905973
yahoo_A2,0.981968,0.649471,0.995692
yahoo_A3,0.771658,0.662304,0.745719
yahoo_A4,0.751491,0.655839,0.728629


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.13766862716749678 0.38522549868301587
D2 0.2699530282282525 0.5840009300406523 0.5173913038147389


In [9]:
nasa_results = pd.DataFrame(total_scores)
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.137669,0.385225
D2,0.269953,0.584001,0.517391


### SMD

In [10]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [11]:
for loader in [load_smd]:
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.20232181144029696 0.5753855920456366 0.521888680406877
smd 0.47946607091009424 0.44955342114354546 0.9030968125116782
smd 0.27509146250568944 0.20231886140329036 0.6860434886944784
smd 0.28550125499121576 0.26944646088944874 0.6998155306392354
smd 0.6894922795420774 0.7543820647819445 0.9604605779386108
smd 0.721632979044796 0.7172255036672519 0.7808979685499313
smd 0.28757856738135573 0.20055663567378268 0.5806954358516034
smd 0.27698691100102835 0.24621629926606425 0.6217732767916516
smd 0.4521223633297181 0.49493627270878787 0.8533029301151961
smd 0.2596273980668859 0.2907512107322719 0.5346442447268345
smd 0.6354114217912613 0.7106700874099949 0.8480261100537623
smd 0.23641788958652887 0.2075002243402549 0.3100562181438564
smd 0.4630716726006041 0.43856751904558683 0.85980375415138
smd 0.5201026132652724 0.5219560657347636 0.711015863910138
smd 0.3936269578700833 0.39479251554743977 0.8023227653823595
smd 0.9433961761302981 0.9588987106619316 0.9857944131317462
smd 0.76438214

In [12]:
smd_results = pd.DataFrame(total_scores)
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.483427,0.488078,0.74228


### ECG

In [13]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [14]:
for loader in [load_ecg]:
    datasets = loader(32, 16)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.6666666172839535 0.5287067732316802 0.6416972995475905
D2 0.4444443950617328 0.3227746467562111 0.5713364930570336
D3 0.10426539286179642 0.031830835018159885 0.29515458853807425
D4 0.25806449047867025 0.11987751195222267 0.493926547264806
D5 0.41025636765286383 0.2172037509032913 0.7438228359067329
D6 0.24242419320478445 0.11312559835855264 0.549425107255294
D7 0.24999997500000123 0.08985639743531602 0.49780569282665876
D8 0.19753084615150285 0.08033499415600068 0.4321854815934826
D9 0.39393936170799143 0.17883841324903912 0.34472812311541406


In [15]:
ecg_results = pd.DataFrame(total_scores)
ecg_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.666667,0.528707,0.641697
D2,0.444444,0.322775,0.571336
D3,0.104265,0.031831,0.295155
D4,0.258064,0.119878,0.493927
D5,0.410256,0.217204,0.743823
D6,0.242424,0.113126,0.549425
D7,0.25,0.089856,0.497806
D8,0.197531,0.080335,0.432185
D9,0.393939,0.178838,0.344728


### Power Demand

In [16]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [17]:
for loader in [load_power_demand]:
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc']) 

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.3606206231571944 0.11922375084117279 0.2426117129441615


In [18]:
power_results = pd.DataFrame(total_scores)
power_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.360621,0.119224,0.242612


### 2D Gesture

In [19]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [20]:
for loader in [load_gesture]:
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.4203232924120907 0.35215139459909023 0.3754227420265823


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.420323,0.352151,0.375423
