In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
def CNN_AE(X_train):
    Conv1D = layers.Conv1D
    Conv1DT = layers.Conv1DTranspose
    Dropout = layers.Dropout

    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
            Conv1D(32, 7, padding='same', strides=2, activation='relu'),
            Dropout(0.2),
            Conv1D(16, 7, padding='same', strides=2, activation='relu'),
            Conv1DT(16, 7, padding='same', strides=2, activation='relu'),
            Dropout(0.2),
            Conv1DT(32, 7, padding='same', strides=2, activation='relu'),
            Conv1DT(1, 7, padding='same')
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model.fit(X_train, X_train, epochs=50, batch_size=128, validation_split=0.3, verbose=0, callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)])    
    return model

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = CNN_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999500000039 0.9999999166666722
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.9999999000000052 0.7499999000000107 0.9999999166666721
yahoo_A1 0.9999999000000052 0.0 0.9999999166666722
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.9999999250000032 0.7499999500000026 0.9999998750000131
yahoo_A1 0.9999999000000052 0.4999999500000039 0.9999999166666721
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.33333329444444565 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.7499999343750033 0.16666667555555306 0.0
yahoo_A1 0.7499999343750033 0.16666667444444216 -0.0
yahoo_A1 0.7499999343750033 0.38888886518518634 0.3333333055555574
yahoo_A1 0.7499999343750033 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.9999999166666703 0.6666666111111148 0.

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.46153841893491365 0.11111110867283944 0.0
yahoo_A2 0.46153841893491365 0.06250000713541536 0.0
yahoo_A2 0.46153841893491365 0.15740740094393024 0.09523809070294799
yahoo_A2 0.4285713887755125 0.09999999793939388 -0.0
yahoo_A2 0.24999997187500062 0.07142856326530694 0.0
yahoo_A2 0.46153841893491365 0.11111110867283944 -0.0
yahoo_A2 0.46153841893491365 0.10267857121958815 0.21874998528645928
yahoo_A2 0.24999997187500062 0.07142856326530694 0.0
yahoo_A2 0.46153841893491365 0.11111110867283944 -0.0
yahoo_A2 0.4285713887755125 1.1717170107356334e-08 0.0
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 0.0
yahoo_A2 0.4285713887755125 0.055555562177327655 -0.0
yahoo_A2 0.4285713887755125 0.10714285482941653 0.24999997187500275
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.11111110867283944 0.0
yahoo_A2 0.49999995416666926 0.07142857913832053 0.24999998854166702
yahoo_A2 0.24999997187500062 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.4999999500000025 0.2499999789583348 0.4999999500000044
yahoo_A3 0.7999999360000031 0.0 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.7999999360000031 0.29166666104166605 -0.0
yahoo_A3 0.6666666074074105 7.499999185480748e-09 0.0
yahoo_A3 0.7999999360000031 0.0 0.0
yahoo_A3 0.9999999333333361 0.39999998200000086 0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.9090908429752097 4.083333052458349e-09 0.0
yahoo_A3 0.28571425306122533 0.0 0.0
yahoo_A3 0.7999999360000031 0.0 0.0
yahoo_A3 0.28571425306122533 0.0 0.0
yahoo_A3 0.9090908429752097 1.0541665828899795e-08 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.6666666074074105 7.499999185480748e-09 0.0
yahoo_A3 0.28571425306122533 0.0 0.0
yahoo_A3 0.28571425306122533 0.0 0.0
yahoo_A3 0.6666666074074105 0.0 0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.28571425306122533 0.0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.6666666074074105 0.0 0.0
yahoo_A4 0.6666666000000033 0.16666666527777654 0.4999999625000022
yahoo_A4 0.6666666074074105 0.0 0.0
yahoo_A4 0.4999999500000025 0.16666665555555618 -0.0
yahoo_A4 0.9999998500000123 0.8999998635139098 0.9999998000000299
yahoo_A4 0.7999999360000031 0.2777777723148144 -0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 4.083333052458349e-09 0.0
yahoo_A4 0.7999999360000031 1.4444442978045904e-08 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.6666666074074105 0.19999999349999997 -0.0
yahoo_A4 0.6666666074074105 0.0 0.0
yahoo_A4 0.28571425306122533 0.0 0.0
yahoo_A4 0.28571425306122533 0.08333332361111209 -0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.7999999360000031 5.277777344711406e-09 0.0
yahoo_A4 0.9999999333333361 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.4999999500000025 1.3333331148236856e-08 0.0
yahoo_A4 0.7999999360000031 0.1666666715

In [6]:
yahoo_results = pd.DataFrame(total_scores)
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.578444,0.200397,0.333333
yahoo_A2,0.404229,0.116375,0.108988
yahoo_A3,0.70135,0.044777,0.022343
yahoo_A4,0.692266,0.057859,0.047256


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.26965063099188846 0.11596555596168923 0.396472600547932
D2 0.28464975199693254 0.5023415037551785 0.5125572784281391


In [9]:
nasa_results = pd.DataFrame(total_scores)
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.269651,0.115966,0.396473
D2,0.28465,0.502342,0.512557


### SMD

In [10]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [11]:
for loader in [load_smd]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.23153690558205123 0.5632053916219133 0.5172413782842009
smd 0.48780483236169325 0.4622570630786299 0.8528224338038802
smd 0.2666666278628313 0.19349110532959493 0.6505965928771728
smd 0.3384614877159836 0.2669679707861683 0.6876784801744443
smd 0.7407406869684531 0.6729027939848469 0.8117917793988815
smd 0.5114942087247363 0.5633697101710562 0.6821719975724383
smd 0.31192657420531683 0.21179727231651138 0.5338121623999641
smd 0.23958329890408475 0.21073495187428443 0.5654520839198749
smd 0.37777772733333986 0.3126415686033896 0.6681635057182809
smd 0.29953914490114675 0.33730902837773014 0.5419787439271754
smd 0.6111110652777806 0.5797333993481124 0.7612790663387384
smd 0.29953914490114675 0.1564205937552239 0.23750587008511434
smd 0.2635293887601403 0.18825856244921163 0.42315665535651015
smd 0.46153842248520954 0.6406249788767215 0.6499999965981308
smd 0.23999997792000163 0.18522467569580525 0.39050511182598874
smd 0.8888888197530895 0.7013549728560032 0.8999999817527475
smd 0.

In [12]:
smd_results = pd.DataFrame(total_scores)
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.397379,0.3781,0.617796


### ECG

In [13]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [14]:
for loader in [load_ecg]:
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.46153841893491365 0.37604365911978516 0.5103950930733676
D2 0.333333303968256 0.2137915582852804 0.49739582400987425
D3 0.11009173251409898 0.0 0.0
D4 0.2999999730000019 0.0 0.0
D5 0.27586203043995716 0.11686293387607471 0.40669239931144613
D6 0.3809523328798237 0.16954827545277182 0.6394009114933956
D7 0.05079364581103594 0.0 0.0
D8 0.21276593798098847 0.0 0.0
D9 0.417910413455115 1.4125024236541218e-10 0.0


In [15]:
ecg_results = pd.DataFrame(total_scores)
ecg_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.461538,0.3760437,0.510395
D2,0.333333,0.2137916,0.497396
D3,0.110092,0.0,0.0
D4,0.3,0.0,0.0
D5,0.275862,0.1168629,0.406692
D6,0.380952,0.1695483,0.639401
D7,0.050794,0.0,0.0
D8,0.212766,0.0,0.0
D9,0.41791,1.412502e-10,0.0


### Power Demand

In [16]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [17]:
for loader in [load_power_demand]:
    datasets = loader(512, 256)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc']) 

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.7499999343750033 0.0 0.0


In [18]:
power_results = pd.DataFrame(total_scores)
power_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.75,0.0,0.0


### 2D Gesture

In [19]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [20]:
for loader in [load_gesture]:
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.42201831455264965 0.0 0.0


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.422018,0.0,0.0
