In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
def CNN_AE(X_train):
    Conv1D = layers.Conv1D
    Conv1DT = layers.Conv1DTranspose
    Dropout = layers.Dropout

    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
            Conv1D(32, 7, padding='same', strides=2, activation='relu'),
            Dropout(0.2),
            Conv1D(16, 7, padding='same', strides=2, activation='relu'),
            Conv1DT(16, 7, padding='same', strides=2, activation='relu'),
            Dropout(0.2),
            Conv1DT(32, 7, padding='same', strides=2, activation='relu'),
            Conv1DT(1, 7, padding='same')
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model.fit(X_train, X_train, epochs=50, batch_size=128, validation_split=0.3, verbose=0, callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)])    
    return model

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = CNN_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291289
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957943
yahoo_A1 0.9999999333333361 0.8333332883333358 0.9999999823333336
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9999999466666691 0.9666666518498016 0.9999999953333334
yahoo_A1 0.8823528892733591 0.9094599890841724 0.9782835203371475
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099013
yahoo_A1 0.9999999333333361 0.833333288055558 0.9999999823232326
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000004
yahoo_A1 0.9999999437500025 0.9374999763313819 0.9999999926264045
yahoo_A1 0.8571427836734729 0.5694443936342632 0.996731991942203
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790291266
yahoo_A1 0.9999999409090937 0.9

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999166666703 0.749999935951183 0.999999972162163
yahoo_A2 0.39999995200000177 0.008503313032368153 0.6310810321476181
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9999999357142885 0.85714281602041 0.9999999851907257
yahoo_A2 0.11111109938271617 0.029411761591695812 0.8759689039721259
yahoo_A2 0.9999999250000032 0.7499999447916703 0.99999997445946
yahoo_A2 0.8333332708333363 0.6203171719716974 0.9566192827665954
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.9999999250000032 0.7499999447916703 0.99999997445946
yahoo_A2 0.8333332708333363 0.6136237947537577 0.9502617647605214
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.8571427836734729 0.5126575210237935 0.9114864632066294
yahoo_A2 0.4444444000000022 0.19262772480285628 0.7041261412999436
yahoo_A2 0.8571427948979622 0.7897958801411313 0.9970082126097514
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.06666666007407451 0.0172408960011654 0.597297

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.03174602857142873 0.004950495156031765 0.18032785067186416
yahoo_A3 0.12213739292582114 0.04227877709170478 0.3132697001826912
yahoo_A3 0.9999999333333361 0.8333332883333356 0.9999999824858761
yahoo_A3 0.03174602857142873 0.004098360849020119 0.008196720485084744
yahoo_A3 0.5714285224489823 0.4687521013054354 0.8917153893692991
yahoo_A3 0.07692305029586642 0.028248960236113903 0.5444444278912042
yahoo_A3 0.17647057188581453 0.04148068890802058 0.22059433351077232
yahoo_A3 0.1818181617190101 0.07202829940005097 0.6971751287330911
yahoo_A3 0.28571425306122533 0.0172823611456422 0.46370054205854133
yahoo_A3 0.266666618666674 0.10351047604854788 0.6056512730183459
yahoo_A3 0.521739076748587 0.39913492422608116 0.835867437224981
yahoo_A3 0.23529406643599615 0.050817806188318417 0.3818782998027101
yahoo_A3 0.2499999562500052 0.06878306455140207 0.8729507753090592
yahoo_A3 0.17647057188581453 0.019485899949776793 0.2255981316247027
yahoo_A3 0.04724408980097991 0.004629630110916255 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.28571425306122533 0.03392289417206002 0.48841806755313
yahoo_A4 0.06779660338983085 0.0190501680378287 0.6024589857766746
yahoo_A4 0.0624999938476567 0.004310345585182124 0.041666662465278156
yahoo_A4 0.0624999938476567 0.01222773441679899 0.3499999822083343
yahoo_A4 0.666666611111114 0.4389517564613076 0.8405172288910773
yahoo_A4 0.7692307100591745 0.5655173270468533 0.8922413673881543
yahoo_A4 0.6666666000000033 0.31458734639837443 0.8781249773151047
yahoo_A4 0.5714285142857172 0.23272317595664915 0.6731092296696564
yahoo_A4 0.18749996835937974 0.05675146794716493 0.625635576309137
yahoo_A4 0.1111110753086524 0.016799852227629114 0.3541666486979178
yahoo_A4 0.19512191885782762 0.08162849052940563 0.5950584882405832
yahoo_A4 0.0624999938476567 0.0052083340045297145 0.2083333127430576
yahoo_A4 0.39999995200000177 0.046662638436164486 0.7916666455902784
yahoo_A4 0.13793100071344394 0.04203850478859268 0.49774009881241466
yahoo_A4 0.03174602857142873 0.0062580229883908815 0.03

In [6]:
yahoo_results = pd.DataFrame(total_scores)
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.816629,0.559331,0.889003
yahoo_A2,0.872957,0.433416,0.947114
yahoo_A3,0.407245,0.243638,0.679711
yahoo_A4,0.361677,0.186241,0.619618


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2568473867078198 0.12376387839470714 0.4957218778857861
D2 0.19498578202833744 0.5197514823085777 0.5136571536345818


In [9]:
nasa_results = pd.DataFrame(total_scores)
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.256847,0.123764,0.495722
D2,0.194986,0.519751,0.513657


### SMD

In [10]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [11]:
for loader in [load_smd]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.5547526859780644 0.5087209301507045
smd 0.4139650398815982 0.3752098426736362 0.9118335911310517
smd 0.18796988460437666 0.10908906041705152 0.6447085927262742
smd 0.1927236656334983 0.1356976672551361 0.6833152410149455
smd 0.6551723644470901 0.6481963211988943 0.9002896146609422
smd 0.8306264009251163 0.8244462380297894 0.872085402809532
smd 0.205623287322007 0.11179991471202623 0.5136467727011585
smd 0.26923074406509107 0.19528321607959997 0.4810656033373526
smd 0.31345821238359434 0.27195931744312946 0.7452769110728638
smd 0.21837868410447125 0.19298357543282252 0.5073620031103687
smd 0.48484843825758006 0.5410463036032302 0.6871739297527
smd 0.31598510338787694 0.23808240025101748 0.3519393455320122
smd 0.26395934918189734 0.24897944771502384 0.792281211558957
smd 0.45569616477327657 0.3872826711297792 0.6570434429630087
smd 0.6814158814550898 0.5945421742507302 0.8794015935257194
smd 0.9499999477500025 0.9180279585302844 0.9760487986827799
smd 0.50563603

In [12]:
smd_results = pd.DataFrame(total_scores)
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.420586,0.418479,0.706898


### ECG

In [13]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [14]:
for loader in [load_ecg]:
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.2784809883352047 0.25679906335242975 0.4006648633008311
D2 0.29411759682353794 0.2582576746647633 0.4750020624424619
D3 0.09531128812118277 0.03347386348043948 0.3860466951534464
D4 0.2532750843919857 0.1580076701354087 0.6053622499608214
D5 0.41064634881522427 0.21724399930731528 0.8191808846979343
D6 0.27809520810521865 0.1341963804569694 0.6940866906463844
D7 0.061170204207496734 0.021317846010913043 0.5325364839906022
D8 0.20538718573062456 0.09305581574523517 0.5293045233681557
D9 0.3580130914399724 0.16097330081161013 0.29152274283296903


In [15]:
ecg_results = pd.DataFrame(total_scores)
ecg_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.278481,0.256799,0.400665
D2,0.294118,0.258258,0.475002
D3,0.095311,0.033474,0.386047
D4,0.253275,0.158008,0.605362
D5,0.410646,0.217244,0.819181
D6,0.278095,0.134196,0.694087
D7,0.06117,0.021318,0.532536
D8,0.205387,0.093056,0.529305
D9,0.358013,0.160973,0.291523


### Power Demand

In [16]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [17]:
for loader in [load_power_demand]:
    datasets = loader(16, 8)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc']) 

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.26905827243660835 0.09963299658684259 0.32950231533747354


In [18]:
power_results = pd.DataFrame(total_scores)
power_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.269058,0.099633,0.329502


### 2D Gesture

In [19]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [20]:
for loader in [load_gesture]:
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = CNN_AE(X_train)        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.41316682301739205 0.30746230881677095 0.45956570008648984


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.413167,0.307462,0.459566
