In [1]:
import os
import random

# GPU-related environment variables, set here before TensorFlow is imported
# in the next cell: enumerate devices by PCI bus id, expose only device "0",
# and ask TensorFlow to grow GPU memory on demand.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed the Python, NumPy and TensorFlow random number
# generators with the same fixed value (0).
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(0)

In [3]:
def GRU_AE(X_train, epochs=50, batch_size=128, learning_rate=0.001,
           validation_split=0.3, patience=5):
    """Build a GRU autoencoder and fit it to reconstruct ``X_train``.

    The network encodes each window with two GRU layers (64 -> 32 units),
    repeats the 32-unit code once per timestep, decodes it with two more GRU
    layers (32 -> 64 units) and maps the final decoder state through a Dense
    layer that is reshaped back to ``(timesteps, features)``.

    Args:
        X_train: 3-D array shaped ``(samples, timesteps, features)``. It is
            used as both the input and the reconstruction target.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``model.fit``.
        learning_rate: Learning rate of the Adam optimizer.
        validation_split: Fraction of ``X_train`` held out by Keras to
            compute ``val_loss`` for early stopping.
        patience: Number of epochs without ``val_loss`` improvement before
            training stops.

    Returns:
        The fitted ``keras.Sequential`` model, holding the weights of the
        epoch with the best ``val_loss`` if early stopping triggered.

    Raises:
        ValueError: If ``X_train`` is not 3-dimensional.
    """
    # Fail early with a readable message instead of an IndexError / Keras
    # shape error further down.
    if len(X_train.shape) != 3:
        raise ValueError(
            "X_train must be 3-D (samples, timesteps, features), "
            f"got shape {tuple(X_train.shape)}"
        )
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    GRU = layers.GRU
    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(timesteps, n_features)),
            # Encoder: compress the whole window into one 32-unit vector.
            GRU(64, return_sequences=True),
            GRU(32),
            # Decoder: feed that vector to every timestep, then unroll it.
            layers.RepeatVector(timesteps),
            GRU(32, return_sequences=True),
            # Only the final state is kept here; the Dense layer below emits
            # the entire window at once and Reshape restores its 2-D layout.
            GRU(64),
            layers.Dense(timesteps * n_features),
            layers.Reshape([timesteps, n_features]),
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss="mse")

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience,
        mode="min",
        restore_best_weights=True,
    )
    model.fit(
        X_train,
        X_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Empty per-series score table: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GRU autoencoder per Yahoo S5 series and record its scores.
# The score table is rebuilt here so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two loader arguments are presumably window length and
    # stride -- confirm in data_loader.
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Keras keeps global state for every model it builds; clear it so
        # memory does not grow across the many models trained in this loop.
        keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with np.max, i.e. the best F1 is reported.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.5714285142857172 0.1666666606944441 0.33333328888889363
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.33333329444444565 0.09999998800000123 0.49999993750000654
yahoo_A1 0.666666577777783 0.4583332718055626 0.7499999125000085
yahoo_A1 0.666666577777783 0.47499994300000575 0.6666666111111147
yahoo_A1 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A1 0.8888888197530895 0.25000000993055255 -0.0
yahoo_A1 0.5714285142857172 0.1250000006249989 -0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.33333329444444565 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.7499999343750033 0.3361110889953716 0.24999997500000196
yahoo_A1 0.7499999343750033 0.16666667444444216 -0.0
yahoo_A1 0.7499999343750033 0.1666666744444422 -0.0
yahoo_A1 0.7499999343750033 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.9999999000000052 0.4999999591666695 0.9999999000000078
yahoo

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.46153841893491365 4.351851412631487e-09 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A2 0.24999997187500062 0.0 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A2 0.24999997187500062 0.0 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 1.3032405600333905e-08 0.0
yahoo_A2 0.4285713887755125 0.09374999960779642 0.12499998593750138
yahoo_A2 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.0 0.0
yahoo_A2 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A2 0.24999997187500062 0.0 0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.33333329444444565 0.0 0.0
yahoo_A3 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A3 0.7999999360000031 5.277777344711406e-09 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.9090908429752097 4.083333052458349e-09 0.0
yahoo_A3 0.4999999500000025 1.3333331148236856e-08 0.0
yahoo_A3 0.9999999000000052 6.826387587441943e-08 0.9999999125000061
yahoo_A3 0.6666666074074105 0.1250000081249981 0.0
yahoo_A3 0.7999999360000031 0.0 0.0
yahoo_A3 0.9999999333333361 8.333331882493106e-08 0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.9090908429752097 0.2500000140138856 0.0
yahoo_A3 0.0 0.0 0.0
yahoo_A3 0.7999999360000031 0.16666667972221955 -0.0
yahoo_A3 0.28571425306122533 0.08333332361111209 -0.0
yahoo_A3 0.9090908429752097 0.0 0.0
yahoo_A3 0.4999999500000025 0.0 0.0
yahoo_A3 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A3 0.6666666074074105 2.374999645729026e-08 0.0
yahoo_A3 0.28571425306122533 0.08333332361111209 0.0
yahoo_A3 0.28571425306122533 0.0 0.0
yahoo_A3 0.666666

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.6666666074074105 0.0 0.0
yahoo_A4 0.6666666000000033 0.16666666527777654 0.4999999625000022
yahoo_A4 0.6666666074074105 0.0 0.0
yahoo_A4 0.6666666000000033 0.16666666527777654 0.4999999625000022
yahoo_A4 0.9090908429752097 0.2500000140138856 -0.0
yahoo_A4 0.7999999360000031 0.16666667972221952 -0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.7999999360000031 0.16666667972221955 -0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.6666666074074105 0.1250000081249981 -0.0
yahoo_A4 0.6666666074074105 0.1250000081249981 -0.0
yahoo_A4 0.28571425306122533 0.0 0.0
yahoo_A4 0.28571425306122533 0.08333332361111209 -0.0
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.7999999360000031 0.0 0.0
yahoo_A4 0.9999999333333361 3.3333332149565532e-09 0.0
yahoo_A4 0.9090908429752097 2.2347219975155517e-08 0.0
yahoo_A4 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A4 0.799999936000003

In [6]:
# Turn the collected per-series scores into a table, then display the mean
# of every metric for each Yahoo subset.
yahoo_results = pd.DataFrame.from_dict(total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.481222,0.133805,0.171131
yahoo_A2,0.381499,0.066014,0.030268
yahoo_A3,0.692513,0.062675,0.014493
yahoo_A4,0.686987,0.077092,0.046883


### NASA

In [7]:
# Empty per-series score table: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GRU autoencoder per NASA series and record its scores.
# The score table is rebuilt here so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_nasa]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride -- confirm in data_loader.
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Keras keeps global state for every model it builds; clear it so
        # memory does not grow across the models trained in this loop.
        keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        # Series are labelled D1, D2, ... in loading order.
        dataset_name = f'D{i+1}'

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with np.max, i.e. the best F1 is reported.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2690631569008083 0.06117763092305466 0.27741412363432816
D2 0.3097712780075326 0.1636229667588922 0.5190291414640057


In [9]:
# Turn the collected per-series scores into a table, then display the mean
# of every metric for each NASA series label.
nasa_results = pd.DataFrame.from_dict(total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.269063,0.061178,0.277414
D2,0.309771,0.163623,0.519029


### SMD

In [10]:
# Empty per-series score table: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Train one GRU autoencoder per SMD series and record its scores.
# The score table is rebuilt here so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_smd]:
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two loader arguments are presumably window length and
    # stride -- confirm in data_loader.
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Keras keeps global state for every model it builds; clear it so
        # memory does not grow across the many models trained in this loop.
        keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with np.max, i.e. the best F1 is reported.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.23153690558205123 0.06546275382269463 0.49999999900806086
smd 0.14177213865470395 0.019378551682101505 0.1002000981803754
smd 0.21262456301365543 0.11480568637130903 0.6180718709158651
smd 0.22222217866941857 0.10794717298950028 0.606271679273233
smd 0.11111109012345965 0.2732198000266741 0.5909171349495674
smd 0.49448119788508593 0.4200610966303163 0.5873050185814976
smd 0.2983682728435532 0.14360259194649763 0.42787005428459035
smd 0.23557690218079871 0.10071274654491602 0.47615394453280824
smd 0.28037378282820297 0.15584893792477866 0.5844304939451155
smd 0.506024048395998 0.2373524227892881 0.7373560841418461
smd 0.12690354134994566 0.023402233282390363 0.4159883692327554
smd 0.30092590022183857 0.11232677607701491 0.20911403917924618
smd 0.2635293887601403 0.08965424117981007 0.37396707986556543
smd 0.08547007725180877 0.022321428454838966 0.4999999973831775
smd 0.21307504139204825 0.07034008198078359 0.21972515625383374
smd 0.026737965262089488 0.006775067613340092 0.499999

In [12]:
# Turn the collected per-series scores into a table, then display the mean
# of every metric over all SMD series.
smd_results = pd.DataFrame.from_dict(total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.208909,0.091432,0.417052


### ECG

In [13]:
# Empty per-series score table: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Train one GRU autoencoder per ECG series and record its scores.
# The score table is rebuilt here so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_ecg]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride -- confirm in data_loader.
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Keras keeps global state for every model it builds; clear it so
        # memory does not grow across the models trained in this loop.
        keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        # Series are labelled D1, D2, ... in loading order.
        dataset_name = f'D{i+1}'

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with np.max, i.e. the best F1 is reported.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.35294112387543924 0.28612950825491223 0.43164250671247884
D2 0.3043477989603044 0.0 0.0
D3 0.11009173251409898 0.0 0.0
D4 0.2999999730000019 0.0 0.0
D5 0.27450977916186264 0.06876302339393592 0.15572715306014787
D6 0.24999997187500062 0.14872876884196004 0.5370583624260562
D7 0.05079364581103594 0.0 0.0
D8 0.21276593798098847 0.0 0.0
D9 0.417910413455115 0.0 0.0


In [15]:
# Turn the collected per-series scores into a table, then display the mean
# of every metric for each ECG series label.
ecg_results = pd.DataFrame.from_dict(total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.352941,0.28613,0.431643
D2,0.304348,0.0,0.0
D3,0.110092,0.0,0.0
D4,0.3,0.0,0.0
D5,0.27451,0.068763,0.155727
D6,0.25,0.148729,0.537058
D7,0.050794,0.0,0.0
D8,0.212766,0.0,0.0
D9,0.41791,0.0,0.0


### Power Demand

In [16]:
# Empty per-series score table: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Train one GRU autoencoder per Power Demand series and record its scores.
# The score table is rebuilt here so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_power_demand]:
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two loader arguments are presumably window length and
    # stride -- confirm in data_loader.
    datasets = loader(512, 256)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Keras keeps global state for every model it builds; clear it so
        # state from models built earlier in the notebook is released.
        keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with np.max, i.e. the best F1 is reported.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.7499999343750033 0.24999999249999968 -0.0


In [18]:
# Turn the collected per-series scores into a table, then display the mean
# of every metric for the Power Demand dataset.
power_results = pd.DataFrame.from_dict(total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.75,0.25,0.0


### 2D Gesture

In [19]:
# Empty per-series score table: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Train one GRU autoencoder per 2D Gesture series and record its scores.
# The score table is rebuilt here so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_gesture]:
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): the two loader arguments are presumably window length and
    # stride -- confirm in data_loader.
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Keras keeps global state for every model it builds; clear it so
        # state from models built earlier in the notebook is released.
        keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = GRU_AE(X_train)
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        # scores['f1'] is reduced with np.max, i.e. the best F1 is reported.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.42201831455264965 0.0 0.0


In [21]:
# Turn the collected per-series scores into a table, then display the mean
# of every metric for the 2D Gesture dataset.
gesture_results = pd.DataFrame.from_dict(total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.422018,0.0,0.0
