In [1]:
import os
import random

# GPU selection for this notebook. These variables are set before TensorFlow is
# imported in the next cell so that they are already in the environment when the
# CUDA runtime starts up.
#   CUDA_DEVICE_ORDER        - enumerate GPUs by PCI bus id
#   CUDA_VISIBLE_DEVICES     - expose only the GPU with index 1 to this process
#   TF_FORCE_GPU_ALLOW_GROWTH - let TensorFlow grow its GPU memory use on demand
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "1",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed Python's `random`, NumPy's legacy global RNG and
# TensorFlow's global RNG with the same fixed value before any model is built.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

### Yahoo S5

In [3]:
# Column-oriented accumulator for the Yahoo S5 runs: one independent list per
# column, appended to once per evaluated series.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [4]:
# Train one GRU autoencoder per Yahoo S5 series and record its anomaly scores.
# For every series: build the model, fit it to reconstruct its own training
# windows, reconstruct the test windows, and append the metrics returned by
# `evaluate` to `total_scores`.

GRU = layers.GRU  # loop-invariant alias, hoisted out of the per-series loop

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # Label used in `total_scores` and in the printed log, e.g. 'yahoo_A1'.
    dataset_name = loader.__name__.replace('load_', '')

    # NOTE(review): the two positional arguments are presumably the window
    # length and the stride -- confirm against data_loader.
    datasets = loader(128, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Axes 1 and 2 of the training array are fed to the recurrent layers
        # as (timesteps, features).
        timesteps, n_features = X_train.shape[1], X_train.shape[2]

        # A new model is created on every iteration. Without resetting Keras'
        # global state, each model built in this loop adds to it and memory
        # use keeps growing over the hundreds of series processed here.
        keras.backend.clear_session()

        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(timesteps, n_features)),
                # Encoder: sequence -> single 32-unit vector.
                GRU(64, return_sequences=True),
                GRU(32),
                # Repeat the encoded vector once per timestep for the decoder.
                layers.RepeatVector(timesteps),
                # Decoder: the last GRU returns only its final state, which the
                # Dense layer expands to a flat window that is reshaped back to
                # (timesteps, n_features).
                GRU(32, return_sequences=True),
                GRU(64),
                layers.Dense(timesteps * n_features),
                layers.Reshape([timesteps, n_features]),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

        # Input and target are the same array (reconstruction objective).
        # Training stops once val_loss has not improved for 5 epochs and the
        # best weights seen are restored.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True
        )
        history = model.fit(
            X_train,
            X_train,
            epochs=50,
            batch_size=128,
            validation_split=0.3,
            verbose=0,
            callbacks=[early_stopping],
        )

        # verbose=0 keeps the cell output to one line per series regardless of
        # the installed TensorFlow version's default predict verbosity.
        reconstruction = model.predict(X_test, verbose=0)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # scores['f1'] is reduced with np.max, so the logged F1 is the largest
        # value returned by `evaluate` (presumably one per threshold -- confirm
        # in evaluator).
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.5714285224489823 0.4271825012201472 0.5972221909722236
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.46153841893491365 0.14999999350000026 0.49999997619047704
yahoo_A1 0.6666666133333363 0.6263888375648194 0.6033333085055562
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.7499999343750033 0.6880555048500808 0.6449999730750008
yahoo_A1 0.33333330000000166 0.06250000074652727 0.3749999578125042
yahoo_A1 0.8571427959183704 0.29166666374007905 -0.0
yahoo_A1 0.6666666133333363 0.5338888478286409 0.6366666305611128
yahoo_A1 0.9999999000000052 0.7499998875000145 0.9999999375000035
yahoo_A1 0.18181816198347142 0.0 0.0
yahoo_A1 0.46153841893491365 0.0 0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.6666666133333363 0.32961308272599993 0.31666665186111165
yahoo_A1 0.6666666133333363 0.08333334620204845 -0.0
yahoo_A1 0.6666666111111

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A3 0.4999999562500026 2.5252523422138635e-09 0.0
yahoo_A3 0.5882352456747433 0.14467592519269648 -0.0
yahoo_A3 0.736842050969532 3.0862191832992026e-09 0.0
yahoo_A3 0.4999999562500026 0.0 0.0
yahoo_A3 0.8571428000000028 6.079996228979668e-09 0.0
yahoo_A3 0.4999999562500026 0.0 0.0
yahoo_A3 0.749999943750003 0.41666662648401226 0.8999999420000038
yahoo_A3 0.6666666148148177 0.07142858473067244 -0.0
yahoo_A3 0.799999944000003 1.741834889734491e-08 0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.9565216809073751 0.19875000447749328 0.0
yahoo_A3 0.9523808934240391 0.5138888721369566 -0.0
yahoo_A3 0.799999944000003 0.10000001875168127 0.0
yahoo_A3 0.3076922769230785 0.06250000001420417 0.2999999720000025
yahoo_A3 0.736842050969532 0.0 0.0
yahoo_A3 0.15384613727810673 0.04166666215277823 -0.0
yahoo_A3 0.9090908512396723 0.0 0.0
yahoo_A3 0.4999999562500026 0.0 0.0
yahoo_A3 0.4999999562500026 0.05555556362513912 -0.0
yahoo_A3 0.5882352456747433 0.06250001071706508 -0.0
yahoo_A3 0.28571425714285853 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A4 0.6666666148148177 0.0 0.0
yahoo_A4 0.0 0.0 0.0
yahoo_A4 0.5333332871111137 0.3263392625770806 0.3958333162326395
yahoo_A4 0.6666666148148177 2.1465365509595665e-08 0.0
yahoo_A4 0.666666611111114 0.10000001155158537 0.49999998125000056
yahoo_A4 0.9090908512396723 0.585813467106676 0.49999995833333605
yahoo_A4 0.736842050969532 0.2739748628486687 -0.0
yahoo_A4 0.4999999562500026 0.0 0.0
yahoo_A4 0.0 0.0 0.0
yahoo_A4 0.5882352456747433 0.0 0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.9090908512396723 3.449975957793404e-09 0.0
yahoo_A4 0.736842050969532 0.083333349346138 0.0
yahoo_A4 0.9090908512396723 2.041245759923482e-09 0.0
yahoo_A4 0.6666666148148177 0.07142858473067246 -0.0
yahoo_A4 0.736842050969532 0.20064484155150275 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.3999999626666689 0.0 0.0
yahoo_A4 0.16666664861111138 0.045454540495868265 0.09090908099173658
yahoo_A4 0.4999999562500026 0.0 0.0
yahoo_A4 0.9090908512396723 0.0 0.0
yahoo_A4 0.8571428000000028 0.0 0.0
yahoo_A4 0.99999994166666

In [5]:
# One row per evaluated Yahoo S5 series; columns are the keys of `total_scores`.
yahoo_results = pd.DataFrame(data=total_scores)

In [6]:
# Mean of every metric over the series of each Yahoo S5 subset.
# NOTE(review): the recorded output shows yahoo_A2 at 0.0 for all three
# metrics -- worth confirming the labels / evaluation for that subset.
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.456335,0.181156,0.226509
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.646223,0.071011,0.046151
yahoo_A4,0.553478,0.058441,0.047


### NASA

In [7]:
# Start a fresh column-oriented accumulator for the NASA runs (the name is
# rebound, so earlier results are no longer referenced through it).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GRU autoencoder per NASA dataset and record its anomaly scores.
# For every dataset: build the model, fit it to reconstruct its own training
# windows, reconstruct the test windows, and append the metrics returned by
# `evaluate` to `total_scores` under the label D1, D2, ...

GRU = layers.GRU  # loop-invariant alias, hoisted out of the per-dataset loop

for loader in [load_nasa]:
    # NOTE(review): the two positional arguments are presumably the window
    # length and the stride -- confirm against data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Axes 1 and 2 of the training array are fed to the recurrent layers
        # as (timesteps, features).
        timesteps, n_features = X_train.shape[1], X_train.shape[2]
        # 1-based label for this dataset in the results table and the log.
        dataset_label = f'D{i+1}'

        # A new model is created on every iteration; reset Keras' global state
        # first so models from earlier iterations do not accumulate in memory.
        keras.backend.clear_session()

        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(timesteps, n_features)),
                # Encoder: sequence -> single 32-unit vector.
                GRU(64, return_sequences=True),
                GRU(32),
                # Repeat the encoded vector once per timestep for the decoder.
                layers.RepeatVector(timesteps),
                # Decoder: the last GRU returns only its final state, which the
                # Dense layer expands to a flat window that is reshaped back to
                # (timesteps, n_features).
                GRU(32, return_sequences=True),
                GRU(64),
                layers.Dense(timesteps * n_features),
                layers.Reshape([timesteps, n_features]),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

        # Input and target are the same array (reconstruction objective).
        # Training stops once val_loss has not improved for 5 epochs and the
        # best weights seen are restored.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True
        )
        history = model.fit(
            X_train,
            X_train,
            epochs=50,
            batch_size=128,
            validation_split=0.3,
            verbose=0,
            callbacks=[early_stopping],
        )

        # verbose=0 keeps the cell output to one line per dataset regardless of
        # the installed TensorFlow version's default predict verbosity.
        reconstruction = model.predict(X_test, verbose=0)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # scores['f1'] is reduced with np.max, so the logged F1 is the largest
        # value returned by `evaluate` (presumably one per threshold -- confirm
        # in evaluator).
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2624006636053196 0.0660586020802229 0.2584784535712093
D2 0.29583972461604124 0.1395964108494039 0.5077868659595938


In [9]:
# One row per evaluated NASA dataset; columns are the keys of `total_scores`.
nasa_results = pd.DataFrame(data=total_scores)

In [10]:
# Mean of every metric per dataset label. Each label (D1, D2, ...) is appended
# once per loop iteration, so every group here holds a single row.
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.262401,0.066059,0.258478
D2,0.29584,0.139596,0.507787


### SMD

In [11]:
# Start a fresh column-oriented accumulator for the SMD runs (the name is
# rebound, so earlier results are no longer referenced through it).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [12]:
# Train one GRU autoencoder per SMD entity and record its anomaly scores.
# For every entity: build the model, fit it to reconstruct its own training
# windows, reconstruct the test windows, and append the metrics returned by
# `evaluate` to `total_scores`.

GRU = layers.GRU  # loop-invariant alias, hoisted out of the per-entity loop

for loader in [load_smd]:
    # Label used in `total_scores` and in the printed log, i.e. 'smd'.
    dataset_name = loader.__name__.replace('load_', '')

    # NOTE(review): the two positional arguments are presumably the window
    # length and the stride -- confirm against data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Axes 1 and 2 of the training array are fed to the recurrent layers
        # as (timesteps, features).
        timesteps, n_features = X_train.shape[1], X_train.shape[2]

        # A new model is created on every iteration; reset Keras' global state
        # first so models from earlier iterations do not accumulate in memory.
        keras.backend.clear_session()

        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(timesteps, n_features)),
                # Encoder: sequence -> single 32-unit vector.
                GRU(64, return_sequences=True),
                GRU(32),
                # Repeat the encoded vector once per timestep for the decoder.
                layers.RepeatVector(timesteps),
                # Decoder: the last GRU returns only its final state, which the
                # Dense layer expands to a flat window that is reshaped back to
                # (timesteps, n_features).
                GRU(32, return_sequences=True),
                GRU(64),
                layers.Dense(timesteps * n_features),
                layers.Reshape([timesteps, n_features]),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

        # Input and target are the same array (reconstruction objective).
        # Training stops once val_loss has not improved for 5 epochs and the
        # best weights seen are restored.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True
        )
        history = model.fit(
            X_train,
            X_train,
            epochs=50,
            batch_size=128,
            validation_split=0.3,
            verbose=0,
            callbacks=[early_stopping],
        )

        # verbose=0 keeps the cell output to one line per entity regardless of
        # the installed TensorFlow version's default predict verbosity.
        reconstruction = model.predict(X_test, verbose=0)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)

        # scores['f1'] is reduced with np.max, so the logged F1 is the largest
        # value returned by `evaluate` (presumably one per threshold -- confirm
        # in evaluator).
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.21943571700356884 0.06161971811210573 0.49999999837062536
smd 0.1295546436492987 0.017747097966200467 0.12835768310313056
smd 0.17391299612477615 0.07791452892244566 0.552305021960078
smd 0.1913043199092667 0.09306108803792014 0.5806148402591004
smd 0.0737327117246072 0.0191387557324695 0.56113536391981
smd 0.44525543871543777 0.26610936255540923 0.5315064778577574
smd 0.2647058591911784 0.11349762862276463 0.43835812365718074
smd 0.19157086375713953 0.07504709717600982 0.4223545900913297
smd 0.2380951879818695 0.1386990655871239 0.5653067253928539
smd 0.42857138576468035 0.17603861235881257 0.6989371591639427
smd 0.14285712755102062 0.5198084625652851 0.538461532924218
smd 0.2710622474312033 0.09507942697285253 0.26663386771494707
smd 0.2290076131344345 0.1756536191468478 0.459693531987835
smd 0.08026755077012628 0.025588471562191274 0.48954545028789254
smd 0.22222220082304656 0.12008803004227606 0.2956929129115833
smd 0.02499999751041683 0.006329113710409663 0.4999999831196587


In [13]:
# One row per evaluated SMD entity; columns are the keys of `total_scores`.
smd_results = pd.DataFrame(data=total_scores)

In [14]:
# Mean of every metric over all SMD entities (every row carries the label 'smd').
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.182188,0.085415,0.437048
