In [1]:
import os
import random

# GPU-related environment settings, applied before TensorFlow is imported in the next cell.
os.environ.update(
    {
        # Number the GPUs by PCI bus ID so the index below is stable.
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        # Expose only the device with index 1 to this process.
        "CUDA_VISIBLE_DEVICES": "1",
        # Ask TensorFlow to grow its GPU memory allocation on demand.
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: hand the same fixed seed to Python's, NumPy's and TensorFlow's
# global random generators, in that order.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(0)

### Yahoo S5

In [3]:
# Accumulator for the Yahoo S5 runs: one independent list per reported column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [4]:
# Train one LSTM autoencoder per series of every Yahoo S5 subset, score its
# reconstruction of the test split with `evaluate`, and append the metrics to
# `total_scores` (which must already exist from the previous cell).
LSTM = layers.LSTM  # alias bound once here instead of on every inner iteration

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    dataset_name = loader.__name__.replace('load_', '')  # load_yahoo_A1 -> yahoo_A1
    # NOTE(review): the two positional arguments are presumably window length and
    # stride -- confirm against data_loader.
    datasets = loader(128, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains)), desc=dataset_name):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Axes 1 and 2 of the training array size every layer below.
        # NOTE(review): presumably (windows, timesteps, features) -- confirm against data_loader.
        timesteps, n_features = X_train.shape[1], X_train.shape[2]

        # A brand-new model is built on every iteration. Releasing Keras' global
        # state first keeps memory from growing with the number of series.
        # NOTE(review): this may shift the random initialisation sequence relative
        # to previously recorded runs -- re-record the outputs after this change.
        keras.backend.clear_session()

        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(timesteps, n_features)),
                # Encoder: the second LSTM returns only its final 32-unit output.
                LSTM(64, return_sequences=True),
                LSTM(32),
                # Decoder: repeat that vector once per timestep, decode it, then
                # project to a flat window and restore the input shape.
                layers.RepeatVector(timesteps),
                LSTM(32, return_sequences=True),
                LSTM(64),
                layers.Dense(timesteps * n_features),
                layers.Reshape([timesteps, n_features]),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

        # Autoencoder objective: the input is also the target. Training stops after
        # 5 epochs without val_loss improvement and the best weights are restored.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True
        )
        history = model.fit(
            X_train, X_train,
            epochs=50, batch_size=128, validation_split=0.3, verbose=0,
            callbacks=[early_stopping],
        )

        # verbose=0 stops predict() from printing a progress bar per series.
        reconstruction = model.predict(X_test, verbose=0)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # highest F1 among the values evaluate() returned

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.5714285224489823 0.0833333434821412 0.1666666583333337
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.46153841893491365 0.14999999350000026 0.49999997619047704
yahoo_A1 0.6666666133333363 0.6937499446348423 0.699999968500001
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.8571427836734729 0.8283332727972265 0.9149999600250012
yahoo_A1 0.33333330000000166 0.055555557160493235 0.06249999296875069
yahoo_A1 0.8749999398437528 0.16666668743763857 -0.0
yahoo_A1 0.6666666133333363 0.08333334749999795 0.49999994000000614
yahoo_A1 0.9999999000000052 0.49999995000000386 0.9999999375000034
yahoo_A1 0.18181816198347142 0.0 0.0
yahoo_A1 0.46153841893491365 0.0 0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.6666666133333363 0.2899801496316494 0.09999999550000015
yahoo_A1 0.6666666133333363 0.08333334620204845 -0.0
yahoo_A1 0.66666661

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A3 0.4999999562500026 6.464645887944386e-09 0.0
yahoo_A3 0.5882352456747433 0.18356481098050528 -0.0
yahoo_A3 0.736842050969532 0.0 0.0
yahoo_A3 0.4999999562500026 0.0 0.0
yahoo_A3 0.8571428000000028 1.025883832329092e-09 0.0
yahoo_A3 0.4999999562500026 0.05555556362513912 0.0
yahoo_A3 0.8333332708333363 0.6545832784733282 0.8499999622500013
yahoo_A3 0.6666666148148177 2.1465365509595665e-08 0.0
yahoo_A3 0.799999944000003 0.27083332641140634 0.43749994843750517
yahoo_A3 0.0 0.0 0.0
yahoo_A3 0.9565216809073751 0.4661111014103642 0.0
yahoo_A3 0.9523808934240391 0.4166666588267708 0.0
yahoo_A3 0.799999944000003 0.0 0.0
yahoo_A3 0.3636363272727291 8.680554187702419e-09 0.399999971000002
yahoo_A3 0.736842050969532 0.0 0.0
yahoo_A3 0.15384613727810673 0.04166666215277823 -0.0
yahoo_A3 0.9090908512396723 0.0 0.0
yahoo_A3 0.4999999562500026 0.0 0.0
yahoo_A3 0.4999999562500026 0.0 0.0
yahoo_A3 0.5882352456747433 0.0 0.0
yahoo_A3 0.28571425714285853 0.0 0.0
yahoo_A3 0.28571425714285853 6.4

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A4 0.6666666148148177 0.0 0.0
yahoo_A4 0.0 0.0 0.0
yahoo_A4 0.4999999562500026 0.05555556362513912 -0.0
yahoo_A4 0.6666666148148177 0.0 0.0
yahoo_A4 0.6666666000000033 0.24999999136453824 0.8749999453125035
yahoo_A4 0.9090908512396723 0.5694444152819887 0.5833332861111141
yahoo_A4 0.736842050969532 0.12500000901049071 0.249999982500001
yahoo_A4 0.4999999562500026 0.0 0.0
yahoo_A4 0.0 0.0 0.0
yahoo_A4 0.5882352456747433 0.0 0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.9090908512396723 1.6439393062381173e-08 0.0
yahoo_A4 0.736842050969532 1.4367633554077069e-08 0.0
yahoo_A4 0.9090908512396723 0.0 0.0
yahoo_A4 0.6666666148148177 0.0 0.0
yahoo_A4 0.736842050969532 0.08333334928826763 0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.3999999626666689 0.0 0.0
yahoo_A4 0.15384613727810673 0.04166666215277823 -0.0
yahoo_A4 0.4999999562500026 0.0 0.0
yahoo_A4 0.9090908512396723 0.0 0.0
yahoo_A4 0.8571428000000028 0.0 0.0
yahoo_A4 0.9999999416666693 0.7499999500000051 0.0
yahoo_A4 0.9473683612188393 5.92106008

In [5]:
# One row per evaluated Yahoo S5 series; the dict keys become the columns.
yahoo_results = pd.DataFrame.from_dict(total_scores)

In [6]:
# Average every metric over all series that share the same Yahoo S5 subset label.
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.456918,0.137571,0.219337
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.642584,0.069103,0.044994
yahoo_A4,0.544727,0.047881,0.019306


### NASA

In [7]:
# Rebind the accumulator so the NASA runs start from empty lists.
total_scores = dict(dataset=[], f1=[], pr_auc=[], roc_auc=[])

In [8]:
# Train one LSTM autoencoder per NASA dataset, score its reconstruction of the
# test split with `evaluate`, and append the metrics to `total_scores` (which
# must already exist from the previous cell). Rows are labelled D1, D2, ...
LSTM = layers.LSTM  # alias bound once here instead of on every inner iteration

for loader in [load_nasa]:
    # NOTE(review): the two positional arguments are presumably window length and
    # stride -- confirm against data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains)), desc=loader.__name__.replace('load_', '')):
        X_train = x_trains[i]
        X_test = x_tests[i]
        dataset_label = f'D{i+1}'  # 1-based label used for both the table and the log line
        # Axes 1 and 2 of the training array size every layer below.
        # NOTE(review): presumably (windows, timesteps, features) -- confirm against data_loader.
        timesteps, n_features = X_train.shape[1], X_train.shape[2]

        # A brand-new model is built on every iteration. Releasing Keras' global
        # state first keeps memory from growing with the number of datasets.
        # NOTE(review): this may shift the random initialisation sequence relative
        # to previously recorded runs -- re-record the outputs after this change.
        keras.backend.clear_session()

        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(timesteps, n_features)),
                # Encoder: the second LSTM returns only its final 32-unit output.
                LSTM(64, return_sequences=True),
                LSTM(32),
                # Decoder: repeat that vector once per timestep, decode it, then
                # project to a flat window and restore the input shape.
                layers.RepeatVector(timesteps),
                LSTM(32, return_sequences=True),
                LSTM(64),
                layers.Dense(timesteps * n_features),
                layers.Reshape([timesteps, n_features]),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

        # Autoencoder objective: the input is also the target. Training stops after
        # 5 epochs without val_loss improvement and the best weights are restored.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True
        )
        history = model.fit(
            X_train, X_train,
            epochs=50, batch_size=128, validation_split=0.3, verbose=0,
            callbacks=[early_stopping],
        )

        # verbose=0 stops predict() from printing a progress bar per dataset.
        reconstruction = model.predict(X_test, verbose=0)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # highest F1 among the values evaluate() returned

        total_scores['dataset'].append(dataset_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.26609987030716387 0.06071534928748652 0.23365349582437323
D2 0.3108974057317377 0.14059263929554916 0.509857249384132


In [9]:
# One row per evaluated NASA dataset; the dict keys become the columns.
nasa_results = pd.DataFrame.from_dict(total_scores)

In [10]:
# Mean of every metric per dataset label. The training cell labels each row
# D<i+1>, so every group holds a single row and the mean equals that row.
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.2661,0.060715,0.233653
D2,0.310897,0.140593,0.509857


### SMD

In [11]:
# Rebind the accumulator so the SMD runs start from empty lists.
total_scores = {name: list() for name in ['dataset', 'f1', 'pr_auc', 'roc_auc']}

In [12]:
# Train one LSTM autoencoder per SMD series, score its reconstruction of the
# test split with `evaluate`, and append the metrics to `total_scores` (which
# must already exist from the previous cell).
LSTM = layers.LSTM  # alias bound once here instead of on every inner iteration

for loader in [load_smd]:
    dataset_name = loader.__name__.replace('load_', '')  # load_smd -> smd
    # NOTE(review): the two positional arguments are presumably window length and
    # stride -- confirm against data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains)), desc=dataset_name):
        X_train = x_trains[i]
        X_test = x_tests[i]
        # Axes 1 and 2 of the training array size every layer below.
        # NOTE(review): presumably (windows, timesteps, features) -- confirm against data_loader.
        timesteps, n_features = X_train.shape[1], X_train.shape[2]

        # A brand-new model is built on every iteration. Releasing Keras' global
        # state first keeps memory from growing with the number of series.
        # NOTE(review): this may shift the random initialisation sequence relative
        # to previously recorded runs -- re-record the outputs after this change.
        keras.backend.clear_session()

        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(timesteps, n_features)),
                # Encoder: the second LSTM returns only its final 32-unit output.
                LSTM(64, return_sequences=True),
                LSTM(32),
                # Decoder: repeat that vector once per timestep, decode it, then
                # project to a flat window and restore the input shape.
                layers.RepeatVector(timesteps),
                LSTM(32, return_sequences=True),
                LSTM(64),
                layers.Dense(timesteps * n_features),
                layers.Reshape([timesteps, n_features]),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

        # Autoencoder objective: the input is also the target. Training stops after
        # 5 epochs without val_loss improvement and the best weights are restored.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True
        )
        history = model.fit(
            X_train, X_train,
            epochs=50, batch_size=128, validation_split=0.3, verbose=0,
            callbacks=[early_stopping],
        )

        # verbose=0 stops predict() from printing a progress bar per series.
        reconstruction = model.predict(X_test, verbose=0)
        scores = evaluate(X_test, reconstruction, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])  # highest F1 among the values evaluate() returned

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.21943571700356884 0.06161971811210573 0.49999999837062536
smd 0.1269841149911827 0.0 0.0
smd 0.1556420088722022 0.032738515035395335 0.44368004935039196
smd 0.1556420088722022 0.02805855430924667 0.35315708233255905
smd 0.07048457463564264 0.018264839945997792 0.539301303066899
smd 0.4370860582693768 0.09905660369385144 0.49999999851540616
smd 0.2666666210975133 0.13237241665827884 0.5041226347653743
smd 0.19157086375713953 0.070299126894654 0.46049303663978836
smd 0.18461536771597778 0.04082120156742021 0.4205974810077555
smd 0.463414583135044 0.2644084922170394 0.6904854853188263
smd 0.1044176607022476 0.027542372657821033 0.49999999592963096
smd 0.2710622474312033 0.07812435888314712 0.1381344429969452
smd 0.22556388959240378 0.03393665165933668 0.49999999642394827
smd 0.08026755077012628 0.025142726056084974 0.4171212084935215
smd 0.18461536771597778 0.013761468306413635 0.49999999143081775
smd 0.02499999751041683 0.006329113710409663 0.4999999831196587
smd 0.1719745064647667

In [13]:
# One row per evaluated SMD series; the dict keys become the columns.
smd_results = pd.DataFrame.from_dict(total_scores)

In [14]:
# Every row carries the same 'smd' label, so this yields one row of metric averages.
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.16846,0.042415,0.399991
