In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

### Yahoo S5

In [3]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [1]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(128, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        Conv1D = layers.Conv1D
        Conv1DT = layers.Conv1DTranspose
        Dropout = layers.Dropout
        
        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
                Conv1D(32, 7, padding='same', strides=2, activation='relu'),
                Dropout(0.2),
                Conv1D(16, 7, padding='same', strides=2, activation='relu'),
                Conv1DT(16, 7, padding='same', strides=2, activation='relu'),
                Dropout(0.2),
                Conv1DT(32, 7, padding='same', strides=2, activation='relu'),
                Conv1DT(1, 7, padding='same')
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
        history = model.fit(X_train, X_train, epochs=50, batch_size=128, validation_split=0.3, verbose=0,
                        callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)])    
        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

NameError: name 'load_yahoo_A1' is not defined

In [5]:
yahoo_results = pd.DataFrame(total_scores)

In [6]:
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.555433,0.243015,0.378667
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.650213,0.056989,0.056817
yahoo_A4,0.557909,0.039513,0.028171


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        Conv1D = layers.Conv1D
        Conv1DT = layers.Conv1DTranspose
        Dropout = layers.Dropout
        
        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
                Conv1D(32, 7, padding='same', strides=2, activation='relu'),
                Dropout(0.2),
                Conv1D(16, 7, padding='same', strides=2, activation='relu'),
                Conv1DT(16, 7, padding='same', strides=2, activation='relu'),
                Dropout(0.2),
                Conv1DT(32, 7, padding='same', strides=2, activation='relu'),
                Conv1DT(1, 7, padding='same')
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
        history = model.fit(X_train, X_train, epochs=50, batch_size=128, validation_split=0.3, verbose=0,
                        callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)])    
        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2652406181306115 0.10903357368670072 0.3836433988649448
D2 0.2699530282282525 0.5796531045422718 0.5173913038147389


In [9]:
nasa_results = pd.DataFrame(total_scores)

In [10]:
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.265241,0.109034,0.383643
D2,0.269953,0.579653,0.517391


### SMD

In [11]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [12]:
for loader in [load_smd]:
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        Conv1D = layers.Conv1D
        Conv1DT = layers.Conv1DTranspose
        Dropout = layers.Dropout
        
        model = keras.Sequential(
            [
                layers.InputLayer(input_shape=(X_train.shape[1], X_train.shape[2])),
                Conv1D(32, 7, padding='same', strides=2, activation='relu'),
                Dropout(0.2),
                Conv1D(16, 7, padding='same', strides=2, activation='relu'),
                Conv1DT(16, 7, padding='same', strides=2, activation='relu'),
                Dropout(0.2),
                Conv1DT(32, 7, padding='same', strides=2, activation='relu'),
                Conv1DT(1, 7, padding='same')
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
        history = model.fit(X_train, X_train, epochs=50, batch_size=128, validation_split=0.3, verbose=0,
                        callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)])    
        
        scores = evaluate(X_test, model.predict(X_test), y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.21943571700356884 0.5598591033089078 0.5142857126097861
smd 0.624999946093754 0.5263455998533138 0.8576988577521705
smd 0.3636363123967009 0.2623451041077363 0.7294203209868252
smd 0.3380281275937362 0.31909059719487676 0.7322580605173925
smd 0.666666611111114 0.6126054493467343 0.7478165842121981
smd 0.4999999513750046 0.561997570382591 0.6825682206959776
smd 0.2974358676765314 0.19374700256213884 0.5638942557957299
smd 0.21212116923784125 0.1770759138401065 0.5731768360733389
smd 0.3333332827160567 0.31677702174821804 0.6932319211315485
smd 0.28363633908892766 0.352998688489399 0.5395027967288474
smd 0.6363635822314088 0.6751050004808337 0.7664711907248292
smd 0.2710622474312033 0.1687067347925086 0.2457314204820946
smd 0.24242421965105748 0.21682850158250142 0.4514773657233134
smd 0.5333332871111137 0.6534552532131934 0.6818181757506887
smd 0.2857142591836752 0.21390300781029029 0.45049872990884565
smd 0.7999999200000041 0.6687762990145876 0.8333333051994312
smd 0.312499971679

In [13]:
smd_results = pd.DataFrame(total_scores)

In [14]:
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.379925,0.373756,0.624099
