In [1]:
import os
import random

# GPU configuration, set here so it is already in place when TensorFlow is
# imported by the next cell.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",    # number devices by PCI bus id
    "CUDA_VISIBLE_DEVICES": "1",          # expose only the device with index 1
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",  # let TensorFlow grow GPU memory on demand
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets
from AE_Models import CNN_AE, LSTM_AE, GRU_AE, CNN_Bi_LSTM_AE, Wavenet

from tqdm.notebook import tqdm
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, LSTMCell, GRU, GRUCell, Reshape, Dropout, GaussianNoise, Concatenate, Lambda, RepeatVector, TimeDistributed

# Reproducibility: seed Python's `random`, NumPy and TensorFlow with the same
# fixed value so that repeated runs start from the same random state.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def Hetero_TAD(X_train, batch_size=128, epochs=50, learning_rate=0.001,
               validation_split=0.3, patience=5, verbose=0):
    """Train a heterogeneous ensemble of five autoencoders on one training set.

    One network is built from each of the ``AE_Models`` builders (``CNN_AE``,
    ``LSTM_AE``, ``GRU_AE``, ``CNN_Bi_LSTM_AE``, ``Wavenet``) and all five are
    combined into a single multi-input / multi-output Keras ``Model`` that is
    fitted jointly with an MSE loss. Every branch receives ``X_train`` as its
    input and the same windows flipped along axis 1 as its target, so the
    predictions of the returned model are reversed along axis 1 and callers
    have to flip them back.

    The Keras session is cleared on entry, so models created by earlier calls
    should not be reused after calling this function again.

    Parameters
    ----------
    X_train : np.ndarray
        Training windows. Must have at least three axes: ``shape[1]`` is used
        as the sequence length and ``shape[2]`` as the feature dimension
        (presumably ``(n_windows, seq_length, dim)`` -- confirm in data_loader).
    batch_size : int, default 128
        Mini-batch size passed to ``Model.fit``.
    epochs : int, default 50
        Maximum number of training epochs.
    learning_rate : float, default 0.001
        Learning rate of the Adam optimizer.
    validation_split : float, default 0.3
        Fraction of ``X_train`` that ``Model.fit`` holds out for validation;
        the resulting ``val_loss`` drives early stopping.
    patience : int, default 5
        Number of epochs without ``val_loss`` improvement before training
        stops (the best weights are restored by the callback).
    verbose : int, default 0
        Verbosity passed to ``Model.fit``.

    Returns
    -------
    tf.keras.Model
        The fitted model. Inputs and outputs are ordered CNN_AE, LSTM_AE,
        GRU_AE, CNN_Bi_LSTM_AE, Wavenet.
    """
    tf.keras.backend.clear_session()

    # Targets are the input windows reversed along axis 1.
    X_train_reverse = np.flip(X_train, axis=1)
    seq_length, dim = X_train.shape[1], X_train.shape[2]

    # Build the members in a fixed order so that weight initialisation (and
    # therefore seeded runs) stays the same from call to call.
    builders = (CNN_AE, LSTM_AE, GRU_AE, CNN_Bi_LSTM_AE, Wavenet)
    AEs = [tf.keras.models.Sequential(build(seq_length, dim, 'relu')) for build in builders]

    model = Model(inputs=[ae.input for ae in AEs], outputs=[ae.output for ae in AEs])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=patience, mode="min", restore_best_weights=True
    )
    # The same array is fed to every branch; the training history is not needed.
    model.fit(
        [X_train] * len(AEs),
        [X_train_reverse] * len(AEs),
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_split,
        verbose=verbose,
        callbacks=[early_stopping],
    )
    return model

### Yahoo S5

In [4]:
# Accumulator for the Yahoo S5 runs: one independent list per column, filled
# row by row by the evaluation loop in the next cell.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Rows are appended to `total_scores`: re-run the cell that resets it before
# re-running this one, otherwise earlier results are duplicated.
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # NOTE(review): the two arguments are presumably window length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    # Loader name without its 'load_' part, e.g. load_yahoo_A1 -> 'yahoo_A1'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # A fresh ensemble is trained for every series.
        model = Hetero_TAD(X_train)
        # Every branch of the ensemble receives the same test windows.
        test_inputs = [X_test] * len(model.outputs)
        # The branches are trained on targets flipped along axis 1, so flip the predictions back.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(test_inputs)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        # scores['f1'] holds several values (presumably one per threshold); keep the best.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291289
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957942
yahoo_A1 0.9090908429752097 0.7547169388267891 0.9166666504722225
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384642
yahoo_A1 0.9999998500000123 0.49999988541668605 0.999999946394234
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9836065041655494 0.9655734618343037 0.9995555508909629
yahoo_A1 0.906249947363284 0.9301137656764635 0.9850574666653192
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099015
yahoo_A1 0.9090908429752097 0.6957671552853911 0.9234006570779345
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000004
yahoo_A1 0.9999999437500025 0.9374999763313819 0.9999999926264045
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656862757
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790291266
yahoo_A1 0.9999999409090937 0.

In [None]:
# One row per evaluated series; the table shown is the mean of every metric
# for each Yahoo subset (A1-A4).
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

### NASA

In [None]:
# Fresh accumulator for the NASA runs (discards the scores of the previous section).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Rows are appended to `total_scores`: re-run the cell that resets it before
# re-running this one, otherwise earlier results are duplicated.
for loader in [load_nasa]:
    # NOTE(review): the two arguments are presumably window length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # A fresh ensemble is trained for every series.
        model = Hetero_TAD(X_train)
        # Every branch of the ensemble receives the same test windows.
        test_inputs = [X_test] * len(model.outputs)
        # The branches are trained on targets flipped along axis 1, so flip the predictions back.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(test_inputs)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        # Series are labelled by their 1-based position: D1, D2, ...
        label = f'D{i+1}'
        # scores['f1'] holds several values (presumably one per threshold); keep the best.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(label, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated NASA series.
# NOTE(review): the loop labels every series with a distinct 'D{i+1}', so each
# group contains a single row and the mean leaves the values unchanged; rows
# come out sorted by label as strings (D1, D10, ...). Confirm this is intended.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

### SMD

In [None]:
# Fresh accumulator for the SMD runs (discards the scores of the previous section).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Rows are appended to `total_scores`: re-run the cell that resets it before
# re-running this one, otherwise earlier results are duplicated.
for loader in [load_smd]:
    # NOTE(review): the two arguments are presumably window length and stride -- confirm in data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    # Loader name without its 'load_' part, i.e. 'smd'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        # A fresh ensemble is trained for every series.
        model = Hetero_TAD(X_train)
        # Every branch of the ensemble receives the same test windows.
        test_inputs = [X_test] * len(model.outputs)
        # The branches are trained on targets flipped along axis 1, so flip the predictions back.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(test_inputs)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        # scores['f1'] holds several values (presumably one per threshold); keep the best.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated SMD series; all rows share the label 'smd', so the
# table shown is the mean of every metric over the whole benchmark.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

### ECG

In [None]:
# Fresh accumulator for the ECG runs (discards the scores of the previous section).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Rows are appended to `total_scores`: re-run the cell that resets it before
# re-running this one, otherwise earlier results are duplicated.
for loader in [load_ecg]:
    # NOTE(review): the two arguments are presumably window length and stride -- confirm in data_loader.
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # No clear_session() call is needed here: Hetero_TAD clears the Keras
        # session itself before building the models.
        X_train = x_trains[i]
        X_test = x_tests[i]

        # A fresh ensemble is trained for every series.
        model = Hetero_TAD(X_train)
        # Every branch of the ensemble receives the same test windows.
        test_inputs = [X_test] * len(model.outputs)
        # The branches are trained on targets flipped along axis 1, so flip the predictions back.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(test_inputs)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        # Series are labelled by their 1-based position: D1, D2, ...
        label = f'D{i+1}'
        # scores['f1'] holds several values (presumably one per threshold); keep the best.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(label, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated ECG series.
# NOTE(review): the loop labels every series with a distinct 'D{i+1}', so each
# group contains a single row and the mean leaves the values unchanged; rows
# come out sorted by label as strings (D1, D10, ...). Confirm this is intended.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

### Power Demand

In [None]:
# Fresh accumulator for the Power Demand runs (discards the scores of the previous section).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Rows are appended to `total_scores`: re-run the cell that resets it before
# re-running this one, otherwise earlier results are duplicated.
for loader in [load_power_demand]:
    # NOTE(review): the two arguments are presumably window length and stride -- confirm in data_loader.
    datasets = loader(16, 8)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    # Loader name without its 'load_' part, i.e. 'power_demand'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # No clear_session() call is needed here: Hetero_TAD clears the Keras
        # session itself before building the models.
        X_train = x_trains[i]
        X_test = x_tests[i]

        # A fresh ensemble is trained for every series.
        model = Hetero_TAD(X_train)
        # Every branch of the ensemble receives the same test windows.
        test_inputs = [X_test] * len(model.outputs)
        # The branches are trained on targets flipped along axis 1, so flip the predictions back.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(test_inputs)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        # scores['f1'] holds several values (presumably one per threshold); keep the best.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated Power Demand series; all rows share the label
# 'power_demand', so the table shown is the mean of every metric over them.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

### 2D Gesture

In [None]:
# Fresh accumulator for the 2D Gesture runs (discards the scores of the previous section).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Rows are appended to `total_scores`: re-run the cell that resets it before
# re-running this one, otherwise earlier results are duplicated.
for loader in [load_gesture]:
    # NOTE(review): the two arguments are presumably window length and stride -- confirm in data_loader.
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    # Loader name without its 'load_' part, i.e. 'gesture'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # No clear_session() call is needed here: Hetero_TAD clears the Keras
        # session itself before building the models.
        X_train = x_trains[i]
        X_test = x_tests[i]

        # A fresh ensemble is trained for every series.
        model = Hetero_TAD(X_train)
        # Every branch of the ensemble receives the same test windows.
        test_inputs = [X_test] * len(model.outputs)
        # The branches are trained on targets flipped along axis 1, so flip the predictions back.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(test_inputs)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        # scores['f1'] holds several values (presumably one per threshold); keep the best.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated 2D Gesture series; all rows share the label 'gesture',
# so the table shown is the mean of every metric over them.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()