In [1]:
import os
import random

# GPU selection and memory policy. These are set in the first cell, before
# tensorflow is imported in the next one.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",    # enumerate GPUs in PCI bus order
    "CUDA_VISIBLE_DEVICES": "1",          # expose only the GPU with index 1
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",  # grow GPU memory on demand
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tqdm.notebook import tqdm
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, LSTM, Reshape, Dropout, GaussianNoise, Concatenate, Lambda, RepeatVector

# Reproducibility: seed the Python, NumPy and TensorFlow random number
# generators with the same fixed value.
random.seed(a=0)
np.random.seed(seed=0)
tf.random.set_seed(seed=0)

In [3]:
def SKip_AE(seq_length, dim, N_RES=3, N_LAYERS=1, BATCH_SIZE=128):
    """Build and compile a multi-resolution LSTM autoencoder.

    One encoder/decoder pair is created for every resolution ``n`` in
    ``range(N_RES)``. Resolution ``n`` works on the window sub-sampled with
    stride ``2**n`` (timesteps ``0, 2**n, 2 * 2**n, ...``), so its input and
    its output both have shape ``(len(range(0, seq_length, 2**n)), dim)``.

    Encoder (per resolution): Gaussian noise (stddev 0.5) on the input,
    ``N_LAYERS`` stacked ``LSTM(64)`` layers, then a 32-unit ReLU latent with
    L1 kernel regularisation.

    Decoder (per resolution): the latents of all resolutions are merged
    through ``Dense(128)``, the resolution's own latent is concatenated back
    in, the vector is repeated over time, passed through ``N_LAYERS`` stacked
    ``LSTM(64)`` layers, and the final state is projected and reshaped to the
    reconstructed window.

    Note: calling this function clears the global Keras session.

    Args:
        seq_length: Number of timesteps of the full-resolution window (>= 1).
        dim: Number of features per timestep (>= 1).
        N_RES: Number of resolutions, i.e. of model inputs/outputs (>= 1).
        N_LAYERS: Number of stacked LSTM layers in each encoder and each
            decoder (>= 1).
        BATCH_SIZE: Unused by the model; kept so existing callers keep working.

    Returns:
        A ``tf.keras.Model`` compiled with Adam (learning rate 0.001) and MSE
        loss, taking a list of ``N_RES`` inputs and producing a list of
        ``N_RES`` outputs ordered from finest to coarsest resolution.

    Raises:
        ValueError: If ``seq_length``, ``dim``, ``N_RES`` or ``N_LAYERS`` is
            smaller than 1.
    """
    # Validate before touching Keras: a bad call must neither reset the global
    # session nor fail later with an opaque error (e.g. an unbound tensor when
    # N_LAYERS is 0).
    for arg_name, arg_value in (('seq_length', seq_length), ('dim', dim),
                                ('N_RES', N_RES), ('N_LAYERS', N_LAYERS)):
        if arg_value < 1:
            raise ValueError(f'{arg_name} must be >= 1, got {arg_value!r}')

    tf.keras.backend.clear_session()

    # Number of timesteps kept at each resolution (stride 2**n).
    steps_by_res = [len(range(0, seq_length, 2**n)) for n in range(N_RES)]

    # One input per resolution.
    en_inputs = [Input(shape=[n_steps, dim]) for n_steps in steps_by_res]

    # Encoders: noisy input -> stacked LSTMs -> sparse latent code.
    shared_latents = []
    for en_input in en_inputs:
        X = GaussianNoise(0.5)(en_input)
        for l in range(N_LAYERS):
            # Only the last LSTM collapses the sequence to a single vector.
            X = LSTM(64, return_sequences=(l + 1 < N_LAYERS))(X)
        h = Dense(32, activation='relu', kernel_regularizer='l1')(X)
        shared_latents.append(h)

    # Decoders: every resolution sees a mix of all latents plus its own latent.
    de_outputs = []
    for n, n_steps in enumerate(steps_by_res):
        # With a single resolution the merge is the identity; skipping it also
        # avoids Keras versions that reject Concatenate on a one-element list.
        merged = shared_latents[0] if N_RES == 1 else Concatenate()(shared_latents)
        X = Dense(128, activation='relu')(merged)
        X = Concatenate()([X, shared_latents[n]])
        X = RepeatVector(n_steps)(X)
        for l in range(N_LAYERS):
            X = LSTM(64, return_sequences=(l + 1 < N_LAYERS))(X)
        # Project the final LSTM state to the whole window, then restore the
        # (time, feature) layout.
        rec_x = Dense(n_steps * dim)(X)
        rec_x = Reshape([n_steps, dim])(rec_x)
        de_outputs.append(rec_x)

    model = Model(inputs=en_inputs, outputs=de_outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="mse")

    return model

### Yahoo S5

In [4]:
# Metric accumulator for the Yahoo S5 runs: one list per column, one entry per evaluated series.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Train and evaluate one multi-resolution autoencoder per series of the four
# Yahoo S5 benchmarks, collecting the metrics in `total_scores`.

N_RES = 3  # resolutions fed to the model: strides 1, 2 and 4

# Start from an empty accumulator so that re-running this cell never appends
# duplicate rows to a previous run.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): (128, 32) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(128, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # The model is trained to emit every window in reversed time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Keep every 2**n-th timestep for resolution n.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = SKip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Flip the predictions back to forward time order; only the
        # full-resolution output (index 0) is scored against the test windows.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.5714285224489823 0.1666666623148149 0.1666666583333337
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.46153841893491365 0.14999999350000026 0.49999997619047704
yahoo_A1 0.6666666133333363 0.6666666005092657 0.6249999718750009
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.7999999200000041 0.6767856563734693 0.6666666311111126
yahoo_A1 0.33333330000000166 0.055555557160493235 0.49999994375000567
yahoo_A1 0.8888888296296326 0.652777698252327 0.6666665777777875
yahoo_A1 0.6666666133333363 0.08333334749999795 -0.0
yahoo_A1 0.9999999000000052 0.7499998875000143 0.9999999375000035
yahoo_A1 0.18181816198347142 0.0 0.0
yahoo_A1 0.46153841893491365 0.0 0.0


In [None]:
# One row per evaluated Yahoo S5 series, one column per accumulated metric.
yahoo_results = pd.DataFrame(data=total_scores)

In [None]:
# Average every metric over the series that share a dataset label.
yahoo_results.groupby(by='dataset').mean()

### NASA

In [None]:
# Metric accumulator for the NASA runs: one list per column, one entry per evaluated series.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Train and evaluate one multi-resolution autoencoder per series of the NASA
# data, collecting the metrics in `total_scores`.

N_RES = 3  # resolutions fed to the model: strides 1, 2 and 4

# Start from an empty accumulator so that re-running this cell never appends
# duplicate rows or mixes in scores from another dataset.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_nasa]:
    # NOTE(review): (100, 100) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        series_name = f'D{i+1}'  # series are labelled D1, D2, ... in loader order

        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # The model is trained to emit every window in reversed time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Keep every 2**n-th timestep for resolution n.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = SKip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Flip the predictions back to forward time order; only the
        # full-resolution output (index 0) is scored against the test windows.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(series_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(series_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated NASA series, one column per accumulated metric.
nasa_results = pd.DataFrame(data=total_scores)

In [None]:
# Per-label mean of every metric.
# NOTE(review): if the labels in the 'dataset' column are unique per series,
# each group holds a single row and this only re-sorts the table by label;
# use nasa_results.mean(numeric_only=True) if an overall average is wanted.
nasa_results.groupby(by='dataset').mean()

### SMD

In [None]:
# Metric accumulator for the SMD runs: one list per column, one entry per evaluated series.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Train and evaluate one multi-resolution autoencoder per series of the SMD
# data, collecting the metrics in `total_scores`.

N_RES = 3  # resolutions fed to the model: strides 1, 2 and 4

# Start from an empty accumulator so that re-running this cell never appends
# duplicate rows or mixes in scores from another dataset.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

for loader in [load_smd]:
    dataset_name = loader.__name__.replace('load_', '')
    # NOTE(review): (100, 100) are presumably window length and stride -- confirm in data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # The model is trained to emit every window in reversed time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Keep every 2**n-th timestep for resolution n.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = SKip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Flip the predictions back to forward time order; only the
        # full-resolution output (index 0) is scored against the test windows.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# One row per evaluated SMD series, one column per accumulated metric.
smd_results = pd.DataFrame(data=total_scores)

In [None]:
# Average every metric over the series that share a dataset label.
smd_results.groupby(by='dataset').mean()