In [1]:
import os
import random

# Configure GPU visibility and memory behaviour through environment variables.
# These are set before the TensorFlow import in the next cell.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",    # enumerate GPUs by PCI bus id
    "CUDA_VISIBLE_DEVICES": "1",          # expose only the device with index 1
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",  # ask TensorFlow to grow GPU memory on demand
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tqdm.notebook import tqdm
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, LSTM, Reshape, Dropout, GaussianNoise, Concatenate, Lambda, RepeatVector

# Reproducibility: seed Python's `random`, NumPy and TensorFlow with the same value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def Skip_AE(seq_length, dim, N_RES=3, N_LAYERS=1, BATCH_SIZE=128):
    """Build and compile a multi-resolution LSTM autoencoder.

    The model has ``N_RES`` inputs and ``N_RES`` outputs. Input ``n`` holds
    every ``2**n``-th time step of a window of ``seq_length`` steps. Each
    input is passed through Gaussian noise and its own stack of LSTM layers,
    then projected to a 32-unit latent vector. Every decoder receives a
    128-unit dense mix of *all* latents concatenated with the latent of its
    own resolution, and produces a sequence with the same shape as the
    matching input.

    Args:
        seq_length: Number of time steps in the full-resolution window.
        dim: Number of features per time step.
        N_RES: Number of resolutions (encoder/decoder pairs). Must be >= 1.
        N_LAYERS: Number of stacked LSTM layers in each encoder and in each
            decoder. Must be >= 1.
        BATCH_SIZE: Not used by this function; retained so that existing
            callers which pass it keep working.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 0.001) and MSE loss.

    Raises:
        ValueError: If ``N_RES`` or ``N_LAYERS`` is smaller than 1.
    """
    # Fail early with a clear message: without these checks N_LAYERS < 1 ends
    # in an UnboundLocalError on `X`, and N_RES < 1 hands Keras empty lists.
    if N_RES < 1:
        raise ValueError(f"N_RES must be >= 1, got {N_RES!r}")
    if N_LAYERS < 1:
        raise ValueError(f"N_LAYERS must be >= 1, got {N_LAYERS!r}")

    tf.keras.backend.clear_session()

    # Number of time steps kept at each resolution (every 2**n-th step).
    steps_per_res = [len(range(0, seq_length, 2 ** n)) for n in range(N_RES)]

    en_inputs = [Input(shape=[n_steps, dim]) for n_steps in steps_per_res]

    # Encoders: one independent LSTM stack per resolution.
    shared_latents = []
    for en_input in en_inputs:
        # Noise (stddev 0.5) is applied to the raw input only, not between LSTM layers.
        X = GaussianNoise(0.5)(en_input)
        for l in range(N_LAYERS):
            # Only the last LSTM collapses the sequence to a single vector.
            X = LSTM(64, return_sequences=(l + 1 < N_LAYERS))(X)
        h = Dense(32, activation='relu', kernel_regularizer='l1')(X)
        shared_latents.append(h)

    # Decoders: one per resolution, each with its own Dense(128) mixing layer.
    de_outputs = []
    for n_steps, own_latent in zip(steps_per_res, shared_latents):
        X = Concatenate()(shared_latents)
        X = Dense(128, activation='relu')(X)
        X = Concatenate()([X, own_latent])
        X = RepeatVector(n_steps)(X)
        for l in range(N_LAYERS):
            X = LSTM(64, return_sequences=(l + 1 < N_LAYERS))(X)
        # The last LSTM returns a single vector; expand it to the full
        # (n_steps, dim) reconstruction with a dense layer and a reshape.
        rec_x = Dense(n_steps * dim)(X)
        rec_x = Reshape([n_steps, dim])(rec_x)
        de_outputs.append(rec_x)

    model = Model(inputs=en_inputs, outputs=de_outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="mse")

    return model

### Yahoo S5

In [4]:
# Per-series score accumulator for the Yahoo S5 runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Yahoo S5 benchmark: for every series of every sub-benchmark, train a fresh
# Skip_AE on the training windows and score its full-resolution reconstruction.
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # NOTE(review): the two arguments are presumably (window length, stride) — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        N_RES = 3

        # Axis 1 is used as the time axis and axis 2 as the feature axis.
        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # Training targets are the windows in reverse time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Resolution n keeps every 2**n-th time step.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            # NOTE(review): this subsamples the *reversed* series, i.e. original steps
            # seq_length-1, seq_length-1-2**n, ...; for n >= 1 these are the reversed
            # input steps only when seq_length-1 is a multiple of 2**n. Confirm intended.
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = Skip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        # Stop once val_loss has not improved for 5 epochs and restore the best weights.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Outputs are in reverse time order; flip them back before scoring.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        # Only the full-resolution reconstruction (index 0) is evaluated.
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.999999948484851 0.9696969626562632 0.9999999981470107
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849765
yahoo_A1 0.9999999456521764 0.956521720708995 0.9999999953614762
yahoo_A1 0.999999930000003 0.7999999480000032 0.9999999797237573
yahoo_A1 0.9230768591716004 0.7179109509467508 0.9009920503704727
yahoo_A1 0.9230768591716004 0.717660255534233 0.9015872879775448
yahoo_A1 0.999999948484851 0.969696962696227 0.9999999981601732
yahoo_A1 0.9999999492063519 0.9920634872478045 0.9999999987844084
yahoo_A1 0.9999999492063519 0.992063487180122 0.9999999987844084
yahoo_A1 0.9999999487179513 0.9487179430329566 0.999999998371928
yahoo_A1 0.9963099123650302 0.9925379201830041 0.9999673670680758
yahoo_A1 0.9999999487654349 0.9876543142127767 0.9999999984095602
yahoo_A1 0.9999999483870992 0.9677419277058845 0.9999999980537634
yahoo_A1 0.999999944444447 0.8888888716321159 0.9999999941537466
yahoo_A1 0.9999999487013013 0.9870129799630335 0.9999999983645983
yahoo_A1 0.99999994

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.9999999485074653 0.9850746190014044 0.9999999983492349
yahoo_A2 0.9999999485074653 0.9850746189412825 0.9999999983492349
yahoo_A2 0.9999999485074653 0.9701492469331415 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098918182 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.999999899784027
yahoo_A2 0.9999999485074653 0.9701492470117672 0.9999999983492349
yahoo_A2 0.9999999490384642 0.9903846098645308 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.8311687809327065 0.7623752206265004 0.9793241056949096
yahoo_A2 0.9999999490384642 0.9903846098915461 0.9999999988797315
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.9999999485074653 0.9850746190043864 0.999999998349235
yahoo_A2 0.9999999490384642 0.9903846098471291 0.9999999988797315
yahoo_A2 0.5549737718045055 0.607229213182199 0.8832395847471731
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.8133332828177808 0.7653282635156582 0.9814259383005461
yahoo_A

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.25396823169564314 0.002645503616392491 -0.0
yahoo_A3 0.442477841491114 0.20191682533099842 0.32977436223327566
yahoo_A3 0.9176470084429093 0.9618512565169975 0.9801721386285549
yahoo_A3 0.25396823169564314 0.001326260929714058 -0.0
yahoo_A3 0.7947367916925241 0.8802454593135991 0.878443968785417
yahoo_A3 0.41726615387920135 0.10342506586890683 0.1382768966292481
yahoo_A3 0.5852089616298454 0.2528003865626263 0.12137194905012542
yahoo_A3 0.45825928955576367 0.08395758234720899 0.20264433160661557
yahoo_A3 0.5210083646578659 0.5494926202049178 0.6856728943504886
yahoo_A3 0.7134502462945892 0.5707750142263852 0.46812132669509215
yahoo_A3 0.637770854195864 0.5764878928749207 0.4986632723539781
yahoo_A3 0.5783521396394755 0.5691513732622765 0.5177003158525797
yahoo_A3 0.24999997799202311 0.08685868109503675 0.41714038122710306
yahoo_A3 0.6766916843462069 0.5293994525114056 0.2981902005618742
yahoo_A3 0.11563168070466742 0.0012077301733057306 -0.0
yahoo_A3 0.8495574722844418 0.930

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.8395061234364709 0.8719433981570495 0.8918924267150741
yahoo_A4 0.26890753963703323 0.001445087784987022 0.08510638245625994
yahoo_A4 0.41726615387920135 0.12327932642155423 0.25545155534250813
yahoo_A4 0.31067958525780215 0.06936637400920384 0.2252996998855609
yahoo_A4 0.7987804392214485 0.8422668820505975 0.8346436903288543
yahoo_A4 0.7927272242988459 0.8273889172319309 0.8673110560454775
yahoo_A4 0.6483516038220051 0.582713660229045 0.623102983301391
yahoo_A4 0.666666616021054 0.6480415155772209 0.7278587073850769
yahoo_A4 0.6084033181634095 0.3038297415407498 0.40175080454891926
yahoo_A4 0.5289255807117028 0.5615790172673154 0.6484249792535381
yahoo_A4 0.6795095860606518 0.3964768449513648 0.48440195070593517
yahoo_A4 0.43902435490051145 0.20030710985399902 0.5644750298921495
yahoo_A4 0.45070419027970904 0.0031847144476671936 -0.0
yahoo_A4 0.37551017342774085 0.19653340959560067 0.5187280294107938
yahoo_A4 0.07439824225732532 0.0011792465267388328 -0.0
yahoo_A4 0.9037036

In [6]:
# Tabulate the per-series Yahoo scores and average each metric per sub-benchmark.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.897565,0.806933,0.894195
yahoo_A2,0.877235,0.573224,0.962131
yahoo_A3,0.640469,0.516348,0.558906
yahoo_A4,0.581571,0.433789,0.518512


### NASA

In [None]:
# Reset the score accumulator for the NASA runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# NASA benchmark: train a fresh Skip_AE per sub-dataset and score its
# full-resolution reconstruction. Sub-datasets are labelled D1, D2, ...
for loader in [load_nasa]:
    # NOTE(review): the two arguments are presumably (window length, stride) — confirm in data_loader.
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        dataset_name = f'D{i+1}'

        N_RES = 3

        # Axis 1 is used as the time axis and axis 2 as the feature axis.
        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # Training targets are the windows in reverse time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Resolution n keeps every 2**n-th time step.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            # NOTE(review): this subsamples the *reversed* series, i.e. original steps
            # seq_length-1, seq_length-1-2**n, ...; for n >= 1 these are the reversed
            # input steps only when seq_length-1 is a multiple of 2**n. Confirm intended.
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = Skip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        # Stop once val_loss has not improved for 5 epochs and restore the best weights.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Outputs are in reverse time order; flip them back before scoring.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        # Only the full-resolution reconstruction (index 0) is evaluated.
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.13597482822023738 0.3967071569961004
D2 0.2699530282282525 0.5840009300457375 0.5173913038147389


In [None]:
# Tabulate the NASA scores and average each metric per sub-dataset label.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.135975,0.396707
D2,0.269953,0.584001,0.517391


### SMD

In [None]:
# Reset the score accumulator for the SMD runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# SMD benchmark: for every entity returned by the loader, train a fresh
# Skip_AE and score its full-resolution reconstruction.
for loader in [load_smd]:
    # NOTE(review): the two arguments are presumably (window length, stride) — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        N_RES = 3

        # Axis 1 is used as the time axis and axis 2 as the feature axis.
        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # Training targets are the windows in reverse time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Resolution n keeps every 2**n-th time step.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            # NOTE(review): this subsamples the *reversed* series, i.e. original steps
            # seq_length-1, seq_length-1-2**n, ...; for n >= 1 these are the reversed
            # input steps only when seq_length-1 is a multiple of 2**n. Confirm intended.
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = Skip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        # Stop once val_loss has not improved for 5 epochs and restore the best weights.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Outputs are in reverse time order; flip them back before scoring.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        # Only the full-resolution reconstruction (index 0) is evaluated.
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

In [None]:
# Tabulate the SMD scores and average each metric per dataset label.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

### ECG

In [None]:
# Reset the score accumulator for the ECG runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# ECG benchmark: train a fresh Skip_AE per recording and score its
# full-resolution reconstruction. Recordings are labelled D1, D2, ...
for loader in [load_ecg]:
    # NOTE(review): the two arguments are presumably (window length, stride) — confirm in data_loader.
    datasets = loader(32, 16)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        dataset_name = f'D{i+1}'

        N_RES = 3

        # Axis 1 is used as the time axis and axis 2 as the feature axis.
        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # Training targets are the windows in reverse time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Resolution n keeps every 2**n-th time step.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            # NOTE(review): this subsamples the *reversed* series, i.e. original steps
            # seq_length-1, seq_length-1-2**n, ...; for n >= 1 these are the reversed
            # input steps only when seq_length-1 is a multiple of 2**n. Confirm intended.
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = Skip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        # Stop once val_loss has not improved for 5 epochs and restore the best weights.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Outputs are in reverse time order; flip them back before scoring.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        # Only the full-resolution reconstruction (index 0) is evaluated.
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# Tabulate the ECG scores and average each metric per recording label.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

### Power Demand

In [None]:
# Reset the score accumulator for the Power Demand runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Power Demand benchmark: for every series returned by the loader, train a
# fresh Skip_AE and score its full-resolution reconstruction.
for loader in [load_power_demand]:
    # NOTE(review): the two arguments are presumably (window length, stride) — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        N_RES = 3

        # Axis 1 is used as the time axis and axis 2 as the feature axis.
        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # Training targets are the windows in reverse time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Resolution n keeps every 2**n-th time step.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            # NOTE(review): this subsamples the *reversed* series, i.e. original steps
            # seq_length-1, seq_length-1-2**n, ...; for n >= 1 these are the reversed
            # input steps only when seq_length-1 is a multiple of 2**n. Confirm intended.
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = Skip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        # Stop once val_loss has not improved for 5 epochs and restore the best weights.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Outputs are in reverse time order; flip them back before scoring.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        # Only the full-resolution reconstruction (index 0) is evaluated.
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# Tabulate the Power Demand scores and average each metric per dataset label.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

### 2D Gesture

In [None]:
# Reset the score accumulator for the 2D Gesture runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# 2D Gesture benchmark: for every series returned by the loader, train a
# fresh Skip_AE and score its full-resolution reconstruction.
for loader in [load_gesture]:
    # NOTE(review): the two arguments are presumably (window length, stride) — confirm in data_loader.
    datasets = loader(64, 1)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        N_RES = 3

        # Axis 1 is used as the time axis and axis 2 as the feature axis.
        seq_length, dim = X_train.shape[1], X_train.shape[2]
        # Training targets are the windows in reverse time order.
        X_train_reverse = np.flip(X_train, axis=1)

        X_train_by_res = []
        X_test_by_res = []
        X_train_by_res_reverse = []

        for n in range(N_RES):
            # Resolution n keeps every 2**n-th time step.
            selected_t = list(range(0, seq_length, 2**n))
            X_train_by_res.append(X_train[:, selected_t, :])
            X_test_by_res.append(X_test[:, selected_t, :])
            # NOTE(review): this subsamples the *reversed* series, i.e. original steps
            # seq_length-1, seq_length-1-2**n, ...; for n >= 1 these are the reversed
            # input steps only when seq_length-1 is a multiple of 2**n. Confirm intended.
            X_train_by_res_reverse.append(X_train_reverse[:, selected_t, :])

        model = Skip_AE(seq_length, dim, N_RES, N_LAYERS=2, BATCH_SIZE=128)
        # Stop once val_loss has not improved for 5 epochs and restore the best weights.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, mode="min", restore_best_weights=True)
        history = model.fit(X_train_by_res, X_train_by_res_reverse, epochs=50, batch_size=128,
                            validation_split=0.3, verbose=0, callbacks=[early_stopping])

        # Outputs are in reverse time order; flip them back before scoring.
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test_by_res)]
        # Only the full-resolution reconstruction (index 0) is evaluated.
        scores = evaluate(X_test, X_test_rec[0], y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# Tabulate the 2D Gesture scores and average each metric per dataset label.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()