In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tqdm.notebook import tqdm
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, LSTMCell, GRU, GRUCell, Reshape, Dropout, GaussianNoise, Concatenate, Lambda, RepeatVector, TimeDistributed, Add

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
class SkipRNN(tf.keras.layers.Layer):
    def __init__(self, cell, return_sequences=False, **kwargs):
        super().__init__(**kwargs)
        self.cell = cell
        self.return_sequences = return_sequences
        self.get_initial_state = getattr(
            self.cell, "get_initial_state", self.fallback_initial_state)
    def fallback_initial_state(self, inputs):
        return [tf.zeros([self.cell.state_size], dtype=inputs.dtype)]
    @tf.function
    def call(self, inputs, states=None):
        states = self.get_initial_state(inputs) if states == None else states

        outputs = tf.zeros(shape=[self.cell.output_size], dtype=inputs.dtype)
        outputs, states = self.cell(inputs, states)

        return outputs, states
    
def Modified_S_RNN(X_train):
    tf.keras.backend.clear_session()

    sparseness_weights = [(0, 1), (1, 0), (1, 1)]
    BATCH_SIZE = 128
    N, N_LAYERS, N_UNITS = 3, 1, 64

    X_train_reverse = np.flip(X_train, axis=1)
    seq_length, dim = X_train.shape[1], X_train.shape[2]

    en_input = Input(shape=[seq_length, dim])
    X = GaussianNoise(0.1)(en_input)
    initial_states = tf.zeros([BATCH_SIZE, N_UNITS])

    shared_latents = []
    for i in range(N):
        prev_states = []
        skip_length = 2**i
        w1, w2 = np.array(sparseness_weights)[np.random.choice(3, size=1)][0]
        w = w1 + w2

        for t in range(seq_length):
            Xt = Lambda(lambda x: x[:, t, :])(X)
            if t == 0:
                O, H = SkipRNN(GRUCell(N_UNITS))(Xt)
            else:
                if t - skip_length >= 0:
                    states = (w1 * prev_states[t-1] + w2 * prev_states[t-skip_length]) / w
                    O, H = SkipRNN(GRUCell(N_UNITS))(Xt, prev_states[t-1])
                else:
                    O, H = SkipRNN(GRUCell(N_UNITS))(Xt, prev_states[t-1])

            prev_states.append(H)
        shared_latents.append(H)

    de_outputs = []
    de_input = Concatenate()(shared_latents)
    D_shared = Dense(dim, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.005))(de_input)

    for i in range(N):
        Y_i = []
        prev_states = []
        skip_length = 2**i
        w1, w2 = np.array(sparseness_weights)[np.random.choice(3, size=1)][0]
        w = w1 + w2
        
        D_each = Dense(dim, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.005))(shared_latents[i])

        D = Concatenate()([D_shared, D_each])
        D = Dense(dim)(D)

        for t in range(seq_length):
            if t == 0:
                y = Dense(dim)(D)
                _, H = SkipRNN(GRUCell(dim))(y, D) # y_t
            else:
                if t - skip_length >= 0:
                    states = (w1 * prev_states[t-1] + w2 * prev_states[t-skip_length]) / w
                    y, H = SkipRNN(GRUCell(dim))(Y_i[t-1], states) # y_t-1 --> y_1
                else:
                    y, H = SkipRNN(GRUCell(dim))(Y_i[t-1], prev_states[t-1]) # y_t-1 --> y_1

            Y_i.append(y)
            prev_states.append(H)

        Y_i = Concatenate()(Y_i)
        Y_i = Reshape([seq_length, dim])(Y_i)
        de_outputs.append(Y_i)

    model = Model(inputs=en_input, outputs=de_outputs)
    
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( initial_learning_rate=0.001, decay_steps=10000, decay_rate=0.9)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=2.5), loss='mse')

    history = model.fit(X_train, [X_train_reverse for _ in range(N)], batch_size=BATCH_SIZE, epochs=50, validation_split=0.3, verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)]) 
    return model

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291289
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957942
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823333336
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999998500000123 0.49999990416668233 0.9999999485576953
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9999999466666691 0.9666666518500433 0.9999999953333334
yahoo_A1 0.9830507941396176 0.9626856654158878 0.9982222175638519
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099013
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823232326
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000003
yahoo_A1 0.9999999437500025 0.937499976331382 0.9999999926264045
yahoo_A1 0.05555555005144069 0.004854369563728194 -0.0
yahoo_A1 0.9090908429752097 0.7633332859111138 0.9980582314970312
yahoo_A1 0.9999999409090937 0.909090878

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.08510637464916311 0.02628810507800277 0.6162162004777214
yahoo_A2 0.39999995200000177 0.014867198979721922 0.538513497439768
yahoo_A2 0.8571427836734729 0.5329415528273315 0.9689188941722066
yahoo_A2 0.9230768591716004 0.722464309712689 0.9199700686496578
yahoo_A2 0.046511623147647396 0.011904760685941164 0.6821704738897971
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.8333332708333363 0.614716967208143 0.9498877944594027
yahoo_A2 0.03846153454142012 0.009803920569012017 0.6124030390601588
yahoo_A2 0.8571427836734729 0.5052847336635234 0.7810810611318484
yahoo_A2 0.7272726677685981 0.4717470599771136 0.8904263144125347
yahoo_A2 0.14285712755102062 0.03846153431952704 0.9069766527852984
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594599
yahoo_A2 0.2666666133333427 0.08254881564223657 0.7251308793136938
yahoo_A2 0.9230768591716004 0.7419785722767022 0.9798055202691478
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.04

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.06349205714285747 0.01111111121164737 0.6393442236562779
yahoo_A3 0.1355931964378091 0.04518506828888878 0.39100984682491247
yahoo_A3 0.7999999360000031 0.5539776430895509 0.9303672153099527
yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.8235293536332209 0.7010846278309131 0.9563840031227845
yahoo_A3 0.0624999938476567 0.006941924214929476 0.12916665801736177
yahoo_A3 0.17647057188581453 0.03927173675262849 0.2012726087121992
yahoo_A3 0.4999999500000025 0.19104651473151027 0.5861581755955351
yahoo_A3 0.28571425306122533 0.05828386979848859 0.673022572068532
yahoo_A3 0.2857142331065848 0.08426990930182059 0.46924080310825667
yahoo_A3 0.3333332902777817 0.19430814303652613 0.8049272367105104
yahoo_A3 0.18181816198347142 0.05954904927394218 0.5069409561525154
yahoo_A3 0.041237109278350725 0.0056179777522529775 0.28278686862066765
yahoo_A3 0.28571425714285853 0.14382689942092747 0.4137648766734175
yahoo_A3 0.04724408980097991 0.004098361191078604 -0.0
yahoo_A3

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.4999999500000025 0.18741302275789137 0.4652542260139092
yahoo_A4 0.03846153461538481 0.005681818344900532 0.29508194647944264
yahoo_A4 0.0624999938476567 0.017307746498102565 0.39305553698726947
yahoo_A4 0.06611569597705123 0.011398517580547399 0.28541665597743104
yahoo_A4 0.7142856551020442 0.572362505078989 0.7823275752043259
yahoo_A4 0.399999960000002 0.2372510036885901 0.7564655069948882
yahoo_A4 0.0624999938476567 0.005050505755497663 0.1874999829687516
yahoo_A4 0.07751937227330145 0.004273505226784364 0.02521008149142026
yahoo_A4 0.19354835463059794 0.07036540172006563 0.5837570511517127
yahoo_A4 0.33333327777778377 0.08355858879289353 0.5763888696469914
yahoo_A4 0.1538461317869851 0.057962905985327016 0.5006157562483697
yahoo_A4 0.0624999938476567 0.02082131353313365 0.5215277531707189
yahoo_A4 0.49999993750000526 0.13363951402996208 0.7958333127743061
yahoo_A4 0.2105262703601196 0.052096408773199404 0.6348869858802754
yahoo_A4 0.03174602857142873 0.004065040847075319

In [6]:
yahoo_results = pd.DataFrame(total_scores)
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.7078,0.450566,0.753282
yahoo_A2,0.725891,0.373952,0.885122
yahoo_A3,0.324389,0.151829,0.547512
yahoo_A4,0.241782,0.095219,0.468009


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2295534087061448 0.11772283899052041 0.4689511793343621
D2 0.19498578202833744 0.5110220665433631 0.5111482204915399


In [9]:
nasa_results = pd.DataFrame(total_scores)
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229553,0.117723,0.468951
D2,0.194986,0.511022,0.511148


### SMD

In [10]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [11]:
for loader in [load_smd]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')
      
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.553299197714649 0.5087209301507045
smd 0.49816844853413245 0.3869926066123922 0.9051105190777353
smd 0.1090046894465761 0.10510791442734596 0.5360856365238599
smd 0.10256405257596121 0.11225368191784964 0.5349112402820174
smd 0.6551723644470901 0.6168591294309058 0.8866129859563114
smd 0.8646487227167925 0.8728602121992414 0.9174999031005202
smd 0.21204814273706443 0.25572395572302226 0.5644489375896651
smd 0.3065326138304668 0.2637577248372329 0.6561375724107532
smd 0.4485293627358672 0.402159238802739 0.7780860084250553
smd 0.21837868410447125 0.20426444391019774 0.5089159299093855
smd 0.4915253826055764 0.5970937509022821 0.6702949232010612
smd 0.32200354696893857 0.3289304124601258 0.6922058034480926
smd 0.4812622776933632 0.4319758491223679 0.8046234270893897
smd 0.45751630398564924 0.38771291884338577 0.6570954807632023
smd 0.790513783936637 0.813428646895419 0.9270172811266225
smd 0.9499999477500025 0.8942184364362131 0.9760487986827799
smd 0.5056360319

In [12]:
smd_results = pd.DataFrame(total_scores)
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.455258,0.463174,0.741057


### ECG

In [13]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [14]:
for loader in [load_ecg]:
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.49746188567600663 0.48131108719885307 0.5862284599422872
D2 0.33093520269137927 0.301525932118896 0.6148477001612856
D3 0.16326526767551458 0.11285157152639619 0.5465618534396066
D4 0.26116834962742996 0.1591147382424108 0.609933000365872
D5 0.38222217868642466 0.24387858839657428 0.7998445344716301
D6 0.16457959071175676 0.08406973856279265 0.4712545883731213
D7 0.10541725148760808 0.03626924004659212 0.6561053370303733
D8 0.17583891011296932 0.07640244656262399 0.39316093118342715
D9 0.3580130914399724 0.16214429128109675 0.3372919352552929


In [15]:
ecg_results = pd.DataFrame(total_scores)
ecg_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.497462,0.481311,0.586228
D2,0.330935,0.301526,0.614848
D3,0.163265,0.112852,0.546562
D4,0.261168,0.159115,0.609933
D5,0.382222,0.243879,0.799845
D6,0.16458,0.08407,0.471255
D7,0.105417,0.036269,0.656105
D8,0.175839,0.076402,0.393161
D9,0.358013,0.162144,0.337292


### Power Demand

In [16]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [17]:
for loader in [load_power_demand]:
    datasets = loader(16, 8)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc']) 

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.275510180044775 0.09679866136669413 0.3152191594705527


In [18]:
power_results = pd.DataFrame(total_scores)
power_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.27551,0.096799,0.315219


### 2D Gesture

In [19]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [20]:
for loader in [load_gesture]:
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.47587350178931215 0.39519105196868415 0.6329117073642826


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.475874,0.395191,0.632912
