In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tqdm.notebook import tqdm
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, LSTMCell, GRU, GRUCell, Reshape, Dropout, GaussianNoise, Concatenate, Lambda, RepeatVector, TimeDistributed, Add

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
class SkipRNN(tf.keras.layers.Layer):
    def __init__(self, cell, return_sequences=False, **kwargs):
        super().__init__(**kwargs)
        self.cell = cell
        self.return_sequences = return_sequences
        self.get_initial_state = getattr(
            self.cell, "get_initial_state", self.fallback_initial_state)
    def fallback_initial_state(self, inputs):
        return [tf.zeros([self.cell.state_size], dtype=inputs.dtype)]
    @tf.function
    def call(self, inputs, states=None):
        states = self.get_initial_state(inputs) if states == None else states

        outputs = tf.zeros(shape=[self.cell.output_size], dtype=inputs.dtype)
        outputs, states = self.cell(inputs, states)

        return outputs, states
    
def Modified_S_RNN(X_train):
    tf.keras.backend.clear_session()

    sparseness_weights = [(0, 1), (1, 0), (1, 1)]
    BATCH_SIZE = 128
    N, N_LAYERS, N_UNITS = 3, 1, 64

    X_train_reverse = np.flip(X_train, axis=1)
    seq_length, dim = X_train.shape[1], X_train.shape[2]

    en_input = Input(shape=[seq_length, dim])
    X = GaussianNoise(0.1)(en_input)
    initial_states = tf.zeros([BATCH_SIZE, N_UNITS])

    shared_latents = []
    for i in range(N):
        prev_states = []
        skip_length = 2**i
        w1, w2 = np.array(sparseness_weights)[np.random.choice(3, size=1)][0]
        w = w1 + w2

        for t in range(seq_length):
            Xt = Lambda(lambda x: x[:, t, :])(X)
            if t == 0:
                O, H = SkipRNN(GRUCell(N_UNITS))(Xt)
            else:
                if t - skip_length >= 0:
                    states = (w1 * prev_states[t-1] + w2 * prev_states[t-skip_length]) / w
                    O, H = SkipRNN(GRUCell(N_UNITS))(Xt, prev_states[t-1])
                else:
                    O, H = SkipRNN(GRUCell(N_UNITS))(Xt, prev_states[t-1])

            prev_states.append(H)
        shared_latents.append(H)

    de_outputs = []
    de_input = Concatenate()(shared_latents)
    D_shared = Dense(dim, activation='relu')(de_input)

    for i in range(N):
        Y_i = []
        prev_states = []
        skip_length = 2**i
        w1, w2 = np.array(sparseness_weights)[np.random.choice(3, size=1)][0]
        w = w1 + w2
        
        D_each = Dense(dim, activation='relu')(shared_latents[i])

        D = Concatenate()([D_shared, D_each])
        D = Dense(dim)(D)

        for t in range(seq_length):
            if t == 0:
                y = Dense(dim)(D)
                _, H = SkipRNN(GRUCell(dim))(y, D) # y_t
            else:
                if t - skip_length >= 0:
                    states = (w1 * prev_states[t-1] + w2 * prev_states[t-skip_length]) / w
                    y, H = SkipRNN(GRUCell(dim))(Y_i[t-1], states) # y_t-1 --> y_1
                else:
                    y, H = SkipRNN(GRUCell(dim))(Y_i[t-1], prev_states[t-1]) # y_t-1 --> y_1

            Y_i.append(y)
            prev_states.append(H)

        Y_i = Concatenate()(Y_i)
        Y_i = Reshape([seq_length, dim])(Y_i)
        de_outputs.append(Y_i)

    model = Model(inputs=en_input, outputs=de_outputs)
    
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( initial_learning_rate=0.001, decay_steps=10000, decay_rate=0.9)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=2.5), loss='mse')

    history = model.fit(X_train, [X_train_reverse for _ in range(N)], batch_size=BATCH_SIZE, epochs=50, validation_split=0.3, verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min", restore_best_weights=True)]) 
    return model

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        
        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291289
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957942
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823333336
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490384643
yahoo_A1 0.9999998500000123 0.4999998958333508 0.999999948076926
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.9999999466666691 0.9666666518498296 0.9999999953333335
yahoo_A1 0.9830507941396176 0.9626856655547767 0.998222217563852
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099013
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823232327
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000003
yahoo_A1 0.9999999437500025 0.937499976331382 0.9999999926264045
yahoo_A1 0.05555555005144069 0.004854369563728194 -0.0
yahoo_A1 0.9090908429752097 0.7633332859111138 0.9980582314970312
yahoo_A1 0.9999999409090937 0.909090878331

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.08510637464916311 0.02628810507800277 0.6162162004777214
yahoo_A2 0.39999995200000177 0.014887325642322586 0.539189173075664
yahoo_A2 0.8571427836734729 0.5123993543840494 0.9094594362313738
yahoo_A2 0.9230768591716004 0.7221503616312263 0.9166043244960616
yahoo_A2 0.046511623147647396 0.011904760685941164 0.6821704738897971
yahoo_A2 0.9999999250000032 0.7499999447916703 0.99999997445946
yahoo_A2 0.8333332708333363 0.5940037681745105 0.8586387307396807
yahoo_A2 0.03846153454142012 0.009803920569012017 0.6124030390601587
yahoo_A2 0.8571427836734729 0.5057728755780244 0.7999999795675681
yahoo_A2 0.7272726677685981 0.47164291812820175 0.890052342839913
yahoo_A2 0.14285712755102062 0.03846153431952704 0.9069766527852984
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594599
yahoo_A2 0.2666666133333427 0.08227126862060263 0.7243829361684501
yahoo_A2 0.9230768591716004 0.7423844164242503 0.9801794918417697
yahoo_A2 0.9999998500000123 0.0 0.9999998992248162
yahoo_A2 0.04

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.06349205714285747 0.01111111121164737 0.6393442236562779
yahoo_A3 0.12213739292582114 0.03212818382844667 0.1907584127071036
yahoo_A3 0.7999999360000031 0.5539776430895509 0.9303672153099527
yahoo_A3 0.03174602857142873 0.008064515719302832 -0.0
yahoo_A3 0.8235293536332209 0.7021372593885309 0.9568225996092419
yahoo_A3 0.0624999938476567 0.006998517275376565 0.137499990927084
yahoo_A3 0.17647057188581453 0.041288474191066994 0.20957663102023427
yahoo_A3 0.4999999500000025 0.21632179381212835 0.6350282341506355
yahoo_A3 0.28571425306122533 0.04475023222523769 0.6644067529449484
yahoo_A3 0.333333280246921 0.14767679214702523 0.6260599316566243
yahoo_A3 0.3333332902777817 0.1955753703366907 0.8075901439641927
yahoo_A3 0.18181816198347142 0.05941353373324062 0.5057713655560755
yahoo_A3 0.041237109278350725 0.0056179777522529775 0.28278686862066765
yahoo_A3 0.24999995937500538 0.09509912445896761 0.41028138106352374
yahoo_A3 0.04724408980097991 0.004098361191078604 -0.0
yahoo_A3 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.4999999500000025 0.1873223027214565 0.46242936732515433
yahoo_A4 0.03846153461538481 0.005681818344900532 0.29508194647944264
yahoo_A4 0.0624999938476567 0.017244326916423983 0.3902777593043991
yahoo_A4 0.07272726558677738 0.01099102423683685 0.25833332409027815
yahoo_A4 0.7142856551020442 0.5723214646599272 0.7818657525569958
yahoo_A4 0.399999960000002 0.23736070763299993 0.7526939553211462
yahoo_A4 0.0624999938476567 0.005050505753655636 0.1874999827604183
yahoo_A4 0.07751937227330145 0.004273505226784364 0.02521008149142026
yahoo_A4 0.19354835463059794 0.07036540172006563 0.5837570511517127
yahoo_A4 0.33333327777778377 0.0835585887712297 0.576388868952547
yahoo_A4 0.15624997763672163 0.05823233357010429 0.5022321355371164
yahoo_A4 0.0624999938476567 0.02098352444423614 0.5249999755000012
yahoo_A4 0.49999993750000526 0.1337454462303475 0.796874979414063
yahoo_A4 0.2105262703601196 0.052096408773199404 0.6348869858802754
yahoo_A4 0.03174602857142873 0.004065040847075319 -0.

In [6]:
yahoo_results = pd.DataFrame(total_scores)
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.707439,0.446601,0.755069
yahoo_A2,0.724114,0.375807,0.883713
yahoo_A3,0.317683,0.146966,0.546903
yahoo_A4,0.244869,0.095203,0.468158


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2295534087061448 0.11762877753022205 0.46844464614638154
D2 0.19498578202833744 0.5083947861392297 0.5111475340818955


In [9]:
nasa_results = pd.DataFrame(total_scores)
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229553,0.117629,0.468445
D2,0.194986,0.508395,0.511148


### SMD

In [10]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [11]:
for loader in [load_smd]:
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')
      
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.5532991977145021 0.5087209301507045
smd 0.33699628748004856 0.2580080741638467 0.8847260140155961
smd 0.10926360808167426 0.10970440212517421 0.5361658679048326
smd 0.10256405257596121 0.11219965121102635 0.5349068893731919
smd 0.6551723644470901 0.6436556387983943 0.8865870430496461
smd 0.8715336233652966 0.8682196227653798 0.9167334775368948
smd 0.23326955025373078 0.2862656970575752 0.5722417070362382
smd 0.34078207560001905 0.3206049267053512 0.6976462953289246
smd 0.4360464619338212 0.3964996668325035 0.7778729942281059
smd 0.21837868410447125 0.20728784866767216 0.5090117417016153
smd 0.4247787230323472 0.5622940092204025 0.6408861823580441
smd 0.32422583623412254 0.3317518660795705 0.6969086648912206
smd 0.49452264239459137 0.44991380718719154 0.7692616026164828
smd 0.47560971569750476 0.6130075104028975 0.6635596692255572
smd 0.8134327853141042 0.8169072025745493 0.9209893595489601
smd 0.9499999477500025 0.8942184364631406 0.9760487986827799
smd 0.5056

In [12]:
smd_results = pd.DataFrame(total_scores)
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.450015,0.472055,0.738453


### ECG

In [13]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [14]:
for loader in [load_ecg]:
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.4949494514335309 0.48169313913123213 0.5910862147321889
D2 0.33093520269137927 0.3013956098337706 0.6138557528211419
D3 0.17421598881618955 0.1142723546235983 0.5794446634895938
D4 0.2558139223296414 0.15584655976005204 0.6052374254957745
D5 0.3930130573101246 0.24195966877013156 0.8039314213782479
D6 0.16295024215111542 0.08309786610761033 0.46201720244954186
D7 0.08802813974348818 0.03493823980823107 0.6552185251171713
D8 0.17583891011296932 0.0763224690523469 0.3916083847569114
D9 0.3580130914399724 0.1661891613647386 0.3588730668104365


In [15]:
ecg_results = pd.DataFrame(total_scores)
ecg_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.494949,0.481693,0.591086
D2,0.330935,0.301396,0.613856
D3,0.174216,0.114272,0.579445
D4,0.255814,0.155847,0.605237
D5,0.393013,0.24196,0.803931
D6,0.16295,0.083098,0.462017
D7,0.088028,0.034938,0.655219
D8,0.175839,0.076322,0.391608
D9,0.358013,0.166189,0.358873


### Power Demand

In [16]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [17]:
for loader in [load_power_demand]:
    datasets = loader(16, 8)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc']) 

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.275510180044775 0.09679866136669413 0.3152191594705527


In [18]:
power_results = pd.DataFrame(total_scores)
power_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.27551,0.096799,0.315219


### 2D Gesture

In [19]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [20]:
for loader in [load_gesture]:
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        model = Modified_S_RNN(X_train)
        X_test_rec = [np.flip(rec, axis=1) for rec in model.predict(X_test)]
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True, scoring='square_median')

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.4707623588547267 0.3952691535794135 0.6231533798563943


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.470762,0.395269,0.623153
