In [1]:
import os
import random

# GPU selection and memory policy. These are set in the first cell so that
# they are already in the environment when TensorFlow is imported below.
os.environ.update({
    # Enumerate GPUs in PCI bus order.
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    # Expose only the device with index 1 to this process.
    "CUDA_VISIBLE_DEVICES": "1",
    # Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed the global random number generators of Python's
# `random` module, NumPy and TensorFlow with the same fixed value.
# NOTE(review): the seeds are set once, here; seeding alone may not make GPU
# training bit-for-bit repeatable -- confirm if exact reproducibility matters.
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
class Sampling(layers.Layer):
    """Reparameterisation layer that draws a latent vector z.

    Takes the pair ``(z_mean, z_log_var)`` and returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is standard
    normal noise, one value per (sample, latent dimension).
    """

    def call(self, inputs):
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        # Noise is shaped from the first two axes of the mean tensor.
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log-variance) is the standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [4]:
class VAE(keras.Model):
    """Variational auto-encoder trained with a custom ``train_step``.

    Args:
        encoder: model mapping an input batch to ``(z_mean, z_log_var, z)``.
        decoder: model mapping a latent batch ``z`` back to input space.
        **kwargs: forwarded to ``keras.Model``.

    The loss optimised at every step is ``reconstruction_loss + kl_loss``;
    running means of all three quantities are tracked and reported by ``fit``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """The three loss trackers, exposed through Keras' ``metrics`` hook."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on a batch and return the running losses.

        Args:
            data: the batch handed over by ``fit``. A bare tensor when ``fit``
                is called with inputs only; when a tuple such as ``(x, y)``
                arrives, only its first element is used (the model is
                unsupervised).

        Returns:
            dict with the running means ``loss``, ``reconstruction_loss`` and
            ``kl_loss``.
        """
        # fit(x, y) or a dataset yielding tuples passes a tuple here; the
        # encoder only wants the inputs.
        if isinstance(data, tuple):
            data = data[0]

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # binary_crossentropy averages over the last axis; the remaining
            # axis 1 is summed per sample and the batch is then averaged.
            # NOTE(review): binary cross-entropy presumes targets and
            # reconstructions lie in [0, 1] -- confirm this holds for the
            # data and decoder used with this class.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction), axis=1
                )
            )
            # KL divergence between N(z_mean, exp(z_log_var)) and N(0, I),
            # summed over latent dimensions and averaged over the batch.
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [5]:
def CNN_VAE(X_train, latent_dim=16, epochs=50, batch_size=128,
            learning_rate=0.001, verbose=0):
    """Build a 1-D convolutional VAE, fit it on ``X_train`` and return it.

    Args:
        X_train: 3-D array of training windows; axis 1 and axis 2 define the
            per-sample input shape of the encoder.
        latent_dim: size of the latent vector z.
        epochs: number of training epochs.
        batch_size: mini-batch size used by ``fit``.
        learning_rate: learning rate of the Adam optimiser.
        verbose: verbosity passed to ``fit`` (0 = silent).

    Returns:
        The trained ``VAE``; its ``encoder`` yields ``[z_mean, z_log_var, z]``
        and its ``decoder`` maps z back to the shape of one input window.

    Raises:
        ValueError: if ``X_train`` is not 3-dimensional.
    """
    if len(X_train.shape) != 3:
        raise ValueError(
            f"X_train must be 3-dimensional, got shape {tuple(X_train.shape)}"
        )
    n_steps, n_features = X_train.shape[1], X_train.shape[2]

    # Encoder: two stride-2 convolutions, then dense heads for the mean and
    # log-variance of the latent distribution plus a sampled z.
    encoder_inputs = keras.Input(shape=(n_steps, n_features))
    x = layers.Conv1D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
    x = layers.Conv1D(64, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation="relu")(x)
    z_mean = layers.Dense(latent_dim, name="z_mean")(x)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
    z = Sampling()([z_mean, z_log_var])
    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")

    # Decoder: the (32, 64) feature map is a fixed internal size, independent
    # of the input; the final Dense + Reshape restores the window shape.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(32 * 64, activation="relu")(latent_inputs)
    x = layers.Reshape((32, 64))(x)
    x = layers.Conv1DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Conv1DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(n_steps * n_features)(x)
    decoder_outputs = layers.Reshape([n_steps, n_features])(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

    model = VAE(encoder, decoder)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate))
    model.fit(X_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

### Yahoo S5

In [6]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [7]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # presumably (window length, stride) -- confirm against data_loader
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    # Label shared by every series of this loader, e.g. 'yahoo_A1'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        # Train a fresh model, then reconstruct the test windows from the
        # encoder's last output (the sampled latent z).
        model = CNN_VAE(X_train)
        z_test = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(z_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9850745754065519 0.983943807889094 0.9997952479426399
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849764
yahoo_A1 0.9999999456521764 0.9565217209833222 0.9999999953614762
yahoo_A1 0.999999930000003 0.7999999500000029 0.9999999797237571
yahoo_A1 0.9230768591716004 0.7255952010491444 0.9700396684125173
yahoo_A1 0.9230768591716004 0.7330320327944443 0.98293649362152
yahoo_A1 0.999999948484851 0.9242424185659951 0.9999999981601732
yahoo_A1 0.9999999492063519 0.9959999948774263 0.9999999987844084
yahoo_A1 0.9999999492063519 0.9920634873720409 0.9999999987844084
yahoo_A1 0.9999999487179513 0.9871794799338866 0.999999998371928
yahoo_A1 0.9781021390777371 0.9830758977051822 0.9987592617322397
yahoo_A1 0.9938649794572648 0.9875028275777142 0.9999560636088743
yahoo_A1 0.9999999483870992 0.9838709593720104 0.9999999980537634
yahoo_A1 0.999999944444447 0.9444444226958288 0.9999999941537467
yahoo_A1 0.9999999487013013 0.9870129799870717 0.9999999983645982
yahoo_A1 0.999999

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.17493470983952586 0.060955292515711496 0.4674302173967504
yahoo_A2 0.17493470983952586 0.060981070878588325 0.433231042285094
yahoo_A2 0.9925925411248311 0.9848534902316813 0.9999763822525424
yahoo_A2 0.4745762214880833 0.45689242655734275 0.8403752791177312
yahoo_A2 0.005025125124365547 0.0012594457175669027 0.1447084088240384
yahoo_A2 0.9999999485074653 0.9850746190040223 0.9999999983492351
yahoo_A2 0.8888888380312286 0.9508090230785602 0.9930250294125758
yahoo_A2 0.004926107880560072 0.0012345677774729584 0.1274297928450488
yahoo_A2 0.45414842982399645 0.3309141839260608 0.8735712247056815
yahoo_A2 0.5405404980277607 0.570237431465294 0.8153107753419673
yahoo_A2 0.666666577777783 0.2499999625000044 0.9978400727866534
yahoo_A2 0.8905108976290721 0.9433253098908769 0.9950756843640398
yahoo_A2 0.3287670732220005 0.30705705134089833 0.5907430935609866
yahoo_A2 0.8113207039693873 0.8718071010363649 0.9731990221087788
yahoo_A2 0.9999998500000123 0.0 0.9999998997840274
yahoo_A2 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.25396823169564314 0.0013262609223044285 -0.0
yahoo_A3 0.442477841491114 0.16827980542465712 0.2879443964320134
yahoo_A3 0.9011857202643405 0.9622200645924195 0.9837298247214965
yahoo_A3 0.25396823169564314 0.004340662334214554 0.027039006318902215
yahoo_A3 0.8251365620547075 0.8823212947085577 0.8662109219561183
yahoo_A3 0.41726615387920135 0.043150605629332886 0.06153795737683343
yahoo_A3 0.5852089616298454 0.16693101787221173 0.024729788263583928
yahoo_A3 0.45614031550631234 0.06673494167946036 0.16091643990071816
yahoo_A3 0.5210083646578659 0.36106625937459785 0.5205999086816473
yahoo_A3 0.7134502462945892 0.5714902283497714 0.4704831719004913
yahoo_A3 0.637770854195864 0.56669868732161 0.5638527705755241
yahoo_A3 0.5783521396394755 0.4724997410194578 0.5637697484169977
yahoo_A3 0.25825823385908653 0.12490565811617493 0.6132044812418928
yahoo_A3 0.6766916843462069 0.22679362708249884 0.17874651309171286
yahoo_A3 0.11563168070466742 0.0012106544339577385 0.0024213072633362

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.4853700147102329 0.10470647882008756 0.24286439642815996
yahoo_A4 0.26890753963703323 0.01848030099177509 0.09576615316116278
yahoo_A4 0.41726615387920135 0.03913256051502488 0.16893392005621644
yahoo_A4 0.2945736181779962 0.0925633950576123 0.2143839275731135
yahoo_A4 0.7182662043957131 0.81249375343508 0.8143517108462365
yahoo_A4 0.7927272242988459 0.8549389222410892 0.9007693164164009
yahoo_A4 0.6483516038220051 0.5961263517092834 0.6621561107801868
yahoo_A4 0.40123452570302226 0.27660093009472175 0.5986772064115975
yahoo_A4 0.5942491593197877 0.32925837818735 0.46298191934404526
yahoo_A4 0.45070419027970904 0.19595777555348282 0.3627034433053316
yahoo_A4 0.637770854195864 0.10853350132014533 0.0488832692671599
yahoo_A4 0.3882783568463308 0.06552846791417004 0.2958134644533606
yahoo_A4 0.45070419027970904 0.017635405848483887 0.13307184943254344
yahoo_A4 0.3551401575753364 0.01026782732407385 0.0909691766701026
yahoo_A4 0.07439824225732532 0.0011792458468121599 -0.0
yahoo

In [8]:
# One row per evaluated series; show the per-dataset mean of each metric.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.862802,0.785256,0.876318
yahoo_A2,0.681964,0.469883,0.799709
yahoo_A3,0.597183,0.435267,0.513349
yahoo_A4,0.542656,0.352417,0.450631


### NASA

In [9]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [10]:
for loader in [load_nasa]:
    # presumably (window length, stride) -- confirm against data_loader
    datasets = loader(100, 100)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        # Train a fresh model, then reconstruct the test windows from the
        # encoder's last output (the sampled latent z).
        model = CNN_VAE(X_train)
        z_test = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(z_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Sub-datasets are labelled by 1-based position: D1, D2, ...
        dataset_name = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.13296894487001693 0.37454971742462734
D2 0.2699530282282525 0.21756620717794117 0.5025344006309231


In [11]:
# One row per evaluated series; show the per-dataset mean of each metric.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.132969,0.37455
D2,0.269953,0.217566,0.502534


### SMD

In [12]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [13]:
for loader in [load_smd]:
    # presumably (window length, stride) -- confirm against data_loader
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    # Label shared by every series of this loader, here 'smd'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        # Train a fresh model, then reconstruct the test windows from the
        # encoder's last output (the sampled latent z).
        model = CNN_VAE(X_train)
        z_test = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(z_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.20232181144029696 0.5753855920741581 0.521888680406877
smd 0.7141517779711564 0.5516386577636502 0.9182976553226475
smd 0.20111467783211762 0.1745763678070538 0.5911903376062385
smd 0.25709511124931544 0.2873047363563944 0.5907813393239671
smd 0.689655126355723 0.7511517867465145 0.9516453143237289
smd 0.7267976360915273 0.8032251567456201 0.8869642779430862
smd 0.4359626367927769 0.4486698601779356 0.7034005023443057
smd 0.30716719136819937 0.30611850236707355 0.735342488501898
smd 0.5271035125639076 0.5747599561245318 0.8625485716445264
smd 0.2596273980668859 0.30792785511488585 0.5433837534356515
smd 0.710753256473802 0.7487777642568118 0.840550096292183
smd 0.24491919329678966 0.27030654229683954 0.5713197092700563
smd 0.5960077243623663 0.6136700800268254 0.9084684127032229
smd 0.5246179575739174 0.5269636196020161 0.711061941376982
smd 0.5490013458230265 0.5981668008586271 0.9217290815524782
smd 0.967741885076135 0.9561753256936034 0.9814953942223376
smd 0.7643821429023572 

In [14]:
# One row per evaluated series; show the per-dataset mean of each metric.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.515538,0.525053,0.777415


### ECG

In [15]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [16]:
for loader in [load_ecg]:
    # presumably (window length, stride) -- confirm against data_loader
    datasets = loader(32, 16)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        # Train a fresh model, then reconstruct the test windows from the
        # encoder's last output (the sampled latent z).
        model = CNN_VAE(X_train)
        z_test = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(z_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Sub-datasets are labelled by 1-based position: D1, D2, ...
        dataset_name = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.5599999552000028 0.4879766675912788 0.6531181873780253
D2 0.6315788941828289 0.526206208538601 0.8413952868087482
D3 0.16666664861111138 0.02883169106815893 0.3237835168736265
D4 0.3809523428571462 0.20399695917390048 0.6491525347516718
D5 0.5333332833333373 0.30952770957349535 0.8406759818822656
D6 0.31578942700831664 0.1520637224813504 0.6756206698865388
D7 0.05374279705276701 0.017278843643561987 0.434757307534348
D8 0.19251335137979503 0.08028738253335913 0.41571467721684796
D9 0.38805966963689265 0.16173260225718553 0.34468183054611445


In [17]:
# One row per evaluated series; show the per-dataset mean of each metric.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.56,0.487977,0.653118
D2,0.631579,0.526206,0.841395
D3,0.166667,0.028832,0.323784
D4,0.380952,0.203997,0.649153
D5,0.533333,0.309528,0.840676
D6,0.315789,0.152064,0.675621
D7,0.053743,0.017279,0.434757
D8,0.192513,0.080287,0.415715
D9,0.38806,0.161733,0.344682


### Power Demand

In [18]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [19]:
for loader in [load_power_demand]:
    # presumably (window length, stride) -- confirm against data_loader
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    # Label shared by every series of this loader, here 'power_demand'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        # Train a fresh model, then reconstruct the test windows from the
        # encoder's last output (the sampled latent z).
        model = CNN_VAE(X_train)
        z_test = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(z_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.3606206231571944 0.13386993729632082 0.33812430414059097


In [20]:
# One row per evaluated series; show the per-dataset mean of each metric.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.360621,0.13387,0.338124


### 2D Gesture

In [21]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [22]:
for loader in [load_gesture]:
    # presumably (window length, stride) -- confirm against data_loader
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    # Label shared by every series of this loader, here 'gesture'.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        # Train a fresh model, then reconstruct the test windows from the
        # encoder's last output (the sampled latent z).
        model = CNN_VAE(X_train)
        z_test = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(z_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.45591589948213174 0.3357151688266453 0.6040000597857351


In [23]:
# One row per evaluated series; show the per-dataset mean of each metric.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.455916,0.335715,0.604
