In [1]:
import os
import random

# GPU configuration, set before TensorFlow is imported in the next cell:
# enumerate devices in PCI bus order, expose only device 0, and let
# TensorFlow grow its GPU memory allocation on demand.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed the Python, NumPy and TensorFlow random number
# generators with the same fixed value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
class Sampling(layers.Layer):
    """Reparameterization layer that samples a latent vector z.

    Given the pair ``(z_mean, z_log_var)`` it returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is drawn
    from a normal distribution with the same 2-D shape as ``z_mean``.
    """

    def call(self, inputs):
        mean, log_var = inputs
        # Noise is drawn with the dynamic (dim 0, dim 1) shape of the mean tensor.
        noise_shape = (tf.shape(mean)[0], tf.shape(mean)[1])
        noise = tf.keras.backend.random_normal(shape=noise_shape)
        # exp(0.5 * log_var) turns the log-variance into a standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [4]:
class VAE(keras.Model):
    """Variational autoencoder built from an encoder/decoder pair.

    Training is driven by a custom ``train_step`` that minimises the sum of a
    reconstruction loss and the KL divergence between the approximate
    posterior N(z_mean, exp(z_log_var)) and a standard normal prior.

    Args:
        encoder: Model returning ``(z_mean, z_log_var, z)`` for an input batch.
        decoder: Model mapping a latent batch ``z`` to a reconstruction.
        reconstruction_loss_fn: Optional callable ``fn(y_true, y_pred)`` whose
            result is summed over axis 1 and averaged over the batch, so it
            must keep at least two axes. Defaults to
            ``keras.losses.binary_crossentropy``.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, encoder, decoder, reconstruction_loss_fn=None, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # NOTE(review): binary cross-entropy presumes targets and predictions
        # in [0, 1], while the decoder built in CNN_VAE ends in a linear Dense
        # layer. It stays the default so existing results are unchanged; pass
        # another loss (e.g. a squared error) if the data is not scaled to
        # [0, 1] -- TODO confirm against the data loaders.
        if reconstruction_loss_fn is None:
            reconstruction_loss_fn = keras.losses.binary_crossentropy
        self.reconstruction_loss_fn = reconstruction_loss_fn
        # Running means of each loss term, reported by fit().
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers listed here are the ones Keras treats as this model's metrics."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def call(self, inputs, training=None):
        """Reconstruct ``inputs`` by decoding the sampled latent code ``z``.

        Defining ``call`` makes ``model(x)`` and ``model.predict(x)`` usable
        in addition to calling the encoder and decoder separately.
        """
        _, _, z = self.encoder(inputs, training=training)
        return self.decoder(z, training=training)

    def train_step(self, data):
        """Run one optimisation step and return the running loss averages.

        Args:
            data: Input batch, or an ``(x, ...)`` tuple of which only ``x`` is used.

        Returns:
            Dict with the keys ``loss``, ``reconstruction_loss`` and ``kl_loss``.
        """
        # fit(x, y) hands over a tuple; the VAE is unsupervised, so keep x only.
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # The loss function output is summed over axis 1, then averaged
            # over the batch.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    self.reconstruction_loss_fn(data, reconstruction), axis=1
                )
            )
            # Closed-form KL(N(mean, var) || N(0, 1)) per latent dimension.
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [5]:
def CNN_VAE(X_train, latent_dim=16, epochs=50, batch_size=128, learning_rate=0.001, verbose=0):
    """Build a 1D-convolutional VAE and fit it on ``X_train``.

    Args:
        X_train: 3-D array; axes 1 and 2 define the model's input shape
            (presumably window length and feature count -- confirm against
            the data loaders).
        latent_dim: Size of the latent vector.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        learning_rate: Learning rate of the Adam optimizer.
        verbose: Verbosity level passed to ``fit``.

    Returns:
        The fitted ``VAE``; its parts are reachable as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        ValueError: If ``X_train`` is not 3-dimensional.
    """
    if len(X_train.shape) != 3:
        raise ValueError(
            f"X_train must be 3-dimensional, got shape {tuple(X_train.shape)}"
        )
    steps, n_features = X_train.shape[1], X_train.shape[2]

    # Encoder: two stride-2 convolutions, then dense heads for the mean and
    # log-variance of the latent distribution.
    encoder_inputs = keras.Input(shape=(steps, n_features))
    x = layers.Conv1D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
    x = layers.Conv1D(64, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation="relu")(x)
    z_mean = layers.Dense(latent_dim, name="z_mean")(x)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
    z = Sampling()([z_mean, z_log_var])
    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")

    # Decoder: starts from a fixed (32, 64) feature map regardless of the
    # input size; the final Dense + Reshape map back to the input shape.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(32 * 64, activation="relu")(latent_inputs)
    x = layers.Reshape((32, 64))(x)
    x = layers.Conv1DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Conv1DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(steps * n_features)(x)
    decoder_outputs = layers.Reshape([steps, n_features])(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

    model = VAE(encoder, decoder)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate))
    model.fit(X_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

### Yahoo S5

In [6]:
# Fresh per-column accumulators for the Yahoo S5 runs (one entry per series).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [7]:
# Train one CNN-VAE per Yahoo S5 series and record its detection scores.
for loader in (load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4):
    dataset_name = loader.__name__.replace('load_', '')
    # Loader arguments are presumably window length and stride -- TODO confirm.
    datasets = loader(128, 64)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]
        model = CNN_VAE(X_train)

        # The encoder returns [z_mean, z_log_var, z]; the sampled z (last
        # item) is what gets decoded into the reconstruction.
        latent = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(latent)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        row = (dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])
        for column, value in zip(('dataset', 'f1', 'pr_auc', 'roc_auc'), row):
            total_scores[column].append(value)
        print(*row)

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.5714285142857172 0.1666666606944441 0.33333328888889363
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.33333329444444565 0.09999998800000123 0.49999993750000654
yahoo_A1 0.9999998500000123 0.7499998666666867 0.9999999000000085
yahoo_A1 0.9999998500000123 0.7499998666666867 0.9999999000000084
yahoo_A1 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A1 0.8888888197530895 0.25000000993055255 -0.0
yahoo_A1 0.5714285142857172 0.1999999860000008 0.16666665277777867
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.33333329444444565 0.0 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.7499999343750033 0.2916666536111113 0.24999997500000196
yahoo_A1 0.7499999343750033 0.1666666744444422 -0.0
yahoo_A1 0.7499999343750033 0.3194444278009265 -0.0
yahoo_A1 0.7499999343750033 0.5972221582407472 0.5833332805555593
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.7999999200000

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.46153841893491365 0.10416666659143475 -0.0
yahoo_A2 0.46153841893491365 0.12499999899470854 0.2857142673469398
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.07142857588525468 0.24999997812500177
yahoo_A2 0.24999997187500062 0.0 0.0
yahoo_A2 0.46153841893491365 0.1190476180281553 0.14285713367346983
yahoo_A2 0.46153841893491365 0.10863095196292713 0.17708332469618088
yahoo_A2 0.33333329444444565 0.09999998800000123 0.3333332944444484
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.46153841893491365 0.07142857629771254 0.2499999802083349
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.13425925480838485 -0.0
yahoo_A2 0.4285713887755125 0.06250000558869845 0.1249999921875004
yahoo_A2 0.4444443901234612 0.17361110408522745 0.39583331414930634
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.46153841893491365 0.1884920544

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.4999999500000025 0.12499999770833278 0.24999996875000327
yahoo_A3 0.4999999500000025 0.2083333178472232 0.37499995937500386
yahoo_A3 0.7999999360000031 0.16666667972221952 -0.0
yahoo_A3 0.4999999500000025 0.16249999164583348 0.12499999062500056
yahoo_A3 0.9090908429752097 0.49999997151389086 0.19999997600000247
yahoo_A3 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A3 0.8571427836734729 0.6499999463055601 0.7499999437500033
yahoo_A3 0.6666666074074105 7.499999185480748e-09 0.0
yahoo_A3 0.8888888197530895 0.722222152060192 0.8333332680555596
yahoo_A3 0.9999999333333361 0.8999999229027853 0.0
yahoo_A3 0.8888888197530895 0.43749997541666774 0.7499999062500098
yahoo_A3 0.9090908429752097 0.6749999523888925 0.39999995200000493
yahoo_A3 0.4999999375000028 0.16666664444444684 0.5999999280000075
yahoo_A3 0.7999999360000031 0.34722220761574113 -0.0
yahoo_A3 0.28571425306122533 0.08333332361111209 -0.0
yahoo_A3 0.9090908429752097 0.2500000140138856 -0.0
yahoo_A3 0.4999999500000025

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.7999999200000041 0.25000000083333124 0.6666666166666696
yahoo_A4 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A4 0.6666666074074105 0.37499996750000286 0.5833332680555616
yahoo_A4 0.5714285142857172 0.1250000006249989 0.2499999812500011
yahoo_A4 0.9090908429752097 0.47999998088333384 -0.0
yahoo_A4 0.7999999360000031 0.336111098439815 -0.0
yahoo_A4 0.4999999500000025 0.12499999770833278 0.24999996875000327
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 0.4861110871620382 -0.0
yahoo_A4 0.7999999360000031 0.37638887231018564 -0.0
yahoo_A4 0.9090908429752097 4.083333052458349e-09 0.0
yahoo_A4 0.6666666074074105 0.1666666681944433 0.33333328888889363
yahoo_A4 0.6666666074074105 0.12500000187499916 0.0
yahoo_A4 0.28571425306122533 0.0 0.0
yahoo_A4 0.39999995200000177 0.12499998437500164 0.39999995200000493
yahoo_A4 0.4999999500000025 0.0 0.0
yahoo_A4 0.9090908429752097 0.0 0.0
yahoo_A4 0.7999999360000031 0.0 0.0
yahoo_A4 0.9999999333333361 0.7499999208333443 

In [8]:
# Tabulate the per-series Yahoo S5 scores and average them per sub-benchmark.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.475014,0.192696,0.201637
yahoo_A2,0.394078,0.106491,0.172961
yahoo_A3,0.731221,0.271597,0.193599
yahoo_A4,0.708371,0.206336,0.181673


### NASA

In [9]:
# Fresh per-column accumulators for the NASA runs.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [10]:
# Train one CNN-VAE per NASA dataset and record its detection scores.
for loader in (load_nasa,):
    # Loader arguments are presumably window length and stride -- TODO confirm.
    datasets = loader(128, 64)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]
        model = CNN_VAE(X_train)

        # The encoder returns [z_mean, z_log_var, z]; the sampled z (last
        # item) is what gets decoded into the reconstruction.
        latent = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(latent)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Datasets are labelled D1, D2, ... by their 1-based position.
        label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        row = (label, best_f1, scores['pr_auc'], scores['roc_auc'])
        for column, value in zip(('dataset', 'f1', 'pr_auc', 'roc_auc'), row):
            total_scores[column].append(value)
        print(*row)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2588265274197952 0.130779545894996 0.5126536849158938
D2 0.28464975199693254 0.17769140073605816 0.47706698066140557


In [11]:
# Tabulate the NASA scores, one row per dataset label.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.258827,0.13078,0.512654
D2,0.28465,0.177691,0.477067


### SMD

In [12]:
# Fresh per-column accumulators for the SMD runs.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [13]:
# Train one CNN-VAE per SMD entity and record its detection scores.
for loader in (load_smd,):
    dataset_name = loader.__name__.replace('load_', '')
    # Loader arguments are presumably window length and stride -- TODO confirm.
    datasets = loader(128, 64)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]
        model = CNN_VAE(X_train)

        # The encoder returns [z_mean, z_log_var, z]; the sampled z (last
        # item) is what gets decoded into the reconstruction.
        latent = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(latent)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        row = (dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])
        for column, value in zip(('dataset', 'f1', 'pr_auc', 'roc_auc'), row):
            total_scores[column].append(value)
        print(*row)

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.23153690558205123 0.06546275382269463 0.49999999900806086
smd 0.14358973018803536 0.051524977043008735 0.4204458726363449
smd 0.18333329170834262 0.09897133089320162 0.5170733698579677
smd 0.1739130063327107 0.15493154868859857 0.5585805328265103
smd 0.08311687510743873 0.02272194339105235 0.5152030181603926
smd 0.47835047887257154 0.26011977871826253 0.420672406991159
smd 0.29561198390945814 0.1497332197833547 0.4604838576047268
smd 0.23444973995673352 0.19217705628810036 0.5291982169388214
smd 0.21307504139204825 0.09788794504719664 0.4128161223261762
smd 0.29953914490114675 0.2875592806986414 0.5797697357568089
smd 0.12690354134994566 0.03418687277992775 0.3146802309601826
smd 0.29953914490114675 0.09983383503846552 0.16139170828286709
smd 0.26477539061415617 0.2181505348064992 0.5167846935337239
smd 0.09999998950000007 0.08503720991001343 0.4505925637295893
smd 0.23999997792000163 0.20675320376453563 0.47097447130341674
smd 0.8888888197530895 0.8013549525226726 0.899999981752

In [14]:
# Tabulate the per-entity SMD scores and average them over the benchmark.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.241954,0.165873,0.443584


### ECG

In [15]:
# Fresh per-column accumulators for the ECG runs.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [16]:
# Train one CNN-VAE per ECG dataset and record its detection scores.
for loader in (load_ecg,):
    # Loader arguments are presumably window length and stride -- TODO confirm.
    datasets = loader(64, 32)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]
        model = CNN_VAE(X_train)

        # The encoder returns [z_mean, z_log_var, z]; the sampled z (last
        # item) is what gets decoded into the reconstruction.
        latent = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(latent)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Datasets are labelled D1, D2, ... by their 1-based position.
        label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        row = (label, best_f1, scores['pr_auc'], scores['roc_auc'])
        for column, value in zip(('dataset', 'f1', 'pr_auc', 'roc_auc'), row):
            total_scores[column].append(value)
        print(*row)

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.3783783368882437 0.18832833974714877 0.5782608625255199
D2 0.32558136657652986 0.047432305466370756 0.13839285473333862
D3 0.12195120565139933 0.050069384345644646 0.45704466529628857
D4 0.2999999730000019 0.027332144442154066 0.05952380831916099
D5 0.2857142600583109 0.05502330776494324 0.1364221339543902
D6 0.1904761701184196 0.07696890734615067 0.4078340945912705
D7 0.05079364581103594 0.0 0.0
D8 0.22471908067163407 0.09273817914080493 0.41381380903805615
D9 0.417910413455115 0.0 0.0


In [17]:
# Tabulate the ECG scores, one row per dataset label.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.378378,0.188328,0.578261
D2,0.325581,0.047432,0.138393
D3,0.121951,0.050069,0.457045
D4,0.3,0.027332,0.059524
D5,0.285714,0.055023,0.136422
D6,0.190476,0.076969,0.407834
D7,0.050794,0.0,0.0
D8,0.224719,0.092738,0.413814
D9,0.41791,0.0,0.0


### Power Demand

In [18]:
# Fresh per-column accumulators for the Power Demand run.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [19]:
# Train a CNN-VAE on the Power Demand data and record its detection scores.
for loader in (load_power_demand,):
    dataset_name = loader.__name__.replace('load_', '')
    # Loader arguments are presumably window length and stride -- TODO confirm.
    datasets = loader(512, 256)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]
        model = CNN_VAE(X_train)

        # The encoder returns [z_mean, z_log_var, z]; the sampled z (last
        # item) is what gets decoded into the reconstruction.
        latent = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(latent)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        row = (dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])
        for column, value in zip(('dataset', 'f1', 'pr_auc', 'roc_auc'), row):
            total_scores[column].append(value)
        print(*row)

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.7999999200000041 0.6666665994444505 0.6666666111111148


In [20]:
# Tabulate the Power Demand scores and average them over the benchmark.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.8,0.666667,0.666667


### 2D Gesture

In [21]:
# Fresh per-column accumulators for the 2D Gesture run.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [22]:
# Train a CNN-VAE on the 2D Gesture data and record its detection scores.
for loader in (load_gesture,):
    dataset_name = loader.__name__.replace('load_', '')
    # Loader arguments are presumably window length and stride -- TODO confirm.
    datasets = loader(64, 32)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]
        model = CNN_VAE(X_train)

        # The encoder returns [z_mean, z_log_var, z]; the sampled z (last
        # item) is what gets decoded into the reconstruction.
        latent = model.encoder.predict(X_test)[-1]
        X_test_rec = model.decoder.predict(latent)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        row = (dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])
        for column, value in zip(('dataset', 'f1', 'pr_auc', 'roc_auc'), row):
            total_scores[column].append(value)
        print(*row)

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.4380952030476216 0.1212413551714061 0.3873047219258263


In [23]:
# Tabulate the 2D Gesture scores and average them over the benchmark.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.438095,0.121241,0.387305
