In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
def train_gan(gan, dataset, batch_size, codings_size, dim, n_epochs=50):
    generator, discriminator = gan.layers
    for epoch in range(n_epochs):
        for X_batch in dataset:
            X_batch = tf.cast(X_batch, tf.float32)
            # phase 1 - training the discriminator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            generated_images = generator(noise)
            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)
            y1 = tf.constant([[0.]] * batch_size + [[1.]] * batch_size)
            discriminator.trainable = True
            discriminator.train_on_batch(X_fake_and_real, y1)
            # phase 2 - training the generator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            y2 = tf.constant([[1.]] * batch_size)
            discriminator.trainable = False
            gan.train_on_batch(noise, y2)
            
def get_gan(x_train):
    generator = keras.models.Sequential([
        layers.InputLayer(input_shape=(x_train.shape[1], x_train.shape[2])),
        layers.LSTM(128, return_sequences=True),
        layers.Dropout(rate=0.2),
        layers.LSTM(64),
        layers.RepeatVector(x_train.shape[1]),
        layers.LSTM(64, return_sequences=True),
        layers.Dropout(rate=0.2),
        layers.LSTM(128),
        layers.Dense(x_train.shape[1] * x_train.shape[2]),
        layers.Reshape([x_train.shape[1], x_train.shape[2]])
    ])

    discriminator = keras.models.Sequential([
        layers.InputLayer(input_shape=(x_train.shape[1], x_train.shape[2])),
        keras.layers.LSTM(64, return_sequences=True),
        keras.layers.Dropout(0.2),
        keras.layers.LSTM(64),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1, activation="sigmoid")
    ])

    gan = keras.models.Sequential([generator, discriminator])

    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    discriminator.trainable = False
    gan.compile(loss="binary_crossentropy", optimizer="adam")
    return gan

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(128, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.399999960000002 0.0833333340277771 0.24999997500000232
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.46153841893491365 0.14999999350000026 0.49999997619047704
yahoo_A1 0.6666666133333363 0.7374999426875043 0.774999966625001
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.6666666133333363 0.7055555016296338 0.7666666319444456
yahoo_A1 0.33333330000000166 0.09999999400000033 0.49999996875000174
yahoo_A1 0.8749999398437528 0.5833333087962973 0.32142855076530724
yahoo_A1 0.6666666133333363 0.6999999340000063 0.5999999760000007
yahoo_A1 0.9999999000000052 0.49999995000000386 0.9999999375000035
yahoo_A1 0.18181816198347142 0.049999994500000554 -0.0
yahoo_A1 0.2222221975308647 0.06249999296875071 -0.0
yahoo_A1 0.18181816198347142 0.049999994500000554 0.49999994444445006
yahoo_A1 0.6666666133333363 0.3284722104122303 0.23499998972500033
yahoo_A1 0.6153845633

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A3 0.33333330000000166 0.10714285089002298 0.24999997187500275
yahoo_A3 0.2222221975308647 0.11408728771790108 0.11904761218820895
yahoo_A3 0.6666666133333363 0.08333334744212759 -0.0
yahoo_A3 0.4999999562500026 0.0555555638271593 -0.0
yahoo_A3 0.7272726677685981 0.19999999796428533 -0.0
yahoo_A3 0.33333330000000166 0.055555557160493235 -0.0
yahoo_A3 0.7058822989619408 0.08333334794447264 -0.0
yahoo_A3 0.49999995416666926 0.21428570366496638 0.3611110888888901
yahoo_A3 0.5999999460000028 0.10000000768253806 -0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.9411764096885841 0.9136904126367684 0.8571427563265408
yahoo_A3 0.9411764096885841 0.6027777485482548 0.11805554235146738
yahoo_A3 0.7777777216049413 0.18988095947960348 0.038690474620890064
yahoo_A3 0.24999997187500062 0.07142856326530694 0.3999999560000044
yahoo_A3 0.6666666133333363 0.08333334640872832 -0.0
yahoo_A3 0.15384613727810673 0.04166666215277823 -0.0
yahoo_A3 0.8749999398437528 0.16666668744377994 -0.0
yahoo_A3 0.4285713887755

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A4 0.399999960000002 0.07142857308673395 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.3636363272727291 0.07142857239228963 0.24999998125000128
yahoo_A4 0.399999960000002 0.07142857308673395 -0.0
yahoo_A4 0.33333330000000166 0.055555557160493235 0.06249999296875069
yahoo_A4 0.8888888296296326 0.2500000048263881 -0.0
yahoo_A4 0.6666666133333363 0.08333334744212757 -0.0
yahoo_A4 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.5882352456747433 0.28253966584232754 0.4809523524784594
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.8749999398437528 0.1666666874693846 -0.0
yahoo_A4 0.6666666133333363 0.14285714495488425 -0.0
yahoo_A4 0.8749999398437528 0.5416666235449782 0.45833328888889224
yahoo_A4 0.399999960000002 0.07142857308673395 -0.0
yahoo_A4 0.5454544958677713 0.1555555531488091 0.053333330755555644
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.18181816198347142 0.049999994500000554 -0.0
yahoo_A4 0.15384613727810673 0.04166666215277823 -0.0
yahoo_A4 0.33333330000000166 0.1041666615104

In [6]:
yahoo_results = pd.DataFrame(total_scores)

In [7]:
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.374761,0.189975,0.237475
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.550227,0.17407,0.101468
yahoo_A4,0.468156,0.134043,0.09571


### NASA

In [8]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [9]:
for loader in [load_nasa]:
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.06566003201892055 0.49999999989972377
D2 0.2699530282282525 0.05634807415274783 0.49999999928670286


In [10]:
nasa_results = pd.DataFrame(total_scores)

In [11]:
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.06566,0.5
D2,0.269953,0.056348,0.5


### SMD

In [12]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [13]:
for loader in [load_smd]:
    datasets = loader(100, 100)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.21943571700356884 0.06161971811210573 0.49999999837062536
smd 0.1269841149911827 3.0033558376296576e-10 0.0
smd 0.1556420088722022 0.017777777947419755 0.49999999351958535
smd 0.1556420088722022 0.017777777946624655 0.49999999351958535
smd 0.06530611607830125 0.004545454997655452 0.5240174425776293
smd 0.4370860582693768 0.09134615381234508 0.4999999983900929
smd 0.2647058591911784 0.04954954951780259 0.49999999747727275
smd 0.19157086375713953 0.013824885197825789 0.4999999914297
smd 0.18461536771597778 0.02466367723813717 0.4999999952186965
smd 0.28363633908892766 0.06415929191431648 0.49999999802205497
smd 0.1044176607022476 0.00444444504793359 0.49999997477578606
smd 0.2710622474312033 0.5403536650695211 0.5135135104411657
smd 0.22556388959240378 0.027266638260141425 0.3749999966929612
smd 0.08026755077012628 0.008928571561911779 0.49999998981818194
smd 0.18461536771597778 0.00925925983089516 0.49999998726415124
smd 0.02499999751041683 0.004237288098040048 0.49999997478632613

In [14]:
smd_results = pd.DataFrame(total_scores)

In [15]:
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.161623,0.041386,0.436496
