In [1]:
import os
import random

# GPU selection / memory settings, exported to the process environment in this
# first cell, i.e. before the cell that imports TensorFlow.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",    # enumerate GPUs by PCI bus id
    "CUDA_VISIBLE_DEVICES": "1",          # expose only the device with index 1
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",  # ask TensorFlow to grow GPU memory on demand
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed Python's `random`, NumPy and TensorFlow with one shared value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def train_gan(gan, dataset, batch_size, codings_size, dim=1, n_epochs=50):
    """Alternately train the discriminator and the generator of a stacked GAN.

    Every batch is processed in two phases:

    1. The discriminator is updated on generated samples (label 0)
       concatenated with the real batch (label 1).
    2. The stacked ``gan`` model is updated on fresh noise with label 1,
       with the discriminator flagged as non-trainable.

    The number of fake samples and labels is taken from the real batch itself,
    so a short final batch (or a ``batch_size`` argument that disagrees with
    how ``dataset`` was batched) no longer causes a data/label length mismatch.

    Args:
        gan: Model whose ``layers`` unpack to exactly ``(generator,
            discriminator)``. ``train_on_batch`` is called on both ``gan`` and
            the discriminator, so both must already be compiled.
        dataset: Iterable yielding batches of real samples; each batch is cast
            to ``tf.float32``.
        batch_size: Nominal batch size. Used only as a fallback when the
            leading dimension of a batch is not statically known.
        codings_size: Second dimension of the noise tensor fed to the generator.
        dim: Third dimension of the noise tensor fed to the generator.
        n_epochs: Number of full passes over ``dataset``.

    Returns:
        None. The models are trained in place.
    """
    generator, discriminator = gan.layers
    for _ in range(n_epochs):
        for X_batch in dataset:
            X_batch = tf.cast(X_batch, tf.float32)

            # Size everything from the batch actually received rather than
            # trusting the caller-supplied constant.
            n_real = X_batch.shape[0]
            if n_real is None:
                n_real = batch_size

            # phase 1 - training the discriminator
            noise = tf.random.normal(shape=[n_real, codings_size, dim])
            generated_images = generator(noise)
            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)
            # Label order mirrors the concat order: fakes (0) first, reals (1) second.
            y1 = tf.constant([[0.]] * n_real + [[1.]] * n_real)
            discriminator.trainable = True
            discriminator.train_on_batch(X_fake_and_real, y1)

            # phase 2 - training the generator
            noise = tf.random.normal(shape=[n_real, codings_size, dim])
            # Target is "real" (1) for every generated sample.
            y2 = tf.constant([[1.]] * n_real)
            discriminator.trainable = False
            gan.train_on_batch(noise, y2)
            
def get_gan(x_train):
    """Build and compile a 1-D convolutional GAN sized to ``x_train``.

    The generator and the discriminator both take inputs of shape
    ``(x_train.shape[1], x_train.shape[2])``. The generator ends in a Dense +
    Reshape pair that restores that same shape; the discriminator ends in a
    single sigmoid unit.

    Args:
        x_train: Array-like with at least three dimensions; only
            ``shape[1]`` and ``shape[2]`` are read.

    Returns:
        A compiled ``Sequential`` model whose two layers are
        ``[generator, discriminator]``. The discriminator is compiled on its
        own first and then flagged non-trainable before the stacked model is
        compiled. Both use binary cross-entropy with the Adam optimizer.
    """
    window_shape = (x_train.shape[1], x_train.shape[2])
    flat_size = x_train.shape[1] * x_train.shape[2]

    generator_stack = [
        layers.InputLayer(input_shape=window_shape),
        layers.Conv1D(128, kernel_size=3, padding='same', activation=layers.LeakyReLU(0.2)),
        layers.MaxPool1D(3, padding='same'),
        layers.Conv1D(64, kernel_size=3, padding='same', activation=layers.LeakyReLU(0.2)),
        layers.MaxPool1D(3, padding='same'),
        layers.BatchNormalization(),
        layers.Conv1DTranspose(64, kernel_size=5, strides=2, padding="SAME", activation="selu"),
        layers.BatchNormalization(),
        # Project back to the input window: flatten, dense, then reshape.
        layers.Flatten(),
        layers.Dense(flat_size),
        layers.Reshape([x_train.shape[1], x_train.shape[2]]),
    ]
    generator = keras.models.Sequential(generator_stack)

    discriminator_stack = [
        layers.InputLayer(input_shape=window_shape),
        layers.Conv1D(64, kernel_size=5, strides=2, padding="SAME", activation=layers.LeakyReLU(0.2)),
        layers.Dropout(0.4),
        layers.Conv1D(128, kernel_size=5, strides=2, padding="SAME", activation=layers.LeakyReLU(0.2)),
        layers.Dropout(0.4),
        layers.Flatten(),
        layers.Dense(1, activation="sigmoid"),
    ]
    discriminator = keras.models.Sequential(discriminator_stack)

    gan = keras.models.Sequential([generator, discriminator])

    # Compile the discriminator while it is still trainable, then mark it
    # non-trainable before compiling the stacked model.
    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    discriminator.trainable = False
    gan.compile(loss="binary_crossentropy", optimizer="adam")
    return gan

### Yahoo S5

In [4]:
# Column-wise accumulator for this section's per-series scores (one fresh list per column).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GAN per series of each Yahoo S5 subset and record its scores.
for loader in (load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4):
    # Label used for both the results table and the progress print.
    dataset_name = loader.__name__.replace('load_', '')

    datasets = loader(128, 32)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        # NOTE(review): with drop_remainder=True a series with fewer than 128
        # training windows yields no batches at all - confirm window counts.
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # Only the first layer of the stacked model (the generator) is used on the test data.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.49999995416666926 0.12499999905753925 0.4999999416666726
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.3636363272727291 0.11111110432098803 0.49999996785714457
yahoo_A1 0.9999999250000032 0.8999999351944488 0.9999999550000014
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.8888888197530895 0.8466666068277818 0.9399999599000013
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.8749999398437528 0.6916666016660125 0.6666666027777826
yahoo_A1 0.9999998500000123 0.49999991889882167 0.9999998900000105
yahoo_A1 0.9999999000000052 0.7499998875000145 0.9999999375000035
yahoo_A1 0.18181816198347142 0.049999994500000554 -0.0
yahoo_A1 0.3636363272727291 0.0625000016493049 -0.0
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998888889001
yahoo_A1 0.6666666133333363 0.2555555486079145 -0.0
yahoo_A1 0.6153845633136124 0.08333334384093748 -0.0
yahoo_A1 0.4999999500000025 0.100

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A3 0.19999997800000044 0.055555549382716674 -0.0
yahoo_A3 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A3 0.6153845633136124 0.08333334384093748 -0.0
yahoo_A3 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A3 0.7999999413333363 0.2833333299136901 0.2499999812500011
yahoo_A3 0.33333330000000166 0.055555557160493235 -0.0
yahoo_A3 0.7058822989619408 0.27838009531114083 -0.0
yahoo_A3 0.49999995416666926 0.07142857794784456 -0.0
yahoo_A3 0.666666611111114 0.10000001155158537 -0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.9411764096885841 0.2500000204683213 -0.0
yahoo_A3 0.9411764096885841 0.2500000204683213 -0.0
yahoo_A3 0.749999943750003 0.10000001611639998 -0.0
yahoo_A3 0.16666664861111138 0.045454540495868265 -0.0
yahoo_A3 0.7058822989619408 0.08333334918455176 -0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.8571427959183704 0.16666668561507636 -0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.33333330000000166 0.055555557160493235 -0.0
yahoo_A3 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A3 0.0 

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A4 0.5714285224489823 0.0714285810033999 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A4 0.5714285224489823 0.07142858100339988 -0.0
yahoo_A4 0.19999997800000044 0.055555549382716674 -0.0
yahoo_A4 0.8749999398437528 0.1666666874437799 -0.0
yahoo_A4 0.6153845633136124 0.08333334384093748 -0.0
yahoo_A4 0.4999999562500026 0.05555556362513912 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.3636363272727291 0.0625000016493049 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.8749999398437528 0.3952380815018904 0.1999999860000008
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.8749999398437528 0.1666666874338593 -0.0
yahoo_A4 0.49999995416666926 0.07142857794784456 -0.0
yahoo_A4 0.6153845633136124 0.08333334384093748 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.3999999626666689 0.05000000514393849 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A4 0.8333332708333363 0.16666668317460023 -0.0
yahoo_A4 0.6666666074074105 0.1250000081249981 -0.0
yahoo_A4 

In [6]:
# One row per evaluated Yahoo series; columns come from the keys of total_scores.
yahoo_results = pd.DataFrame.from_dict(total_scores)

In [7]:
# Mean of every score column per Yahoo subset (displayed as the cell output).
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.471302,0.251025,0.318103
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.549188,0.129507,0.041954
yahoo_A4,0.452129,0.101775,0.021734


### NASA

In [8]:
# Column-wise accumulator for this section's per-series scores (one fresh list per column).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [9]:
# Train one GAN per NASA series and record its scores under the label D<index + 1>.
for loader in (load_nasa,):
    datasets = loader(100, 100)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        # NOTE(review): with drop_remainder=True a series with fewer than 128
        # training windows yields no batches at all - confirm window counts.
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # Only the first layer of the stacked model (the generator) is used on the test data.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        series_label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(series_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.32346086604438307 0.1600767402035157 0.6077561894303012
D2 0.2744629834145987 0.10862153799601292 0.35991304497352505


In [10]:
# One row per evaluated NASA series; columns come from the keys of total_scores.
nasa_results = pd.DataFrame.from_dict(total_scores)

In [11]:
# Mean of every score column per 'dataset' label (displayed as the cell output).
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.323461,0.160077,0.607756
D2,0.274463,0.108622,0.359913


### SMD

In [12]:
# Column-wise accumulator for this section's per-series scores (one fresh list per column).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [13]:
# Train one GAN per SMD series and record its scores.
for loader in (load_smd,):
    # Label used for both the results table and the progress print.
    dataset_name = loader.__name__.replace('load_', '')

    datasets = loader(100, 100)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        # NOTE(review): with drop_remainder=True a series with fewer than 128
        # training windows yields no batches at all - confirm window counts.
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # Only the first layer of the stacked model (the generator) is used on the test data.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.21943571700356884 0.5598591033089078 0.5142857126097861
smd 0.1269841149911827 0.028842153678961244 0.20873856392188728
smd 0.1556420088722022 0.09185448369881605 0.5002873074412524
smd 0.21428567193878317 0.19194169089188812 0.49500418503963173
smd 0.5454544958677713 0.33228459963464496 0.5228477771657705
smd 0.6105262720886456 0.6111801372271136 0.682389447765869
smd 0.2647058591911784 0.09651838720162727 0.3891706049077417
smd 0.19157086375713953 0.11702812683831507 0.42406210660722543
smd 0.18461536771597778 0.10511829671107717 0.2752995108563976
smd 0.28363633908892766 0.31254086908728773 0.46595875430222145
smd 0.1044176607022476 0.012764994011085882 0.13246850010897723
smd 0.4054053556245493 0.31295654103274284 0.6429774124814169
smd 0.23529409550173158 0.18749939197480625 0.45710478932683557
smd 0.1739129920605058 0.1017597278404407 0.6872313971754582
smd 0.2857142591836752 0.17214713133715798 0.3333660019410105
smd 0.7999999200000041 0.7166665922777845 0.995014211421885


In [14]:
# One row per evaluated SMD series; columns come from the keys of total_scores.
smd_results = pd.DataFrame.from_dict(total_scores)

In [15]:
# Mean of every score column per 'dataset' label (displayed as the cell output).
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.311911,0.236822,0.495014
