In [1]:
import os
import random

# GPU selection has to be in the environment before TensorFlow is imported
# (the import happens in the next cell): enumerate devices in PCI bus order,
# expose only device "0", and ask TensorFlow to grow GPU memory on demand.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: pin every random number generator this notebook relies on
# (TensorFlow, NumPy and the Python standard library) to the same fixed seed.
tf.random.set_seed(0)
np.random.seed(0)
random.seed(0)

In [3]:
def train_gan(gan, dataset, batch_size, codings_size, dim=1, n_epochs=50):
    """Alternate discriminator and generator updates over ``dataset``.

    For every real batch two steps are taken:

    1. The discriminator is stepped on ``batch_size`` generated samples
       (label 0) stacked on top of the real batch (label 1).
    2. The stacked ``gan`` model is stepped on fresh noise with all-ones
       targets, after the discriminator has been flagged non-trainable.

    Args:
        gan: Model whose ``layers`` unpack to ``(generator, discriminator)``.
            Both ``discriminator`` and ``gan`` must already be compiled,
            since ``train_on_batch`` is called on each of them.
        dataset: Iterable of real batches. Each batch is cast to float32 and
            concatenated with the generator output along axis 0, so it must
            match that output in every dimension except the first.
        batch_size: Number of noise samples drawn in each phase.
        codings_size: Size of the second axis of the noise tensor.
        dim: Size of the last axis of the noise tensor.
        n_epochs: Number of full passes over ``dataset``.

    Returns:
        None. ``gan`` and its sub-models are updated in place.
    """
    generator, discriminator = gan.layers

    # These targets do not depend on the batch contents, so build them once.
    fake_labels = tf.zeros([batch_size, 1])
    y2 = tf.ones([batch_size, 1])

    for _ in range(n_epochs):
        for X_batch in dataset:
            X_batch = tf.cast(X_batch, tf.float32)

            # phase 1 - training the discriminator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            generated_images = generator(noise)
            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)
            # Size the "real" labels from the batch itself: a short final
            # batch (dataset built without drop_remainder) would otherwise
            # leave the targets longer than the inputs.
            real_labels = tf.ones([tf.shape(X_batch)[0], 1])
            y1 = tf.concat([fake_labels, real_labels], axis=0)
            discriminator.trainable = True
            discriminator.train_on_batch(X_fake_and_real, y1)

            # phase 2 - training the generator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            discriminator.trainable = False
            gan.train_on_batch(noise, y2)
            
def get_gan(x_train):
    """Build and compile a convolutional GAN for windows shaped like ``x_train``.

    Only ``x_train.shape[1]`` and ``x_train.shape[2]`` are read; they fix the
    input shape of both sub-models and the output shape of the generator, so
    the generator maps a tensor of that shape to a tensor of the same shape.

    Args:
        x_train: Array-like with at least three axes; axes 1 and 2 define the
            per-sample shape.

    Returns:
        The stacked ``Sequential([generator, discriminator])`` model. The
        discriminator is compiled on its own first, then flagged
        non-trainable before the stacked model is compiled. Both use binary
        cross-entropy and the ``"adam"`` optimizer.
    """
    sample_shape = (x_train.shape[1], x_train.shape[2])
    flat_size = x_train.shape[1] * x_train.shape[2]

    # Layers are instantiated in the same order as before so that seeded
    # weight initialisation and Keras auto-naming are unaffected.
    generator = keras.Sequential([
        layers.InputLayer(input_shape=sample_shape),
        layers.Conv1D(128, kernel_size=3, padding='same', activation=layers.LeakyReLU(0.2)),
        layers.MaxPool1D(3, padding='same'),
        layers.Conv1D(64, kernel_size=3, padding='same', activation=layers.LeakyReLU(0.2)),
        layers.MaxPool1D(3, padding='same'),
        layers.BatchNormalization(),
        layers.Conv1DTranspose(64, kernel_size=5, strides=2, padding="SAME", activation="selu"),
        layers.BatchNormalization(),
        layers.Flatten(),
        # Project back to one value per input element, then restore the
        # original per-sample shape.
        layers.Dense(flat_size),
        layers.Reshape(list(sample_shape)),
    ])

    discriminator = keras.Sequential([
        layers.InputLayer(input_shape=sample_shape),
        layers.Conv1D(64, kernel_size=5, strides=2, padding="SAME", activation=layers.LeakyReLU(0.2)),
        layers.Dropout(0.4),
        layers.Conv1D(128, kernel_size=5, strides=2, padding="SAME", activation=layers.LeakyReLU(0.2)),
        layers.Dropout(0.4),
        layers.Flatten(),
        # Single sigmoid unit, trained against 0/1 targets.
        layers.Dense(1, activation="sigmoid"),
    ])

    gan = keras.Sequential([generator, discriminator])

    # Compile the discriminator while it is still trainable, then flag it
    # non-trainable before compiling the stacked model.
    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    discriminator.trainable = False
    gan.compile(loss="binary_crossentropy", optimizer="adam")
    return gan

### Yahoo S5

In [4]:
# Per-series score accumulator for the Yahoo S5 runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GAN per Yahoo S5 series and record its detection scores.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every row.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# One constant for both the tf.data batching and the noise batches drawn by
# train_gan; the two must stay equal.
BATCH_SIZE = 128

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride - confirm against data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Drop Keras global state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        gan = get_gan(X_train)
        # drop_remainder=True keeps every real batch at exactly BATCH_SIZE
        # rows, matching the number of noise samples train_gan draws.
        # NOTE(review): a series with fewer than BATCH_SIZE training windows
        # therefore yields no batches and the model stays untrained.
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The generator (first layer of the stacked model) is applied to the
        # test windows and its output is scored as a reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.9999999490291288
yahoo_A1 0.9999999166666703 0.6666666055555603 0.9999999656957942
yahoo_A1 0.9999999333333361 0.8333332883333356 0.9999999823333335
yahoo_A1 0.9999998500000123 0.0 0.9999998990476293
yahoo_A1 0.666666577777783 0.0714285604036722 0.9711537495469771
yahoo_A1 0.666666577777783 0.012499998803543466 0.812499927343757
yahoo_A1 0.9999999166666703 0.6666666055555603 0.999999965714287
yahoo_A1 0.7999999488000028 0.7311733220514463 0.724280237367812
yahoo_A1 0.8679244758988992 0.8014827903038587 0.7988888851607409
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790099013
yahoo_A1 0.5714285142857172 0.2545750093905189 0.47121210953160414
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999790000004
yahoo_A1 0.9999999437500025 0.937499976331382 0.9999999926264046
yahoo_A1 0.05555555005144069 0.004854369563728194 -0.0
yahoo_A1 0.5714285142857172 0.32329362750593327 0.8898058026870822
yahoo_A1 0.9999999409090937 0.9090908783

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.041450773099949295 0.004587156312713244 0.41621617824349033
yahoo_A2 0.39999995200000177 0.004398603064020275 0.3119369235382181
yahoo_A2 0.8571427836734729 0.5107271486572443 0.8945945717461656
yahoo_A2 0.9999999357142885 0.8571428160204101 0.9999999851907256
yahoo_A2 0.015267174034147195 0.0038461534585799203 -0.0
yahoo_A2 0.9999999250000032 0.7499999447916703 0.9999999744594601
yahoo_A2 0.9230768591716004 0.8119047226103573 0.9977561557549948
yahoo_A2 0.035714282079081644 0.009090908165289348 0.5852712588486328
yahoo_A2 0.39999995200000177 0.11358892537888296 0.9344594355928603
yahoo_A2 0.5999999460000028 0.33450785208228523 0.7700074680283112
yahoo_A2 0.015267174034147195 0.0038461534585799203 -0.0
yahoo_A2 0.39999995200000177 0.08063949580048987 0.8905405177956542
yahoo_A2 0.28571423163266185 0.06339127322294888 0.6345050992280555
yahoo_A2 0.4444444000000022 0.25698949183901415 0.8952879448566182
yahoo_A2 0.9999998500000123 0.0 0.9999998992248164
yahoo_A2 0.041450773099

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.12121210964187425 0.01974842923815237 0.18658661422900238
yahoo_A3 0.09230768336094745 0.004201681792727941 -0.0
yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.14925371730897866 0.019969038178359794 0.030116958445316498
yahoo_A3 0.0624999938476567 0.00825771756282842 0.17986110250057918
yahoo_A3 0.17647057188581453 0.01998152186135664 0.13234931926078558
yahoo_A3 0.09230768336094745 0.014632117409441031 0.32676551787409375
yahoo_A3 0.09230768336094745 0.010103624051070063 0.06214688968367983
yahoo_A3 0.14925371730897866 0.010293778744667217 0.21876564846140403
yahoo_A3 0.20512816857331304 0.08118766384535817 0.5873155021558615
yahoo_A3 0.14925371730897866 0.017864500445673386 0.19729531659143207
yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.17647057188581453 0.009272006637270286 0.11432290914343132
yahoo_A3 0.04724408980097991 0.004098361191078604 -0.0
yahoo_A3 0.18181816198347142 0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.28571425306122533 0.011420700180499392 0.2658192008954925
yahoo_A4 0.03278688196721328 0.006430270467727325 0.057377045854609135
yahoo_A4 0.0624999938476567 0.0041322321806666834 -0.0
yahoo_A4 0.0624999938476567 0.0041322321806666834 -0.0
yahoo_A4 0.2222221975308647 0.08208720397350067 0.32902298069982816
yahoo_A4 0.399999960000002 0.18179915248903272 0.681711808433635
yahoo_A4 0.0624999938476567 0.00434782684073941 0.049999994958333877
yahoo_A4 0.07751937227330145 0.0041666676308108625 -0.0
yahoo_A4 0.09230768336094745 0.004201681792727941 -0.0
yahoo_A4 0.0624999938476567 0.009530456328873532 0.28888886624074267
yahoo_A4 0.12121210964187425 0.012409743765585747 0.1414819345287545
yahoo_A4 0.0624999938476567 0.006024096968220815 0.3249999672291699
yahoo_A4 0.39999995200000177 0.015891911524055946 0.7104166028281311
yahoo_A4 0.09230768336094745 0.004201681792727941 -0.0
yahoo_A4 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A4 0.03174602857142873 0.004065040847075319 -0

In [6]:
# Mean of every metric over the series belonging to each Yahoo S5 subset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.643233,0.37642,0.703409
yahoo_A2,0.61913,0.289368,0.770061
yahoo_A3,0.145143,0.025431,0.259245
yahoo_A4,0.154784,0.025018,0.240991


### NASA

In [7]:
# Per-series score accumulator for the NASA runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GAN per NASA series and record its detection scores.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every row.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# One constant for both the tf.data batching and the noise batches drawn by
# train_gan; the two must stay equal.
BATCH_SIZE = 128

for loader in [load_nasa]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride - confirm against data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Drop Keras global state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        gan = get_gan(X_train)
        # drop_remainder=True keeps every real batch at exactly BATCH_SIZE
        # rows, matching the number of noise samples train_gan draws.
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The generator (first layer of the stacked model) is applied to the
        # test windows and its output is scored as a reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Series are labelled by 1-based position: D1, D2, ...
        series_label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(series_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.22959238615285713 0.10578600097689682 0.4060981814678738
D2 0.19498578202833744 0.5197017262914015 0.5131564973968686


In [9]:
# Tabulate the NASA scores; rows are grouped by their 'dataset' label and averaged.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229592,0.105786,0.406098
D2,0.194986,0.519702,0.513156


### SMD

In [10]:
# Per-series score accumulator for the SMD runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Train one GAN per SMD series and record its detection scores.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every row.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# One constant for both the tf.data batching and the noise batches drawn by
# train_gan; the two must stay equal.
BATCH_SIZE = 128

for loader in [load_smd]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride - confirm against data_loader.
    datasets = loader(8, 4)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Drop Keras global state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        gan = get_gan(X_train)
        # drop_remainder=True keeps every real batch at exactly BATCH_SIZE
        # rows, matching the number of noise samples train_gan draws.
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The generator (first layer of the stacked model) is applied to the
        # test windows and its output is scored as a reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.5547526859782113 0.5087209301507045
smd 0.48333328418981975 0.3960578279804751 0.9213119765570672
smd 0.21787705223347487 0.1271896809130644 0.6378855400592636
smd 0.08490565130829565 0.08867658993050297 0.5156594112124188
smd 0.6428570936862279 0.6332732887699951 0.8731465958719061
smd 0.8453487880523284 0.8305473926109066 0.8881457032278264
smd 0.20510848711435883 0.10684698794367055 0.510745920415451
smd 0.2528735376051463 0.18516718660612091 0.4745055242511237
smd 0.28150129170087596 0.23325551725714797 0.7712309036437404
smd 0.21837868410447125 0.19520608424523347 0.5082585549074803
smd 0.39639635980846033 0.4312961067433132 0.7436721355474575
smd 0.3153988598910256 0.2464834538117866 0.4662130477590333
smd 0.2314049226844002 0.19847980577846466 0.7287217644797648
smd 0.4749999612890655 0.41000235546923003 0.66976270101054
smd 0.6886791991901062 0.6043176098253326 0.8570810982009938
smd 0.8333332824074101 0.6972903701503503 0.9206127374560418
smd 0.493506

In [12]:
# Mean of every metric over all SMD series.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.407773,0.399074,0.700462


### ECG

In [13]:
# Per-series score accumulator for the ECG runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Train one GAN per ECG series and record its detection scores.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every row.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# One constant for both the tf.data batching and the noise batches drawn by
# train_gan; the two must stay equal.
BATCH_SIZE = 128

for loader in [load_ecg]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride - confirm against data_loader.
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Drop Keras global state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        gan = get_gan(X_train)
        # drop_remainder=True keeps every real batch at exactly BATCH_SIZE
        # rows, matching the number of noise samples train_gan draws.
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The generator (first layer of the stacked model) is applied to the
        # test windows and its output is scored as a reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Series are labelled by 1-based position: D1, D2, ...
        series_label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(series_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.29473680110803885 0.29154856821772457 0.47672388418958855
D2 0.3067484158831803 0.2599866856095871 0.4721378688804131
D3 0.19883037467255482 0.09061584639557055 0.6689325277667417
D4 0.22135920086869865 0.14189709281619742 0.5449852707224088
D5 0.361445733923653 0.26612026862370397 0.7060620238817772
D6 0.20430105313764838 0.08873332729108206 0.5789873768837633
D7 0.05868117979594257 0.022319115666796906 0.5505981358372001
D8 0.19536421917131014 0.09447263103723356 0.5140532030179902
D9 0.3583489386236379 0.16926131720401322 0.27037343119972024


In [15]:
# Tabulate the ECG scores; rows are grouped by their 'dataset' label and averaged.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.294737,0.291549,0.476724
D2,0.306748,0.259987,0.472138
D3,0.19883,0.090616,0.668933
D4,0.221359,0.141897,0.544985
D5,0.361446,0.26612,0.706062
D6,0.204301,0.088733,0.578987
D7,0.058681,0.022319,0.550598
D8,0.195364,0.094473,0.514053
D9,0.358349,0.169261,0.270373


### Power Demand

In [16]:
# Per-series score accumulator for the Power Demand run: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Train one GAN per Power Demand series and record its detection scores.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every row.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# One constant for both the tf.data batching and the noise batches drawn by
# train_gan; the two must stay equal.
BATCH_SIZE = 128

for loader in [load_power_demand]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride - confirm against data_loader.
    datasets = loader(16, 8)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Drop Keras global state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        gan = get_gan(X_train)
        # drop_remainder=True keeps every real batch at exactly BATCH_SIZE
        # rows, matching the number of noise samples train_gan draws.
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The generator (first layer of the stacked model) is applied to the
        # test windows and its output is scored as a reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.2631578716528182 0.09889063343181889 0.3180992930135566


In [18]:
# Mean of every metric over the Power Demand series.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.263158,0.098891,0.318099


### 2D Gesture

In [19]:
# Per-series score accumulator for the 2D Gesture run: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Train one GAN per 2D Gesture series and record its detection scores.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every row.
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# One constant for both the tf.data batching and the noise batches drawn by
# train_gan; the two must stay equal.
BATCH_SIZE = 128

for loader in [load_gesture]:
    # NOTE(review): the two loader arguments are presumably window length and
    # stride - confirm against data_loader.
    datasets = loader(4, 2)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Drop Keras global state left over from the previous model.
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]

        gan = get_gan(X_train)
        # drop_remainder=True keeps every real batch at exactly BATCH_SIZE
        # rows, matching the number of noise samples train_gan draws.
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(BATCH_SIZE, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The generator (first layer of the stacked model) is applied to the
        # test windows and its output is scored as a reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.41316682301739205 0.23180371104955444 0.4679848519623102


In [21]:
# Mean of every metric over the 2D Gesture series.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.413167,0.231804,0.467985
