In [1]:
import os
import random

# GPU selection must be in the environment before TensorFlow is imported
# (the import happens in the next cell):
#   CUDA_DEVICE_ORDER         - enumerate GPUs by PCI bus id
#   CUDA_VISIBLE_DEVICES      - expose only the device with index 1
#   TF_FORCE_GPU_ALLOW_GROWTH - let TensorFlow grow GPU memory on demand
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed Python's, NumPy's and TensorFlow's random number
# generators with one shared value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def train_gan(gan, dataset, batch_size, codings_size, dim, n_epochs=50):
    """Alternately train the discriminator and the generator of a stacked GAN.

    Each step has two phases: the discriminator is fitted on a mix of
    generated samples (label 0) and real samples (label 1), then the stacked
    model is fitted on fresh noise with label 1 while the discriminator's
    ``trainable`` flag is switched off.

    Args:
        gan: Model whose ``layers`` unpack to ``(generator, discriminator)``.
        dataset: Iterable yielding batches of real samples; every batch is
            cast to ``float32``.
        batch_size: Number of noise samples used for the generator update.
            The discriminator update is sized from the real batch itself, so
            a batch with a different number of rows (e.g. a trailing partial
            batch) no longer produces mismatched inputs and labels.
        codings_size: Size of the second axis of the noise tensor.
        dim: Size of the third axis of the noise tensor.
        n_epochs: Number of full passes over ``dataset``.

    Returns:
        None. The models held by ``gan`` are updated in place.
    """
    generator, discriminator = gan.layers

    # The generator is always trained against the "real" label, so the target
    # tensor is built once instead of on every step.
    generator_targets = tf.ones([batch_size, 1])

    for _ in range(n_epochs):
        for X_batch in dataset:
            X_batch = tf.cast(X_batch, tf.float32)
            n_real = tf.shape(X_batch)[0]

            # phase 1 - training the discriminator
            # One generated sample per real sample keeps the two classes
            # balanced whatever the size of this batch.
            noise = tf.random.normal(shape=[n_real, codings_size, dim])
            generated_samples = generator(noise)
            X_fake_and_real = tf.concat([generated_samples, X_batch], axis=0)
            # Labels follow the concatenation order: fakes (0) then reals (1).
            y1 = tf.concat(
                [tf.zeros([n_real, 1]), tf.ones([n_real, 1])], axis=0
            )
            discriminator.trainable = True
            discriminator.train_on_batch(X_fake_and_real, y1)

            # phase 2 - training the generator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            discriminator.trainable = False
            gan.train_on_batch(noise, generator_targets)
            
def get_gan(x_train):
    """Build and compile a GRU-based GAN sized from ``x_train``.

    Only ``x_train.shape[1]`` and ``x_train.shape[2]`` are read. They set the
    per-sample shape of the generator's input and output and of the
    discriminator's input.
    NOTE(review): presumably (window length, number of features) - confirm
    against the data loaders.

    Args:
        x_train: Array-like with at least three dimensions.

    Returns:
        The compiled stacked model. ``layers[0]`` is the generator and
        ``layers[1]`` is the discriminator, which is compiled on its own too.
    """
    n_steps = x_train.shape[1]
    n_channels = x_train.shape[2]

    # Generator: GRU stack that narrows to a single vector, repeats it along
    # the step axis, widens again and projects back to the sample shape.
    generator_layers = [
        layers.InputLayer(input_shape=(n_steps, n_channels)),
        layers.GRU(128, return_sequences=True),
        layers.Dropout(rate=0.2),
        layers.GRU(64),
        layers.RepeatVector(n_steps),
        layers.GRU(64, return_sequences=True),
        layers.Dropout(rate=0.2),
        layers.GRU(128),
        layers.Dense(n_steps * n_channels),
        layers.Reshape([n_steps, n_channels]),
    ]
    generator = keras.models.Sequential(generator_layers)

    # Discriminator: two GRU layers followed by a single sigmoid unit.
    discriminator_layers = [
        layers.InputLayer(input_shape=(n_steps, n_channels)),
        layers.GRU(64, return_sequences=True),
        layers.Dropout(0.2),
        layers.GRU(64),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid"),
    ]
    discriminator = keras.models.Sequential(discriminator_layers)

    gan = keras.models.Sequential([generator, discriminator])

    # The discriminator is compiled first; its trainable flag is then turned
    # off before the stacked model is compiled.
    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    discriminator.trainable = False
    gan.compile(loss="binary_crossentropy", optimizer="adam")

    return gan

### Yahoo S5

In [4]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GAN per Yahoo S5 series and record its anomaly-detection scores.
BATCH_SIZE = 128  # shared by the tf.data pipeline and train_gan

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    # The reported dataset name is the loader name without its "load_" prefix.
    dataset_name = loader.__name__.replace('load_', '')

    # NOTE(review): the two integers are presumably window length and stride
    # - confirm against data_loader.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building this series' model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(BATCH_SIZE, drop_remainder=True)
            .prefetch(1)
        )
        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The first layer of the stacked model is the generator; its output
        # on the test data is what gets scored.
        generator = gan.layers[0]
        X_test_rec = generator.predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Only the maximum of the F1 values is kept.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.03738317383177589 0.00480769253887533 -0.0
yahoo_A1 0.05504586610554703 0.014926318841405592 0.09546925238843343
yahoo_A1 0.6666666074074105 0.35161276324797885 0.4999999911666669
yahoo_A1 0.9999998500000123 0.0 0.9999998990476292
yahoo_A1 0.03703703333333351 0.004761904990972433 -0.0
yahoo_A1 0.03703703333333351 0.004761904990972433 -0.0
yahoo_A1 0.7999999200000041 0.3607005639602226 0.9269840821859433
yahoo_A1 0.9666666134444469 0.9410052576018082 0.9731111065431062
yahoo_A1 0.9206348678256514 0.9375203532457317 0.9851111064288685
yahoo_A1 0.8888888197530895 0.628148105828666 0.9514851247559559
yahoo_A1 0.10810809768687688 0.007500000949671265 -0.0
yahoo_A1 0.7499999343750033 0.43208311873845845 0.817749977641834
yahoo_A1 0.9999999437500025 0.937499976331382 0.9999999926264044
yahoo_A1 0.7999999200000041 0.38095234093123503 0.9607842660515213
yahoo_A1 0.7499999343750033 0.41270435077147205 0.6446601792155406
yahoo_A1 0.9999999409090937 0.9090908783316949 0.9999999898564595

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.06829267626412905 0.005194840631162036 -0.0
yahoo_A2 0.015267174034147195 0.0038461534585799203 -0.0
yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.06829267626412905 0.002604167445975811 -0.0
yahoo_A2 0.015267174034147195 0.0038461534585799203 -0.0
yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.06829267626412905 0.002604167445975811 -0.0
yahoo_A2 0.015267174034147195 0.0038461534585799203 -0.0
yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.06829267626412905 0.00775743327644073 0.25109073794845516
yahoo_A2 0.06829267626412905 0.005194840631162036 -0.0
yahoo_A2 0.015267174034147195 0.0038461534585799203 -0.0
yahoo_A2 0.041450773099949295 0.0026881725513145803 -0.0
yahoo_A2 0.4444444000000022 0.17648263206431553 0.8339441171644995
yahoo_A2 0.029411

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.2222221975308647 0.03780108303333946 0.4923542547135603
yahoo_A3 0.09230768336094745 0.004201681792727941 -0.0
yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.14925371730897866 0.004347827665800764 -0.0
yahoo_A3 0.0624999938476567 0.008699633897400685 0.30624997193229436
yahoo_A3 0.17647057188581453 0.013918775729983063 0.09876394430481163
yahoo_A3 0.16666661388890391 0.022771735991232545 0.2724576162655733
yahoo_A3 0.22222217283951495 0.03821479108419271 0.37372879329980757
yahoo_A3 0.18181816198347142 0.01675838899215872 0.27043997982019824
yahoo_A3 0.14925371730897866 0.03204090696920303 0.22733569514427907
yahoo_A3 0.18181816198347142 0.029216649001128452 0.3092592527930398
yahoo_A3 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A3 0.17647057188581453 0.009114960738280987 0.0973426816573804
yahoo_A3 0.04724408980097991 0.004098361191078604 -0.0
yahoo_A3 0.18181816198347142 0.02982207312185124 0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.09230768336094745 0.004201681792727941 -0.0
yahoo_A4 0.03174602857142873 0.004065040847075319 -0.0
yahoo_A4 0.0624999938476567 0.0041322321806666834 -0.0
yahoo_A4 0.39999995200000177 0.012112396473658325 0.6059027288556178
yahoo_A4 0.12121210964187425 0.010544783315222005 0.22068964342158073
yahoo_A4 0.12499995156251753 0.016528677108842656 0.24015803706296296
yahoo_A4 0.0624999938476567 0.006338433258920479 0.024999997895833487
yahoo_A4 0.07751937227330145 0.008264463152590763 -0.0
yahoo_A4 0.1999999480000115 0.03144884793326277 0.36370054703641314
yahoo_A4 0.0624999938476567 0.01015005777444636 0.29583330556944715
yahoo_A4 0.12121210964187425 0.004273505647355304 -0.0
yahoo_A4 0.18181813223141666 0.03236661227983009 0.6211805041495997
yahoo_A4 0.0624999938476567 0.011377274002312003 0.14305554962615766
yahoo_A4 0.09230768336094745 0.004201681792727941 -0.0
yahoo_A4 0.03174602857142873 0.004201680855047808 0.03278688194033898
yahoo_A4 0.03174602857142873 0.00406504084707531

In [6]:
# Tabulate the collected scores and average them per Yahoo sub-dataset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.488035,0.299807,0.544554
yahoo_A2,0.149806,0.043063,0.206261
yahoo_A3,0.166101,0.026351,0.212825
yahoo_A4,0.123234,0.013497,0.136644


### NASA

In [7]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GAN per NASA series and record its anomaly-detection scores.
BATCH_SIZE = 128  # shared by the tf.data pipeline and train_gan

for loader in [load_nasa]:
    # NOTE(review): the two integers are presumably window length and stride
    # - confirm against data_loader.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building this series' model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(BATCH_SIZE, drop_remainder=True)
            .prefetch(1)
        )
        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The first layer of the stacked model is the generator; its output
        # on the test data is what gets scored.
        generator = gan.layers[0]
        X_test_rec = generator.predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Series are labelled by their 1-based position: D1, D2, ...
        series_label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(series_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.2295534087061448 0.10054492275809881 0.3736609311633924
D2 0.19498578202833744 0.36883052651740206 0.5131809255860225


In [9]:
# Tabulate the collected scores. Every NASA series carries its own label
# (D1, D2, ...), so each group below holds exactly one row.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.229553,0.100545,0.373661
D2,0.194986,0.368831,0.513181


### SMD

In [10]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Train one GAN per SMD series and record its anomaly-detection scores.
BATCH_SIZE = 128  # shared by the tf.data pipeline and train_gan

for loader in [load_smd]:
    # The reported dataset name is the loader name without its "load_" prefix.
    dataset_name = loader.__name__.replace('load_', '')

    # NOTE(review): the two integers are presumably window length and stride
    # - confirm against data_loader.
    datasets = loader(8, 4)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building this series' model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(BATCH_SIZE, drop_remainder=True)
            .prefetch(1)
        )
        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The first layer of the stacked model is the generator; its output
        # on the test data is what gets scored.
        generator = gan.layers[0]
        X_test_rec = generator.predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Only the maximum of the F1 values is kept.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.17627464443920743 0.5532991977145021 0.5087209301507045
smd 0.5481480986310059 0.4259421627959828 0.8375471984832733
smd 0.07773848243330729 0.059624174098404335 0.3868589298291686
smd 0.09433961356354652 0.06895944573367709 0.33102278232655397
smd 0.6551723644470901 0.6382942064942079 0.8864078011490509
smd 0.8273877527484236 0.8265679846490493 0.8812277955484978
smd 0.1906216415238548 0.1247017596178504 0.4679806759112928
smd 0.2588235066758958 0.19629561233188014 0.586048432840561
smd 0.34715816814779354 0.29463362114907016 0.7826940600216972
smd 0.21837868410447125 0.2041617254668438 0.5088235636311222
smd 0.34482754644768576 0.4159207787172737 0.8278167977193284
smd 0.31284913550347165 0.23810251769053353 0.3752013740806507
smd 0.2389609987006308 0.23474571424765706 0.7973527135463029
smd 0.45751630398564924 0.3932709571190476 0.6612469380247212
smd 0.6814158814550898 0.6113122055182829 0.8810666418082104
smd 0.9629629106538662 0.9296134952390191 0.979060407943543
smd 0.5151

In [12]:
# Tabulate the collected scores and average them over all SMD series
# (they all share the single label "smd").
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.414688,0.401967,0.694218


### ECG

In [13]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [14]:
# Train one GAN per ECG series and record its anomaly-detection scores.
BATCH_SIZE = 128  # shared by the tf.data pipeline and train_gan

for loader in [load_ecg]:
    # NOTE(review): the two integers are presumably window length and stride
    # - confirm against data_loader.
    datasets = loader(4, 2)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building this series' model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(BATCH_SIZE, drop_remainder=True)
            .prefetch(1)
        )
        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The first layer of the stacked model is the generator; its output
        # on the test data is what gets scored.
        generator = gan.layers[0]
        X_test_rec = generator.predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Series are labelled by their 1-based position: D1, D2, ...
        series_label = f'D{i+1}'
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(series_label)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(series_label, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.4210525813875098 0.3578865019087283 0.7668660624922247
D2 0.4166666162344164 0.3424245336460199 0.7531017832194177
D3 0.11734690745783838 0.05144644042821206 0.5301683670705016
D4 0.24608498622184435 0.14999305654866596 0.5900093135255483
D5 0.36792448340602174 0.23728753952082596 0.7083972332979065
D6 0.20300749777105886 0.10099505348921684 0.5869722645103785
D7 0.04649986871269535 0.016385874366508405 0.4171229992882579
D8 0.18743959152484563 0.08171768584150915 0.4626302628521304
D9 0.45519199456505577 0.29900534345303076 0.6721090239727271


In [15]:
# Tabulate the collected scores. Every ECG series carries its own label
# (D1, D2, ...), so each group below holds exactly one row.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.421053,0.357887,0.766866
D2,0.416667,0.342425,0.753102
D3,0.117347,0.051446,0.530168
D4,0.246085,0.149993,0.590009
D5,0.367924,0.237288,0.708397
D6,0.203007,0.100995,0.586972
D7,0.0465,0.016386,0.417123
D8,0.18744,0.081718,0.46263
D9,0.455192,0.299005,0.672109


### Power Demand

In [16]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [17]:
# Train one GAN per Power Demand series and record its anomaly-detection scores.
BATCH_SIZE = 128  # shared by the tf.data pipeline and train_gan

for loader in [load_power_demand]:
    # The reported dataset name is the loader name without its "load_" prefix.
    dataset_name = loader.__name__.replace('load_', '')

    # NOTE(review): the two integers are presumably window length and stride
    # - confirm against data_loader.
    datasets = loader(16, 8)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building this series' model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(BATCH_SIZE, drop_remainder=True)
            .prefetch(1)
        )
        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The first layer of the stacked model is the generator; its output
        # on the test data is what gets scored.
        generator = gan.layers[0]
        X_test_rec = generator.predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Only the maximum of the F1 values is kept.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.4166666180555608 0.3270262158593743 0.6852466917759956


In [18]:
# Tabulate the collected scores and average them under the single
# "power_demand" label.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.416667,0.327026,0.685247


### 2D Gesture

In [19]:
# Fresh accumulator for this section: one independent list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [20]:
# Train one GAN per 2D Gesture series and record its anomaly-detection scores.
BATCH_SIZE = 128  # shared by the tf.data pipeline and train_gan

for loader in [load_gesture]:
    # The reported dataset name is the loader name without its "load_" prefix.
    dataset_name = loader.__name__.replace('load_', '')

    # NOTE(review): the two integers are presumably window length and stride
    # - confirm against data_loader.
    datasets = loader(4, 2)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building this series' model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(BATCH_SIZE, drop_remainder=True)
            .prefetch(1)
        )
        train_gan(gan, dataset, BATCH_SIZE, X_train.shape[1], X_train.shape[2])

        # The first layer of the stacked model is the generator; its output
        # on the test data is what gets scored.
        generator = gan.layers[0]
        X_test_rec = generator.predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        # Only the maximum of the F1 values is kept.
        best_f1 = np.max(scores['f1'])
        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.41320429272434867 0.2402366166809034 0.4903753928437219


In [21]:
# Tabulate the collected scores and average them under the single
# "gesture" label.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.413204,0.240237,0.490375
