In [1]:
import os
import random

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# THESE LINES ARE FOR REPRODUCIBILITY
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
def train_gan(gan, dataset, batch_size, codings_size, dim, n_epochs=50):
    generator, discriminator = gan.layers
    for epoch in range(n_epochs):
        for X_batch in dataset:
            X_batch = tf.cast(X_batch, tf.float32)
            # phase 1 - training the discriminator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            generated_images = generator(noise)
            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)
            y1 = tf.constant([[0.]] * batch_size + [[1.]] * batch_size)
            discriminator.trainable = True
            discriminator.train_on_batch(X_fake_and_real, y1)
            # phase 2 - training the generator
            noise = tf.random.normal(shape=[batch_size, codings_size, dim])
            y2 = tf.constant([[1.]] * batch_size)
            discriminator.trainable = False
            gan.train_on_batch(noise, y2)
            
def get_gan(x_train):
    generator = keras.models.Sequential([
        layers.InputLayer(input_shape=(x_train.shape[1], x_train.shape[2])),
        layers.LSTM(128, return_sequences=True),
        layers.Dropout(rate=0.2),
        layers.LSTM(64),
        layers.RepeatVector(x_train.shape[1]),
        layers.LSTM(64, return_sequences=True),
        layers.Dropout(rate=0.2),
        layers.LSTM(128),
        layers.Dense(x_train.shape[1] * x_train.shape[2]),
        layers.Reshape([x_train.shape[1], x_train.shape[2]])
    ])

    discriminator = keras.models.Sequential([
        layers.InputLayer(input_shape=(x_train.shape[1], x_train.shape[2])),
        keras.layers.LSTM(64, return_sequences=True),
        keras.layers.Dropout(0.2),
        keras.layers.LSTM(64),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1, activation="sigmoid")
    ])

    gan = keras.models.Sequential([generator, discriminator])

    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    discriminator.trainable = False
    gan.compile(loss="binary_crossentropy", optimizer="adam")
    return gan

### Yahoo S5

In [4]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [5]:
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
    
        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.4999999375000028 0.16666664444444684 0.33333328888889363
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.33333329444444565 0.09999998800000123 0.49999993750000654
yahoo_A1 0.7999999200000041 0.6666665777777874 0.9166665902777827
yahoo_A1 0.7999999200000041 0.5416666027777841 0.8333332638888934
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.8888888197530895 0.3999999820000006 0.49999993750000654
yahoo_A1 0.5714285142857172 0.1999999860000008 0.49999995833333605
yahoo_A1 0.9999998500000123 0.49999990000001493 0.9999998750000131
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.7499999343750033 0.3916666422361123 0.1666666527777787
yahoo_A1 0.6666666000000033 0.16666666527777654 -0.0
yahoo_A1 0.4999999375000028 0.16666664444444684 -0.0
yahoo_A1 0.7499999343750033 0.16666667444444216 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.7499999343750033 0.2

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.46153841893491365 0.10714285727891107 0.17857140994898132
yahoo_A2 0.46153841893491365 0.17797618257504277 0.20238094214852648
yahoo_A2 0.3636363272727291 0.0625000016493049 -0.0
yahoo_A2 0.4285713887755125 0.09999999793939388 0.18749998828125064
yahoo_A2 0.24999997187500062 0.07142856326530694 -0.0
yahoo_A2 0.46153841893491365 0.11111110867283944 -0.0
yahoo_A2 0.4285713887755125 0.05555556109988678 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.46153841893491365 0.06250000600115631 -0.0
yahoo_A2 0.4285713887755125 0.14682539096887257 0.22916665512152826
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.46153841893491365 0.1694444367731484 0.4642856725340172
yahoo_A2 0.4285713887755125 0.15277777039772764 0.4583332873263933
yahoo_A2 0.33333330000000166 0.055555557160493235 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.46153841893491365 0.16550925207401002 0.2857142673469398
yahoo_A2 0.4285713887755125 0.055555562177327655 0.06249999296875069
yahoo_A2 0.24999997187500062 0.07142856326530694 0.0833333236111121

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A3 0.4999999500000025 0.18333332272222258 0.2499999812500011
yahoo_A3 0.6666666000000033 0.16666666527777654 -0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.7999999200000041 0.24999999166666562 -0.0
yahoo_A3 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A3 0.7499999343750033 0.3361110931620377 -0.0
yahoo_A3 0.5714285142857172 0.1666666606944441 0.33333328888889363
yahoo_A3 0.6666666000000033 0.16666666527777654 -0.0
yahoo_A3 0.9999999166666703 0.49999996666666835 0.0
yahoo_A3 0.8571427836734729 0.2500000034722198 -0.0
yahoo_A3 0.8571427836734729 0.43749997065972396 -0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.6666666000000033 0.16666666527777654 -0.0
yahoo_A3 0.28571425306122533 0.08333332361111209 -0.0
yahoo_A3 0.7999999200000041 0.24999999166666562 -0.0
yahoo_A3 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A3 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A3 0.39999995200000177 0.12499998437500164 -0.0
yahoo_A3 0.0 0.0 -0.0
yah

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.5714285142857172 0.1250000006249989 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.5714285142857172 0.1250000006249989 -0.0
yahoo_A4 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A4 0.7999999200000041 0.4374999588541698 -0.0
yahoo_A4 0.6666666000000033 0.16666666527777654 -0.0
yahoo_A4 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A4 0.4999999500000025 0.10000000133333238 -0.0
yahoo_A4 0.7999999200000041 0.24999999166666562 -0.0
yahoo_A4 0.4999999375000028 0.16666664444444684 -0.0
yahoo_A4 0.8571427836734729 0.33333331736111155 -0.0
yahoo_A4 0.5714285142857172 0.1250000006249989 -0.0
yahoo_A4 0.6666666074074105 0.1250000081249981 -0.0
yahoo_A4 0.28571425306122533 0.08333332361111209 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.33333329444444565 0.09999998800000123 -0.0
yahoo_A4 0.9090908429752097 0.33333332790277737 -0.0
yahoo_A4 0.7499999343750033 0.37499997687500114 0.29166664027777967
yahoo_A4 0.999999930000003 0.4999999800000005 0.0
yahoo_A4 0.8571427836734729 0.250000003472219

In [6]:
yahoo_results = pd.DataFrame(total_scores)
yahoo_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.445762,0.213839,0.267113
yahoo_A2,0.317147,0.073652,0.076786
yahoo_A3,0.582216,0.196996,0.067029
yahoo_A4,0.547526,0.174136,0.029302


### NASA

In [7]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [8]:
for loader in [load_nasa]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25769180223378857 0.06543155417696649 0.4999999999353614
D2 0.28464975199693254 0.055966697492007764 0.49999999953469354


In [9]:
nasa_results = pd.DataFrame(total_scores)
nasa_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.257692,0.065432,0.5
D2,0.28465,0.055967,0.5


### SMD

In [10]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [11]:
for loader in [load_smd]:
    datasets = loader(128, 64)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])   

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.23153690558205123 0.06546275382269463 0.49999999900806086
smd 0.14105792132682896 1.924824640674625e-10 0.0
smd 0.18226599319687592 0.018840579856612187 0.4999999960032438
smd 0.17326731082124444 0.017341040623997415 0.49999999568363274
smd 0.08311687510743873 0.002816901909780309 0.49999997485835823
smd 0.47835047887257154 0.09455128206172082 0.49999999895491387
smd 0.29561198390945814 0.05539358599092001 0.49999999852027605
smd 0.23444973995673352 0.018072289407687972 0.49999999567708336
smd 0.21307504139204825 0.028985507315774692 0.49999999734615375
smd 0.29953914490114675 0.06321839075384354 0.4999999986991626
smd 0.12690354134994566 0.007163324092339451 0.4999999898546513
smd 0.29953914490114675 0.03799392108596349 0.4999999978355263
smd 0.2635293887601403 0.02286585388381453 0.4999999965069223
smd 0.08547007725180877 0.0069124425589984296 0.49999999154984437
smd 0.21307504139204825 0.0075757581138413774 0.49999998984615396
smd 0.026737965262089488 0.006775067613340092 0.49

In [12]:
smd_results = pd.DataFrame(total_scores)
smd_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.179698,0.024399,0.47437


### ECG

In [13]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [14]:
for loader in [load_ecg]:
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)
        
        total_scores['dataset'].append(f'D{i+1}')
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(f'D{i+1}', np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

D1 0.46153841893491365 0.3206663754191511 0.5608695579908634
D2 0.3043477989603044 0.17650912903199803 0.3757440402834247
D3 0.11009173251409898 0.02450980358613006 0.4999999894845363
D4 0.2999999730000019 0.17987243076746443 0.352678563227041
D5 0.27450977916186264 0.10227802413473001 0.30823680288237776
D6 0.2352940959630927 0.11917614182145352 0.5881336304824376
D7 0.05079364581103594 0.009836065500954918 0.4999999914994427
D8 0.21276593798098847 0.09216036011906095 0.43252627040428043
D9 0.424242389531683 0.15913023814688546 0.30128204806224756


In [15]:
ecg_results = pd.DataFrame(total_scores)
ecg_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.461538,0.320666,0.56087
D2,0.304348,0.176509,0.375744
D3,0.110092,0.02451,0.5
D4,0.3,0.179872,0.352679
D5,0.27451,0.102278,0.308237
D6,0.235294,0.119176,0.588134
D7,0.050794,0.009836,0.5
D8,0.212766,0.09216,0.432526
D9,0.424242,0.15913,0.301282


### Power Demand

In [16]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [17]:
for loader in [load_power_demand]:
    datasets = loader(512, 256)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc']) 

  0%|          | 0/1 [00:00<?, ?it/s]

power_demand 0.7499999343750033 0.3499999800833342 -0.0


In [18]:
power_results = pd.DataFrame(total_scores)
power_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
power_demand,0.75,0.35,0.0


### 2D Gesture

In [19]:
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

In [20]:
for loader in [load_gesture]:
    datasets = loader(64, 32)
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    
    for i in tqdm(range(len(x_trains))):
        tf.keras.backend.clear_session()

        X_train = x_trains[i]
        X_test = x_tests[i]
        
        gan = get_gan(X_train)
        dataset = tf.data.Dataset.from_tensor_slices(X_train)
        dataset = dataset.batch(128, drop_remainder=True).prefetch(1)

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])
        
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)


        total_scores['dataset'].append(loader.__name__.replace('load_', ''))
        total_scores['f1'].append(np.max(scores['f1']))
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(loader.__name__.replace('load_', ''), np.max(scores['f1']), scores['pr_auc'], scores['roc_auc'])  

  0%|          | 0/1 [00:00<?, ?it/s]

gesture 0.42201831455264965 0.13372092949567335 0.49999999703243614


In [21]:
gesture_results = pd.DataFrame(total_scores)
gesture_results.groupby('dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gesture,0.422018,0.133721,0.5
