In [1]:
import os
import random

# GPU-related environment settings (device ordering, visible device index,
# on-demand memory growth). They are set in this first cell, before
# TensorFlow is imported in the next one.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from evaluator import evaluate
from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tensorflow import keras
from tensorflow.keras import layers
from tqdm.notebook import tqdm

# Reproducibility: seed the Python, NumPy and TensorFlow random number
# generators with the same fixed value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def train_gan(gan, dataset, batch_size, codings_size, dim=1, n_epochs=50):
    """Alternately train the discriminator and the generator of a GAN.

    Args:
        gan: Model whose ``layers`` attribute unpacks into exactly
            ``(generator, discriminator)``. Both the discriminator and the
            stacked model are trained with ``train_on_batch`` — presumably
            they are already compiled; verify against the caller.
        dataset: Iterable of real-sample batches. Each batch is cast to
            float32 and concatenated with the generator output along axis 0.
        batch_size: Fallback number of samples per batch, used only when the
            leading dimension of a batch is not statically known.
        codings_size: Second dimension of the noise tensor fed to the
            generator.
        dim: Last dimension of the noise tensor.
        n_epochs: Number of full passes over ``dataset``.
    """
    generator, discriminator = gan.layers
    for _ in range(n_epochs):
        for X_batch in dataset:
            X_batch = tf.cast(X_batch, tf.float32)
            # Size the noise and the labels from the batch actually received,
            # so a short final batch (a pipeline built without
            # drop_remainder=True) cannot cause a data/label length mismatch.
            n_samples = X_batch.shape[0]
            if n_samples is None:
                n_samples = batch_size

            # phase 1 - training the discriminator on fake (label 0) followed
            # by real (label 1) samples
            noise = tf.random.normal(shape=[n_samples, codings_size, dim])
            generated_images = generator(noise)
            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)
            y1 = tf.constant([[0.]] * n_samples + [[1.]] * n_samples)
            # NOTE(review): whether flipping `trainable` after compile takes
            # effect depends on the Keras version - confirm.
            discriminator.trainable = True
            discriminator.train_on_batch(X_fake_and_real, y1)

            # phase 2 - training the generator through the stacked model,
            # with every generated sample labelled as real (1)
            noise = tf.random.normal(shape=[n_samples, codings_size, dim])
            y2 = tf.constant([[1.]] * n_samples)
            discriminator.trainable = False
            gan.train_on_batch(noise, y2)
            
def get_gan(x_train):
    """Build and compile a 1-D convolutional generator/discriminator pair.

    Only the shape of ``x_train`` is used: ``x_train.shape[1]`` and
    ``x_train.shape[2]`` define the input shape of both networks and the
    output shape of the generator.

    Returns:
        The compiled ``Sequential([generator, discriminator])`` model. The
        discriminator is compiled on its own first, then marked
        non-trainable before the stacked model is compiled.
    """
    n_steps, n_channels = x_train.shape[1], x_train.shape[2]

    # Generator: two conv/pool stages, a transposed convolution, then a dense
    # projection reshaped to (n_steps, n_channels).
    generator_stack = [
        layers.InputLayer(input_shape=(n_steps, n_channels)),
        layers.Conv1D(128, kernel_size=3, padding='same', activation=layers.LeakyReLU(0.2)),
        layers.MaxPool1D(3, padding='same'),
        layers.Conv1D(64, kernel_size=3, padding='same', activation=layers.LeakyReLU(0.2)),
        layers.MaxPool1D(3, padding='same'),
        layers.BatchNormalization(),
        layers.Conv1DTranspose(64, kernel_size=5, strides=2, padding="SAME", activation="selu"),
        layers.BatchNormalization(),
        layers.Flatten(),
        layers.Dense(n_steps * n_channels),
        layers.Reshape([n_steps, n_channels]),
    ]
    generator = keras.models.Sequential(generator_stack)

    # Discriminator: two strided convolutions with dropout and a single
    # sigmoid output unit.
    discriminator_stack = [
        layers.InputLayer(input_shape=(n_steps, n_channels)),
        layers.Conv1D(64, kernel_size=5, strides=2, padding="SAME", activation=layers.LeakyReLU(0.2)),
        layers.Dropout(0.4),
        layers.Conv1D(128, kernel_size=5, strides=2, padding="SAME", activation=layers.LeakyReLU(0.2)),
        layers.Dropout(0.4),
        layers.Flatten(),
        layers.Dense(1, activation="sigmoid"),
    ]
    discriminator = keras.models.Sequential(discriminator_stack)

    gan = keras.models.Sequential([generator, discriminator])

    # Compile order matters here: the discriminator is compiled while still
    # trainable, and only then flagged non-trainable for the stacked model.
    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    discriminator.trainable = False
    gan.compile(loss="binary_crossentropy", optimizer="adam")
    return gan

### Yahoo S5

In [4]:
# Fresh accumulator for the Yahoo S5 runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GAN per series of each Yahoo S5 benchmark and record its scores.
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # gan.layers[0] is the generator; its output for the test windows is
        # passed to evaluate() as their reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

yahoo_A1 0.9848484333562928 0.9757996475817536 0.9953163372185636
yahoo_A1 0.9999999416666693 0.9166666376815789 0.9999999913849766
yahoo_A1 0.9545454003099199 0.9010568791744731 0.9572168813031016
yahoo_A1 0.999999930000003 0.7999999503333362 0.9999999797237572
yahoo_A1 0.24999997187500062 0.03299771501067749 0.7710581853540557
yahoo_A1 0.03743315138694303 0.001385041968961337 -0.0
yahoo_A1 0.999999948484851 0.9848484768688912 0.9999999981601733
yahoo_A1 0.5153373848470049 0.14473831278516242 -0.0
yahoo_A1 0.5153373848470049 0.06079786669391572 0.00559643646917141
yahoo_A1 0.9999999487179513 0.9743589683847852 0.9999999983719279
yahoo_A1 0.643216036120303 0.6496509506949223 0.6510851443919385
yahoo_A1 0.9937887686431877 0.9857732438840159 0.9993629437996161
yahoo_A1 0.9999999483870992 0.9838709593728258 0.9999999980537635
yahoo_A1 0.3478260499054851 0.14705106008300048 0.42449053169478923
yahoo_A1 0.9999999487013013 0.9870129799892682 0.9999999983645983
yahoo_A1 0.99999994761905 0.976

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.17493470983952586 0.014827576658557334 0.08493734027832656
yahoo_A2 0.17493470983952586 0.018368078041620845 0.09147291904243998
yahoo_A2 0.9062499486938504 0.938213036462698 0.9919705254737688
yahoo_A2 0.9207920283403617 0.9528843792832521 0.9889622367605708
yahoo_A2 0.004301074837784714 0.0010775860989057032 -0.0
yahoo_A2 0.9770991851756917 0.9784715628147403 0.999185243013342
yahoo_A2 0.9494948986634044 0.9682104334233766 0.9939713053577918
yahoo_A2 0.007874014957529917 0.0019762843865706583 0.4557234964458525
yahoo_A2 0.24999997553415199 0.10274980993768926 0.586234176247455
yahoo_A2 0.5248226555807081 0.41950357357269813 0.5492692218565569
yahoo_A2 0.004301074837784714 0.0010775860989057032 -0.0
yahoo_A2 0.17493470983952586 0.02916533869487878 0.22688365769891136
yahoo_A2 0.2482100020665201 0.04633613748718092 0.16240085607744553
yahoo_A2 0.39772722392820836 0.3973634225908315 0.7331875431646974
yahoo_A2 0.9999998500000123 0.0 0.9999998997840271
yahoo_A2 0.1749347098395

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

yahoo_A3 0.25396823169564314 0.005835856598275209 0.0374335097557432
yahoo_A3 0.442477841491114 0.09383581613759649 0.13378283859871468
yahoo_A3 0.47750861400366645 0.2994138728614576 0.5607886928920104
yahoo_A3 0.25396823169564314 0.0013262608993260833 -0.0
yahoo_A3 0.6314151976538852 0.5503834374682364 0.65493279743683
yahoo_A3 0.41726615387920135 0.034166019737499324 0.13619594458449558
yahoo_A3 0.5852089616298454 0.08888397192347564 0.017754100676055425
yahoo_A3 0.45614031550631234 0.0423598797476971 0.12606623967291086
yahoo_A3 0.5210083646578659 0.32024124617532 0.5128406876240504
yahoo_A3 0.7134502462945892 0.35159886775217525 0.34424055989222835
yahoo_A3 0.637770854195864 0.173925955440417 0.18114626909589837
yahoo_A3 0.5783521396394755 0.29806063274826516 0.3663885183789023
yahoo_A3 0.19301846297450495 0.03087769625406694 0.04047360328393529
yahoo_A3 0.6766916843462069 0.09799971059455834 0.027876774973314186
yahoo_A3 0.11563168070466742 0.007154649962090221 0.2263078681869413

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

yahoo_A4 0.4853700147102329 0.2154048927847556 0.4544414965022028
yahoo_A4 0.25396823169564314 0.011902877405170258 0.12718266034552514
yahoo_A4 0.41726615387920135 0.020678059970751752 0.4604010310586585
yahoo_A4 0.2945736181779962 0.025890528855886972 0.10102437346848409
yahoo_A4 0.6163521584233248 0.5635690646824669 0.5928812777124592
yahoo_A4 0.5478547455195053 0.6229654232149242 0.713943743670765
yahoo_A4 0.43694490352936993 0.4181454402269689 0.5825928809741533
yahoo_A4 0.3673469086517006 0.07373759625699994 0.28189094906875767
yahoo_A4 0.5942491593197877 0.12258306279970096 0.17033821026104493
yahoo_A4 0.45070419027970904 0.1865850633674364 0.3163265024191134
yahoo_A4 0.637770854195864 0.31255927193371136 0.45219872862699956
yahoo_A4 0.3882783568463308 0.15165703229852992 0.5425728536655319
yahoo_A4 0.45070419027970904 0.0016556304449329106 0.038461534763314
yahoo_A4 0.3551401575753364 0.0015290531176560284 0.05507246022558315
yahoo_A4 0.07439824225732532 0.00117924587270016 -0.

In [6]:
# Tabulate the collected scores and show the mean of each metric per dataset.
yahoo_results = pd.DataFrame(data=total_scores)
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.710783,0.581876,0.668311
yahoo_A2,0.586948,0.396494,0.66805
yahoo_A3,0.510575,0.173903,0.257616
yahoo_A4,0.469869,0.151107,0.268865


### NASA

In [7]:
# Fresh accumulator for the NASA runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [8]:
# Train one GAN per NASA sub-dataset and record its scores under D1, D2, ...
for loader in [load_nasa]:
    datasets = loader(100, 100)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # gan.layers[0] is the generator; its output for the test windows is
        # passed to evaluate() as their reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        dataset_name = f'D{i+1}'
        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.25300017462469454 0.1372393423219817 0.5081281985260547
D2 0.2699530282282525 0.1553041982995576 0.5280071554695182


In [9]:
# Tabulate the collected scores and show the mean of each metric per dataset.
nasa_results = pd.DataFrame(data=total_scores)
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.253,0.137239,0.508128
D2,0.269953,0.155304,0.528007


### SMD

In [4]:
# Fresh accumulator for the SMD runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [5]:
# Train one GAN per SMD entity and record its scores.
for loader in [load_smd]:
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # gan.layers[0] is the generator; its output for the test windows is
        # passed to evaluate() as their reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.20232181144029696 0.575072896662467 0.521888680406877
smd 0.643300748602941 0.5466173976039415 0.919130748548558
smd 0.15660441480072804 0.13596108774900337 0.5479628105973524
smd 0.21419350200497705 0.21075818974883948 0.5818850916941399
smd 0.689655126355723 0.7578630805913983 0.9600486201940233


KeyboardInterrupt: 

In [None]:
# Tabulate the collected scores and show the mean of each metric per dataset.
smd_results = pd.DataFrame(data=total_scores)
smd_results.groupby(by='dataset').mean()

### ECG

In [None]:
# Fresh accumulator for the ECG runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Train one GAN per ECG sub-dataset and record its scores under D1, D2, ...
for loader in [load_ecg]:
    datasets = loader(32, 16)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # gan.layers[0] is the generator; its output for the test windows is
        # passed to evaluate() as their reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        dataset_name = f'D{i+1}'
        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# Tabulate the collected scores and show the mean of each metric per dataset.
ecg_results = pd.DataFrame(data=total_scores)
ecg_results.groupby(by='dataset').mean()

### Power Demand

In [None]:
# Fresh accumulator for the Power Demand runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Train one GAN per Power Demand series and record its scores.
for loader in [load_power_demand]:
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # gan.layers[0] is the generator; its output for the test windows is
        # passed to evaluate() as their reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# Tabulate the collected scores and show the mean of each metric per dataset.
power_results = pd.DataFrame(data=total_scores)
power_results.groupby(by='dataset').mean()

### 2D Gesture

In [None]:
# Fresh accumulator for the 2D Gesture runs: one independent list per column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [None]:
# Train one GAN per 2D Gesture series and record its scores.
for loader in [load_gesture]:
    datasets = loader(64, 1)
    x_trains = datasets['x_train']
    x_tests = datasets['x_test']
    y_tests = datasets['y_test']
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        # Reset Keras' global state before building the next model.
        tf.keras.backend.clear_session()

        X_train, X_test = x_trains[i], x_tests[i]

        gan = get_gan(X_train)
        dataset = (
            tf.data.Dataset.from_tensor_slices(X_train)
            .batch(128, drop_remainder=True)
            .prefetch(1)
        )

        train_gan(gan, dataset, 128, X_train.shape[1], X_train.shape[2])

        # gan.layers[0] is the generator; its output for the test windows is
        # passed to evaluate() as their reconstruction.
        X_test_rec = gan.layers[0].predict(X_test)
        scores = evaluate(X_test, X_test_rec, y_tests[i], is_reconstructed=True)

        total_scores['dataset'].append(dataset_name)
        best_f1 = np.max(scores['f1'])
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

In [None]:
# Tabulate the collected scores and show the mean of each metric per dataset.
gesture_results = pd.DataFrame(data=total_scores)
gesture_results.groupby(by='dataset').mean()