In [1]:
from scipy.io import arff
import pandas as pd
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.anogan import AnoGAN
from sklearn import metrics
import tensorflow as tf
import numpy as np
import random

In [2]:
class CustomData():
    """Tabular outlier-detection dataset loaded from an ARFF file.

    Attributes:
        data: DataFrame holding every column except the last two.
        ground_truth: Series taken from the last column, which holds the
            integer codes written into ``"outlier"`` (codes follow the sorted
            order of the distinct label values).
    """

    def __init__(self, path):
        """Read the ARFF file at ``path`` and split it into features and labels.

        Args:
            path: File name or file-like object accepted by
                ``scipy.io.arff.loadarff``.
        """
        arff_data = arff.loadarff(path)
        # loadarff returns (records, metadata); only the records are needed.
        df = pd.DataFrame(arff_data[0])
        # Replace the label values with integer codes; sort=True makes the
        # mapping independent of the order in which labels appear in the file.
        df["outlier"] = pd.factorize(df["outlier"], sort=True)[0]

        # Drops the last two columns - presumably a record id plus the
        # "outlier" label; TODO confirm against the ARFF header.
        self.data = df.iloc[:,:-2]
        self.ground_truth = df.iloc[:,-1]

    def __len__(self):
        """Return the number of rows (samples) in ``data``."""
        return len(self.data)

    def __getitem__(self, i):
        """Return the feature row(s) at integer position ``i``.

        Positional ``.iloc`` is required here: plain ``DataFrame[i]`` is a
        column-label lookup and raises ``KeyError`` for an integer index,
        which would be inconsistent with the row count from ``__len__``.
        """
        return self.data.iloc[i]
        
def AUC(truth, decision):
    """Print the ROC AUC of ``decision`` scores measured against ``truth`` labels.

    Args:
        truth: Ground-truth labels, forwarded to ``metrics.roc_auc_score``.
        decision: Scores for the same samples, forwarded alongside ``truth``.

    Returns:
        None. The score is only written to stdout, prefixed with ``"AUC: "``.
    """
    auc_value = metrics.roc_auc_score(truth, decision)
    # sep="" keeps the printed text identical to concatenating the prefix
    # with str(auc_value).
    print("AUC: ", auc_value, sep="")

In [3]:
# Fix every source of randomness up front so the stochastic detectors below
# are repeatable after a kernel restart.
seed = 888
# Keras helper that sets the Python, NumPy and TensorFlow seeds together.
tf.keras.utils.set_random_seed(seed)
# Explicit per-library seeding, kept so each dependency is visibly covered.
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)
# default_rng() does not seed NumPy's global state: it returns a new,
# independent Generator. Bind it to a name so it can actually be used rather
# than being created and immediately discarded.
rng = np.random.default_rng(seed)

Generator(PCG64) at 0x2AFE30804A0

In [4]:
# Candidate benchmark files (ARFF). Only one is loaded per run; pass a
# different *_path below to switch dataset.
internet_ads_path = "./Resources/Datasets/InternetAds_withoutdupl_norm_02_v01.arff"
wave_path = "./Resources/Datasets/Waveform_withoutdupl_norm_v01.arff"
arrythmia_path = "./Resources/Datasets/Arrhythmia_withoutdupl_norm_02_v01.arff"

# Currently evaluating the waveform file.
dataset = CustomData(path=wave_path)

In [5]:
# MO-GAAL detector with explicit discriminator / generator learning rates.
# NOTE(review): the training log for this cell reports "of 150" epochs for
# stop_epochs=50 - confirm how pyod derives the total epoch count.
mogaal_model = MO_GAAL(
    stop_epochs=50,
    lr_d=0.01,
    lr_g=0.01,
)
mogaal_model.fit(X=dataset.data)

  super().__init__(name, **kwargs)


Epoch 1 of 150

Testing for epoch 1 index 1:

Testing for epoch 1 index 2:

Testing for epoch 1 index 3:

Testing for epoch 1 index 4:

Testing for epoch 1 index 5:

Testing for epoch 1 index 6:
Epoch 2 of 150

Testing for epoch 2 index 1:

Testing for epoch 2 index 2:

Testing for epoch 2 index 3:

Testing for epoch 2 index 4:

Testing for epoch 2 index 5:

Testing for epoch 2 index 6:
Epoch 3 of 150

Testing for epoch 3 index 1:

Testing for epoch 3 index 2:

Testing for epoch 3 index 3:

Testing for epoch 3 index 4:

Testing for epoch 3 index 5:

Testing for epoch 3 index 6:
Epoch 4 of 150

Testing for epoch 4 index 1:

Testing for epoch 4 index 2:

Testing for epoch 4 index 3:

Testing for epoch 4 index 4:

Testing for epoch 4 index 5:

Testing for epoch 4 index 6:
Epoch 5 of 150

Testing for epoch 5 index 1:

Testing for epoch 5 index 2:

Testing for epoch 5 index 3:

Testing for epoch 5 index 4:

Testing for epoch 5 index 5:

Testing for epoch 5 index 6:
Epoch 6 of 150

Testing f

KeyboardInterrupt: 

In [None]:
# Score the samples the MO-GAAL model was fitted on, then print the ROC AUC
# of those scores against the dataset labels.
decision_values = mogaal_model.decision_function(X=dataset.data)
AUC(truth=dataset.ground_truth, decision=decision_values)

AUC: 0.6977459016393442


In [None]:
# LOF baseline with the library's default hyper-parameters, fitted on the
# same feature table as the other detectors.
lof_model = LOF()
lof_model.fit(X=dataset.data)

LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, novelty=True, p=2)

In [None]:
# Score the fitted samples with LOF and print the ROC AUC of those scores
# against the dataset labels.
decision_values = lof_model.decision_function(X=dataset.data)
AUC(truth=dataset.ground_truth, decision=decision_values)

AUC: 0.7295081967213115


In [None]:
# kNN baseline with the library's default hyper-parameters, fitted on the
# same feature table as the other detectors.
knn_model = KNN()
knn_model.fit(X=dataset.data)

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0)

In [None]:
# Score the fitted samples with kNN and print the ROC AUC of those scores
# against the dataset labels.
decision_values = knn_model.decision_function(X=dataset.data)
AUC(truth=dataset.ground_truth, decision=decision_values)

AUC: 0.7264344262295082


In [None]:
# AnoGAN detector with the library's default hyper-parameters, fitted on the
# same feature table as the other detectors.
anogan_model = AnoGAN()
anogan_model.fit(X=dataset.data)

AnoGAN(D_layers=[20, 10, 5], G_layers=[20, 10, 3, 10, 20],
    activation_hidden='tanh', batch_size=32, contamination=0.1,
    dropout_rate=0.2, epochs=500, epochs_query=20,
    index_D_layer_for_recon_error=1, latent_dim_G=2, learning_rate=0.001,
    learning_rate_query=0.01, output_activation=None, preprocessing=False,
    verbose=0)

In [None]:
# Score the fitted samples with AnoGAN and print the ROC AUC of those scores
# against the dataset labels.
decision_values = anogan_model.decision_function(X=dataset.data)
AUC(truth=dataset.ground_truth, decision=decision_values)

AUC: 0.7008196721311475
