In [1]:
from scipy.io import arff
import pandas as pd
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.anogan import AnoGAN
from sklearn import metrics
import tensorflow as tf
import numpy as np
import random

2023-05-23 09:15:41.228191: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class CustomData():
    """Tabular outlier-detection dataset loaded from an ARFF file.

    Attributes:
        data: DataFrame holding every column except the last two.
        ground_truth: Series of integer-coded labels taken from the last column.
    """

    def __init__(self, path):
        """Read the ARFF file at ``path`` and split it into features and labels.

        The ``outlier`` column is replaced by the integer codes returned by
        ``pd.factorize(..., sort=True)``, so codes follow the sorted order of
        the distinct label values.
        """
        arff_data = arff.loadarff(path)
        df = pd.DataFrame(arff_data[0])
        df["outlier"] = pd.factorize(df["outlier"], sort=True)[0]

        # The last two columns are excluded from the features.
        # NOTE(review): presumably these are an id column followed by
        # "outlier" -- confirm against the ARFF schema.
        self.data = df.iloc[:, :-2]
        self.ground_truth = df.iloc[:, -1]

    def __len__(self):
        """Return the number of rows in ``data``."""
        return len(self.data)

    def __getitem__(self, i):
        """Return the row(s) of ``data`` at position ``i``.

        ``DataFrame[i]`` selects a *column* by label, so positional row access
        has to go through ``.iloc``; this also makes iteration stop with
        ``IndexError`` after the last row, consistent with ``__len__``.
        """
        return self.data.iloc[i]
        
def AUC(truth, decision):
    """Print the ROC AUC of ``decision`` scores measured against ``truth`` labels.

    Nothing is returned; the score is only written to stdout.
    """
    score = metrics.roc_auc_score(truth, decision)
    print("AUC: " + str(score))

In [3]:
# Seed every random number source used in this notebook so runs are repeatable.
seed = 666
tf.keras.utils.set_random_seed(seed)
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)
# default_rng() builds a new, independent Generator; it does not seed NumPy's
# global state, so the object must be kept to be of any use. Draw from `rng`
# wherever a seeded Generator is wanted.
rng = np.random.default_rng(seed)
rng

Generator(PCG64) at 0x7F2DB06BBBA0

In [10]:
# Load Fashion-MNIST through Keras: the first pair is bound to prior/prior_labels,
# the second pair to test/test_labels.
(prior, prior_labels), (test, test_labels) = tf.keras.datasets.fashion_mnist.load_data()

In [51]:
# Load Fashion-MNIST through Keras: images/labels for fitting ("prior") and for
# evaluation ("test").
(prior, prior_labels), (test, test_labels) = tf.keras.datasets.fashion_mnist.load_data()

# Class label used both to pick the training subset and as the positive class
# in ground_truth.
outlier = 9

# NOTE(review): the training subset is the very class that ground_truth marks
# as 1 below; an outlier detector is usually fitted on the other classes.
# Confirm which direction is intended before trusting the resulting AUC.
idx = prior_labels == outlier

# Rescale pixel values by 1/255 and flatten every image to one feature row.
# Boolean indexing and the division each produce a new array, so `prior` and
# `test` are left untouched without an explicit copy.
train = prior[idx] / 255
nsamples, nx, ny = np.shape(train)
train = train.reshape(nsamples, nx * ny)

test_copy = test / 255
nsamples, nx, ny = np.shape(test_copy)
test_copy = test_copy.reshape(nsamples, nx * ny)

# Binary ground truth: 1 where the test label equals `outlier`, 0 elsewhere.
# Derived in one step from the untouched labels; relabelling in place in two
# passes would turn every entry into 1 when outlier == 0.
ground_truth = (test_labels == outlier).astype(test_labels.dtype)


In [52]:
# Sanity check: one shape per line for the train matrix, test matrix and labels.
print(*(np.shape(arr) for arr in (train, test_copy, ground_truth)), sep="\n")

(6000, 784)
(10000, 784)
(10000,)


In [55]:
# Show the first flattened training sample to eyeball the rescaled pixel values.
print(train[0])

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.00392157 0.         0.         0.05098039 0.28627451 

In [None]:
# Locations of the ARFF benchmark files, relative to the notebook's working directory.
arrythmia_path = "./Resources/Datasets/Arrhythmia_withoutdupl_norm_02_v01.arff"
wave_path = "./Resources/Datasets/Waveform_withoutdupl_norm_v01.arff"
internet_ads_path = "./Resources/Datasets/InternetAds_withoutdupl_norm_02_v01.arff"

# Dataset used by all model cells below; pass one of the other paths to switch.
dataset = CustomData(arrythmia_path)

In [None]:
# Fit a MO_GAAL detector on the feature table only; the labels are not passed to fit().
mogaal_model = MO_GAAL(lr_d=0.01, lr_g=0.01, stop_epochs=50)
mogaal_model.fit(dataset.data)

In [None]:
# Score the same rows the model was fitted on and print the ROC AUC against the labels.
decision_values = mogaal_model.decision_function(dataset.data)
AUC(dataset.ground_truth, decision_values)

In [None]:
# Fit a LOF detector with its default settings on the feature table only.
lof_model = LOF()
lof_model.fit(dataset.data)

In [None]:
# The rows being evaluated are the ones LOF was fitted on, so use the training
# scores stored by fit() (decision_scores_). decision_function() is meant for
# unseen samples: PyOD's LOF wraps scikit-learn's LocalOutlierFactor in novelty
# mode, whose scoring of its own training points is documented as not valid.
decision_values = lof_model.decision_scores_
AUC(dataset.ground_truth, decision_values)

In [None]:
# Fit a KNN detector with its default settings on the feature table only.
knn_model = KNN()
knn_model.fit(dataset.data)

In [None]:
# The rows being evaluated are the ones KNN was fitted on, so use the training
# scores stored by fit() (decision_scores_). Re-querying training rows through
# decision_function() lets every point match itself at distance 0, which
# shifts the neighbour distances the score is built from.
decision_values = knn_model.decision_scores_
AUC(dataset.ground_truth, decision_values)

In [None]:
# Fit an AnoGAN detector with its default settings on the feature table only.
anogan_model = AnoGAN()
anogan_model.fit(dataset.data)

In [None]:
# Score the same rows the model was fitted on, print the ROC AUC against the
# labels, then plot the learning curves of the fitted model.
decision_values = anogan_model.decision_function(dataset.data)
AUC(dataset.ground_truth, decision_values)
anogan_model.plot_learning_curves()