In [9]:
from scipy.io import arff
import pandas as pd
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.anogan import AnoGAN
from sklearn import metrics
import tensorflow as tf
import numpy as np
import random

In [10]:
class CustomData():
    """Outlier-detection dataset read from an ARFF file.

    Attributes:
        data: DataFrame holding every column except the last two.
        ground_truth: Series holding the last column of the file.
    """

    def __init__(self, path):
        """Load ``path`` with ``arff.loadarff`` and split it into features and labels.

        Args:
            path: ARFF source handed unchanged to ``arff.loadarff``.
        """
        records, _meta = arff.loadarff(path)
        df = pd.DataFrame(records)
        # Replace the raw "outlier" values by integer codes; sort=True makes the
        # codes follow the sorted order of the distinct raw values.
        df["outlier"] = pd.factorize(df["outlier"], sort=True)[0]

        # NOTE(review): assumes the two trailing columns are a record id and the
        # "outlier" flag, so neither ends up among the features - confirm.
        self.data = df.iloc[:, :-2]
        self.ground_truth = df.iloc[:, -1]

    def __len__(self):
        """Return the number of rows in ``data``."""
        return len(self.data)

    def __getitem__(self, i):
        """Return the row(s) of ``data`` at integer position (or slice) ``i``."""
        # Positional row access: plain ``self.data[i]`` is a column-label lookup
        # on a DataFrame and raises KeyError for an integer index.
        return self.data.iloc[i]
        
def AUC(truth, decision):
    """Print the ROC-AUC of ``decision`` scores measured against ``truth`` labels.

    Args:
        truth: Ground-truth labels handed to ``metrics.roc_auc_score``.
        decision: Scores handed to ``metrics.roc_auc_score``.

    Returns:
        None; the score is only printed, prefixed with ``AUC: ``.
    """
    score = metrics.roc_auc_score(truth, decision)
    print("AUC:", score)

In [11]:
import os
def initialize(seed):
    """Seed the random number generators and request deterministic TensorFlow execution.

    Args:
        seed: Integer seed passed to ``tf.keras.utils.set_random_seed`` and
            exported (as a string) through ``PYTHONHASHSEED``.
    """
    tf.keras.utils.set_random_seed(seed) #seeds numpy, random and tf all at once
    tf.config.experimental.enable_op_determinism()

    # Limit both TensorFlow thread pools to a single thread.
    tf.config.threading.set_inter_op_parallelism_threads(1)
    tf.config.threading.set_intra_op_parallelism_threads(1)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    # The variable is spelled PYTHONHASHSEED. Python reads it at interpreter
    # start-up, so setting it here only reaches processes launched afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)

In [None]:
# Disabled experiment: the whole cell is one bare string literal, so none of the
# code inside it runs. It holds a Fashion-MNIST setup in which the class stored
# in `outlier` is the one selected for training and labelled 0 in ground_truth,
# while every other class is labelled 1.
'''
(prior, prior_labels), (test, test_labels) = tf.keras.datasets.fashion_mnist.load_data()
outlier = 3
    
idx = prior_labels == outlier
train = prior[idx].copy() / 255
nsamples, nx, ny = np.shape(train)
train = train.reshape(nsamples, nx*ny)
    
test_copy = test.copy() / 255
nsamples, nx, ny = np.shape(test_copy)
test_copy = test_copy.reshape(nsamples, nx*ny)
    
    # DONT USE 1 OR 0 AS INLIER
ground_truth = test_labels.copy()
ground_truth[ground_truth != outlier] = 1
ground_truth[ground_truth == outlier] = 0
    
initialize(777)
'''

In [None]:
# Disabled experiment (bare string literal, never executed): stacks the first 20
# test images of class `outlier` in front of all remaining test images, flattens
# them, divides by 255, and marks those 20 rows with 0 in `ground` (1 elsewhere).
# It reads `test`, `test_labels` and `outlier`, none of which it defines itself.
# NOTE(review): the hard-coded 9020 presumably equals 20 + the number of
# non-outlier test images - confirm before re-enabling.
'''
out_idx = test_labels == outlier
norm_idx = test_labels != outlier

outliers = test[out_idx].copy()
normal = test[norm_idx].copy()

outliers = outliers[0:20]

res = np.concatenate((outliers, normal)) / 255
nsamples, nx, ny = np.shape(res)
res = res.reshape(nsamples, nx*ny)

ground = np.ones((9020))
for i in range(20):
    ground[i] = 0
'''

In [60]:
# Disabled variant of the Fashion-MNIST setup (bare string literal, never
# executed) in which the selected training class is named `inlier`. Because the
# string is the cell's only expression, the notebook echoes it as the cell output.
'''
(prior, prior_labels), (test, test_labels) = tf.keras.datasets.fashion_mnist.load_data()
inlier = 3

idx = prior_labels == inlier
train = prior[idx].copy() / 255
nsamples, nx, ny = np.shape(train)
train = train.reshape(nsamples, nx*ny)
    
test_copy = test.copy() / 255
nsamples, nx, ny = np.shape(test_copy)
test_copy = test_copy.reshape(nsamples, nx*ny)
    
    # DONT USE 1 OR 0 AS INLIER
ground_truth = test_labels.copy()
ground_truth[ground_truth != inlier] = 1
ground_truth[ground_truth == inlier] = 0
''' 

'\n(prior, prior_labels), (test, test_labels) = tf.keras.datasets.fashion_mnist.load_data()\ninlier = 3\n\nidx = prior_labels == inlier\ntrain = prior[idx].copy() / 255\nnsamples, nx, ny = np.shape(train)\ntrain = train.reshape(nsamples, nx*ny)\n    \ntest_copy = test.copy() / 255\nnsamples, nx, ny = np.shape(test_copy)\ntest_copy = test_copy.reshape(nsamples, nx*ny)\n    \n    # DONT USE 1 OR 0 AS INLIER\nground_truth = test_labels.copy()\nground_truth[ground_truth != inlier] = 1\nground_truth[ground_truth == inlier] = 0\n'

In [69]:
# One-class experiment: LOF is fitted on the training images of a single class
# (`inlier`) and then scores the complete test split.
(prior, prior_labels), (test, test_labels) = tf.keras.datasets.cifar10.load_data() #tf.keras.datasets.fashion_mnist.load_data()
inlier = 6

# ravel() yields a 1-D row mask whether the labels arrive as (N,) or (N, 1).
idx = prior_labels.ravel() == inlier
train = prior[idx]  # boolean indexing already returns a copy

print(np.shape(train))
print(len(train))

# Flatten each image to one feature vector and divide by 255. reshape(n, -1)
# copes with any number of trailing image axes, so the alternative dataset named
# in the comment above can be swapped in without touching this code.
train = train.reshape(len(train), -1) / 255
test_copy = test.reshape(len(test), -1) / 255

# 0 = inlier class, 1 = any other class. A single comparison replaces the former
# two-step in-place relabelling, which produced wrong labels whenever `inlier`
# itself was 0 or 1.
ground_truth = (test_labels.ravel() != inlier).astype(int)

initialize(777)
lof_model = LOF()
lof_model.fit(train)
AUC(ground_truth, lof_model.decision_function(test_copy))

(5000, 32, 32, 3)
5000
AUC: 0.691850111111111


AUC: 0.5108335555555555


In [None]:
# Locations of the ARFF benchmark files; only the arrhythmia file is loaded here.
arrythmia_path, wave_path, internet_ads_path = (
    "./Resources/Datasets/Arrhythmia_withoutdupl_norm_02_v01.arff",
    "./Resources/Datasets/Waveform_withoutdupl_norm_v01.arff",
    "./Resources/Datasets/InternetAds_withoutdupl_norm_02_v01.arff",
)

dataset = CustomData(path=arrythmia_path)

In [None]:
# GAN training draws random numbers, so reseed Python, NumPy and TensorFlow
# right before building the model; re-running this cell then starts from the
# same random state (777 is the seed already used for the LOF experiment).
tf.keras.utils.set_random_seed(777)
mogaal_model = MO_GAAL(lr_d=0.01, lr_g=0.01, stop_epochs=50)
mogaal_model.fit(dataset.data)

In [None]:
# Score the samples MO_GAAL was fitted on and report the ROC-AUC against the labels.
decision_values = mogaal_model.decision_function(dataset.data)
AUC(truth=dataset.ground_truth, decision=decision_values)

In [None]:
# Local Outlier Factor with the library defaults, fitted on the feature columns.
lof_model = LOF()
lof_model.fit(X=dataset.data)

In [None]:
# dataset.data is exactly what lof_model was fitted on, so read the training
# scores stored by fit(). decision_function() is meant for unseen samples: on
# the training set every point would count itself among its own neighbours,
# which distorts the local density estimate.
decision_values = lof_model.decision_scores_
AUC(dataset.ground_truth, decision_values)

In [None]:
# k-nearest-neighbours detector with the library defaults, fitted on the feature columns.
knn_model = KNN()
knn_model.fit(X=dataset.data)

In [None]:
# dataset.data is exactly what knn_model was fitted on, so read the training
# scores stored by fit(). Querying the fitted model with its own training points
# through decision_function() would return each point as its own nearest
# neighbour (distance 0) and shift the k-th-neighbour distance used as score.
decision_values = knn_model.decision_scores_
AUC(dataset.ground_truth, decision_values)

In [None]:
# GAN training draws random numbers, so reseed Python, NumPy and TensorFlow
# right before building the model; re-running this cell then starts from the
# same random state (777 is the seed already used for the LOF experiment).
tf.keras.utils.set_random_seed(777)
anogan_model = AnoGAN()
anogan_model.fit(dataset.data)

In [None]:
# Score the samples AnoGAN was fitted on, report the ROC-AUC, then show the
# training curves recorded by the model.
decision_values = anogan_model.decision_function(dataset.data)
AUC(truth=dataset.ground_truth, decision=decision_values)
anogan_model.plot_learning_curves()