In [None]:
import warnings #suppress warnings
import torch
import random
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from scipy.io import arff
from pyod.models.anogan import AnoGAN
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lof import LOF
from pyod.models.gmm import GMM

# Install a global filter that ignores all warnings raised from here on.
warnings.simplefilter("ignore")

In [2]:
class CustomDataset(Dataset):
    """Torch ``Dataset`` over the rows of an ARFF file.

    The file is read with ``scipy.io.arff.loadarff`` and converted to a
    DataFrame whose ``outlier`` column is replaced by integer codes. The full
    table (label column included) is exposed both as a float32 tensor and as a
    NumPy array.

    Attributes:
        arff_data: the ``(data, meta)`` tuple returned by ``loadarff``.
        df: DataFrame built from the data part, with ``outlier`` integer-coded.
        data_numpy: NumPy array holding every column of ``df``.
        data_tensor: float32 tensor with the same contents as ``data_numpy``.
        n: number of rows in ``df``.
    """

    def __init__(self, path):
        """Load the ARFF file at ``path`` and build the tensor/array views."""
        # --- preprocessing ---
        self.arff_data = arff.loadarff(path)
        records = self.arff_data[0]
        self.df = pd.DataFrame(records)

        # Integer-encode the label column. Original author's note: 0 is
        # outlier, 1 is normal data.
        # NOTE(review): pd.factorize numbers values in order of first
        # appearance, so that mapping only holds if the first row of the file
        # is an outlier -- confirm against the dataset.
        label_codes, _ = pd.factorize(self.df["outlier"])
        self.df["outlier"] = label_codes
        # --- end preprocessing ---

        self.data_numpy = self.df.to_numpy()
        # torch.tensor copies its input, so the tensor does not alias data_numpy.
        self.data_tensor = torch.tensor(self.data_numpy).float()
        self.n = len(self.df)

    def __len__(self):
        """Return the number of rows."""
        return self.data_tensor.shape[0]

    def __getitem__(self, i):
        """Return row ``i`` (features followed by the label code) as a tensor."""
        row = self.data_tensor[i]
        return row

In [19]:
# --- Experiment configuration ---
path = "./Resources/Datasets/Arrhythmia_withoutdupl_norm_02_v01.arff"
batch_size = 128
num_workers = 2
# Number of GPUs to use; 0 selects the CPU.
gpu = 0

# Seed Python's and torch's RNGs so the random split below is repeatable.
seed = 777
random.seed(seed)
torch.manual_seed(seed)

usedDevice = torch.device("cuda" if gpu != 0 else "cpu")

# --- Data ---
dataset = CustomDataset(path)
# 60/20/20 train/eval/test split over every column except the last one (the
# label column is sliced off).
# TODO: random_split is fed a raw NumPy array here -- is this a hack?
train_set, eval_set, test_set = torch.utils.data.random_split(
    dataset.data_numpy[:, :-1],
    [0.6, 0.2, 0.2],
)
# TODO: maybe one data loader for each category?
dataloader = DataLoader(
    dataset=dataset.data_tensor,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

In [None]:
# Print the first ten samples of the test split.
for sample_idx in range(10):
    sample = test_set[sample_idx]
    print(sample)

In [21]:
def check_accuracy(prediction, indices=None, labels=None):
    """Print the number of evaluated samples and the accuracy of ``prediction``.

    A prediction counts as correct when it DIFFERS from the stored label. The
    comparison is inverted on purpose: PyOD's ``predict`` returns 1 for
    outliers and 0 for inliers, whereas the dataset labels are assumed to be
    coded 0 = outlier, 1 = normal.
    NOTE(review): if the label coding ever flips (it is derived from the order
    in which labels appear in the file), this comparison must flip too.

    Args:
        prediction: Sequence of predicted labels, aligned position by position
            with ``indices``.
        indices: Dataset row index of each evaluated sample. Defaults to the
            notebook-level ``test_set.indices``.
        labels: Ground-truth labels, indexable by dataset row. Defaults to the
            last column of the notebook-level ``dataset.data_numpy``.

    Raises:
        ValueError: If there are no samples to evaluate, or if the number of
            predictions does not match the number of samples.
    """
    # Fall back to the notebook globals only when the caller gave nothing,
    # so existing single-argument calls keep working.
    if indices is None:
        indices = test_set.indices
    if labels is None:
        labels = dataset.data_numpy[:, -1]

    indices = list(indices)
    total = len(indices)
    if total == 0:
        raise ValueError("cannot compute accuracy on an empty split")
    if len(prediction) != total:
        # Previously a longer prediction vector was silently truncated.
        raise ValueError(
            "got " + str(len(prediction)) + " predictions for "
            + str(total) + " samples"
        )

    # Inverted comparison -- see the docstring.
    correct = sum(
        1 for row, predicted in zip(indices, prediction)
        if labels[row] != predicted
    )

    print(total)
    print("Accuracy: " + str(correct / total))

In [None]:
# PyOD MO_GAAL detector; contamination=0.05 is the assumed proportion of
# outliers in the data.
mogaal_model = MO_GAAL(contamination=0.05)
# Fit on the training split only (features, no labels are passed).
mogaal_model.fit(train_set)

In [None]:
# Predict labels for the test split (PyOD convention: 0 = inlier, 1 = outlier).
test_mogaal_pred = mogaal_model.predict(test_set)

In [None]:
# Print the test-split size and the accuracy of the MO_GAAL predictions.
check_accuracy(test_mogaal_pred)

In [None]:
# PyOD AnoGAN detector with its default hyperparameters.
anogan_model = AnoGAN()
# Fit on the training split only (features, no labels are passed).
anogan_model.fit(train_set)

In [10]:
# Predict labels for the test split (PyOD convention: 0 = inlier, 1 = outlier).
test_anogan_pred = anogan_model.predict(test_set)

In [None]:
# Print the test-split size and the accuracy of the AnoGAN predictions.
check_accuracy(test_anogan_pred)

In [None]:
# PyOD LOF (local outlier factor) detector with its default hyperparameters.
lof_model = LOF()
# Fit on the training split only (features, no labels are passed).
lof_model.fit(train_set)

In [13]:
# Predict labels for the test split (PyOD convention: 0 = inlier, 1 = outlier).
test_lof_pred = lof_model.predict(test_set)

In [None]:
# Print the test-split size and the accuracy of the LOF predictions.
check_accuracy(test_lof_pred)

In [None]:
# PyOD GMM detector restricted to a single mixture component.
gmm_model = GMM(n_components=1)
# Fit on the training split only (features, no labels are passed).
gmm_model.fit(train_set)

In [31]:
# Predict labels for the test split (PyOD convention: 0 = inlier, 1 = outlier).
test_gmm_pred = gmm_model.predict(test_set)

In [None]:
# Print the test-split size and the accuracy of the GMM predictions.
check_accuracy(test_gmm_pred)