In [4]:
import pandas as pd
import brminer
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Function importing Dataset
def importdata(trainFile, testFile):
    """Load the training and test partitions from comma-separated files.

    Parameters
    ----------
    trainFile : path of the CSV file holding the training partition.
    testFile : path of the CSV file holding the test partition.

    Returns
    -------
    (train, test) : pair of pandas DataFrames, in that order.
    """
    train, test = (pd.read_csv(path, sep=',') for path in (trainFile, testFile))
    return train, test

# Function to split target from data
def splitdataset(train, test):
    """Split features from the class column and build scaled feature variants.

    The last column of `train` and `test` is treated as the class label; all
    other columns are features. Columns of dtype ``object`` are one-hot
    encoded; the remaining columns are passed through unchanged.

    Parameters
    ----------
    train, test : pandas DataFrames with the same columns, class label last.

    Returns
    -------
    A 10-tuple, in this order:
        X_train, X_test             -- encoded, unscaled features
        y_train, y_test             -- last column of train / test
        X_train_minmax, X_test_minmax         -- MinMaxScaler fitted on X_train
        X_train_std, X_test_std               -- StandardScaler fitted on X_train
        X_train_minmax_std, X_test_minmax_std -- StandardScaler fitted on the
                                                 min-max scaled training data
    """
    n_train = len(train)

    # Stack both partitions so the encoder sees every category that occurs in
    # either of them and train/test end up with identical encoded columns.
    all_data = pd.concat([train, test], ignore_index=True, sort=False, axis=0)
    features = all_data.iloc[:, :-1]
    nominal = features.select_dtypes(include=['object'])
    numeric = features.select_dtypes(exclude=['object'])

    if nominal.shape[1] > 0:
        # The encoder is only built when there is something to encode: fitting
        # it on a frame with zero columns is wasted work and can raise,
        # depending on the scikit-learn version.
        # The sparse-output keyword is deliberately left at its default (sparse
        # output): it was renamed from `sparse` to `sparse_output` across
        # scikit-learn releases, and the default is the same under both names.
        encoder = OneHotEncoder()
        encoded = pd.DataFrame(encoder.fit_transform(nominal).toarray())
        # ignore_index=True on axis=1 relabels the columns 0..k-1
        # (encoded columns first, then the non-nominal ones).
        coded = pd.concat([encoded, numeric], ignore_index=True, sort=False, axis=1)
    else:
        coded = features

    # Separating the target variable
    X_train = coded[:n_train]
    y_train = train.values[:, -1]

    X_test = coded[n_train:]
    y_test = test.values[:, -1]

    # Every scaler is fitted on the training rows only and then applied to test.
    mm_scaler = MinMaxScaler()
    X_train_minmax = pd.DataFrame(mm_scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_minmax = pd.DataFrame(mm_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    std_scaler = StandardScaler()
    X_train_std = pd.DataFrame(std_scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_std = pd.DataFrame(std_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    # A separate scaler instance is used for the min-max -> standardised variant
    # so `std_scaler` stays fitted on the raw features.
    minmax_std_scaler = StandardScaler()
    X_train_minmax_std = pd.DataFrame(minmax_std_scaler.fit_transform(X_train_minmax), index=X_train_minmax.index, columns=X_train_minmax.columns)
    X_test_minmax_std = pd.DataFrame(minmax_std_scaler.transform(X_test_minmax), index=X_test_minmax.index, columns=X_test_minmax.columns)

    return X_train, X_test, y_train, y_test, X_train_minmax, X_test_minmax, X_train_std, X_test_std, X_train_minmax_std, X_test_minmax_std

# Function to make predictions
def prediction(X_test, clf_object):
    """Return the per-sample scores `clf_object.score_samples` gives for `X_test`.

    `clf_object` may be any object exposing a ``score_samples(X)`` method; its
    return value is passed back unchanged.
    """
    return clf_object.score_samples(X_test)

def result_of_Class(y_test, y_pred, saveFile):
    """Write the predicted scores to `saveFile` as text, four decimals each.

    Parameters
    ----------
    y_test : unused; kept so existing callers keep working.
    y_pred : array-like of numeric scores to write.
    saveFile : path (or open file handle) accepted by ``numpy.savetxt``.
    """
    # Local import so the function does not depend on a module-level `np`
    # alias having been defined by some other cell.
    import numpy as np

    np.savetxt(saveFile, y_pred, fmt='%.4f')

In [10]:
import os
import time

# Root folder holding one sub-folder per dataset; each dataset folder is
# expected to contain "<name>-5-1tra.csv" and "<name>-5-1tst.csv".
keel_datasets_path = "Unsupervised_Anomaly_Detection"
keel_datasets_path = os.path.abspath(keel_datasets_path)

for root, dirs, files in os.walk(keel_datasets_path, topdown=False):
    for name in dirs:
        # Build the paths from `root` (the directory that actually contains
        # `name`) and with os.path.join, so this works on any OS and for
        # directories that os.walk reports below the top level.
        dataset_dir = os.path.join(root, name)
        trainFile = os.path.join(dataset_dir, name + "-5-1tra.csv")
        testFile = os.path.join(dataset_dir, name + "-5-1tst.csv")

        # Folders without the expected fold files (e.g. notebook checkpoint
        # folders) are skipped instead of aborting the whole run.
        if not (os.path.isfile(trainFile) and os.path.isfile(testFile)):
            print(f'Skipping {name}: fold files not found')
            continue

        print(name)

        # The timer covers loading, preprocessing, fitting and scoring.
        tic = time.perf_counter()

        # Loading data
        train, test = importdata(trainFile, testFile)

        # Preprocessing: only the unscaled features are used below; the scaled
        # variants are still unpacked so they remain available in the notebook.
        (X_train, X_test, y_train, y_test,
         X_train_minmax, X_test_minmax,
         X_train_std, X_test_std,
         X_train_minmax_std, X_test_minmax_std) = splitdataset(train, test)

        # Training
        clf_classif1 = brminer.BRM()
        clf_classif1.fit(X_train, y_train)

        # Scoring the test partition
        y_pred = clf_classif1.score_samples(X_test)
        auc = roc_auc_score(y_test, y_pred)

        toc = time.perf_counter()

        # NOTE(review): the printed value is max(auc, 1 - auc), i.e. the score
        # direction is picked after looking at the test labels -- confirm this
        # is the intended evaluation protocol.
        print(f'Testing AUC: {auc if auc > .5 else 1 - auc}')
        print(f'This took {toc - tic:0.4f} seconds')

abalone-17_vs_7-8-9-10
Testing AUC: 0.8315058479532164
This took 6.2208 seconds
abalone-19_vs_10-11-12-13
Testing AUC: 0.5956873315363882
This took 2.9196 seconds
abalone-20_vs_8-9-10
Testing AUC: 0.8483245149911817
This took 3.9546 seconds
abalone-21_vs_8
Testing AUC: 0.9883040935672515
This took 0.5045 seconds
abalone-3_vs_11
Testing AUC: 0.6904761904761905
This took 0.4128 seconds
abalone19
Testing AUC: 0.6393244873341375
This took 18.4293 seconds
abalone9-18
Testing AUC: 0.8703703703703703
This took 0.7210 seconds
car-good
Testing AUC: 0.5
This took 4.0997 seconds
car-vgood
Testing AUC: 0.5368445368445368
This took 4.0960 seconds
cleveland-0_vs_4
Testing AUC: 0.9583333333333334
This took 0.1453 seconds
dermatology-6
Testing AUC: 0.6838235294117647
This took 0.5223 seconds
ecoli-0-1-3-7_vs_2-6
Testing AUC: 0.9272727272727272
This took 0.1565 seconds
ecoli-0-1-4-6_vs_5
Testing AUC: 0.7932692307692308
This took 0.1543 seconds
ecoli-0-1-4-7_vs_2-3-5-6
Testing AUC: 0.717741935483871
Thi