In [17]:
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from pyod.models.loda import LODA
from pyod.models.dif import DIF
from sklearn.preprocessing import StandardScaler


In [18]:
data = loadmat('shuttle.mat')
X = data['X']
y = data['y']


scaler = StandardScaler()

X_normalized = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.4, random_state=42)

In [21]:
def evaluate_models(X, y, n_splits=1, test_size=0.4, random_state=42):
    results = {
        'IForest': {'BA': [], 'ROC_AUC': []},
        'LODA': {'BA': [], 'ROC_AUC': []},
        'DIF': {'BA': [], 'ROC_AUC': []}
    }
    
    for i in range(n_splits):
        X_train, X_test, y_train, y_test = train_test_split(
            X_normalized, y, test_size=test_size, 
            random_state=random_state + i
        )
        
        iforest = IsolationForest(random_state=random_state + i)
        loda = LODA()
        dif = DIF()
        
        for name, model in [('IForest', iforest), ('LODA', loda), ('DIF', dif)]:
            model.fit(X_train)
            
            if name == 'IForest':
                y_pred = model.predict(X_test)
                y_pred = [-1 if x == 1 else 1 for x in y_pred]  
                y_scores = -model.score_samples(X_test)  
            else:
                y_pred = model.predict(X_test)
                y_scores = model.decision_function(X_test)
            
            ba = balanced_accuracy_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_scores)
            
            results[name]['BA'].append(ba)
            results[name]['ROC_AUC'].append(auc)

    print("\nMean Results over", n_splits, "splits:")
    print("-" * 50)
    for model in results:
        mean_ba = np.mean(results[model]['BA'])
        mean_auc = np.mean(results[model]['ROC_AUC'])
        std_ba = np.std(results[model]['BA'])
        std_auc = np.std(results[model]['ROC_AUC'])
        print(f"{model}:")
        print(f"Balanced Accuracy: {mean_ba:.3f} (±{std_ba:.3f})")
        print(f"ROC AUC: {mean_auc:.3f} (±{std_auc:.3f})")
        print("-" * 50)


evaluate_models(X_normalized, y)





Mean Results over 1 splits:
--------------------------------------------------
IForest:
Balanced Accuracy: 0.493 (±0.000)
ROC AUC: 0.998 (±0.000)
--------------------------------------------------
LODA:
Balanced Accuracy: 0.737 (±0.000)
ROC AUC: 0.804 (±0.000)
--------------------------------------------------
DIF:
Balanced Accuracy: 0.517 (±0.000)
ROC AUC: 0.975 (±0.000)
--------------------------------------------------
