KNN

In [91]:
import numpy as np
import pandas as pd
from pyod.models.knn import KNN
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

def knn_outlier_detection(data, ground_truth, n_neighbors=20):
    """
    Performs KNN outlier detection on the dataset and evaluates it using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        n_neighbors (int): Number of neighbors for KNN.
        contamination (float): Proportion of the dataset expected to be outliers.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize the KNN model
    knn = KNN(n_neighbors=n_neighbors)
    
    # Fit the model
    knn.fit(data_std)

    # Get outlier scores and predictions
    outlier_scores = knn.decision_scores_  # Outlier scores (higher is more anomalous)
    predictions = knn.predict(data_std)   # Predictions (1 = outlier, 0 = normal)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, outlier_scores)

    return precision,recall, auc



In [92]:
# KNN
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05389221556886228
Recall: 0.2647058823529412
AUC: 0.6709285777326686


In [93]:
# KNN
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.04
Recall: 0.3333333333333333
AUC: 0.8565573770491803


In [94]:
# KNN
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05421686746987952
Recall: 0.28125
AUC: 0.7612537764350453


In [95]:
# KNN
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.7966666666666667


In [96]:
# KNN
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.3333333333333333
Recall: 0.6666666666666666
AUC: 0.8484848484848484


In [97]:
# KNN
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.4
Recall: 1.0
AUC: 0.9976359338061466


In [98]:
# KNN
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [99]:
# KNN
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6482222222222221


LOF

In [100]:
import numpy as np
import pandas as pd
from pyod.models.lof import LOF
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

def lof_outlier_detection(data, ground_truth, n_neighbors=20):
    """
    Performs LOF outlier detection on the dataset and evaluates it using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        n_neighbors (int): Number of neighbors for LOF.
        contamination (float): Proportion of the dataset expected to be outliers.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize the LOF model
    lof = LOF(n_neighbors=n_neighbors)
    
    # Fit the model
    lof.fit(data_std)

    # Get outlier scores and predictions
    outlier_scores = lof.decision_scores_  # Outlier scores (higher is more anomalous)
    predictions = lof.predict(data_std)   # Predictions (1 = outlier, 0 = normal)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, outlier_scores)

    return precision,recall, auc
    
    

In [101]:
# LOF
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.06856187290969899
Recall: 0.3014705882352941
AUC: 0.7558470499977936


In [102]:
# LOF
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05
Recall: 0.3333333333333333
AUC: 0.8510928961748634


In [103]:
# LOF
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05442176870748299
Recall: 0.25
AUC: 0.7757930513595166


In [104]:
# LOF
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6366666666666667


In [105]:
# LOF
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.14285714285714285
Recall: 0.3333333333333333
AUC: 0.8080808080808081


In [106]:
# LOF
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.42857142857142855
Recall: 1.0
AUC: 0.9893617021276595


In [107]:
# LOF
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [108]:
# LOF
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6048888888888888


ABOD

In [109]:
import numpy as np
import pandas as pd
from pyod.models.abod import ABOD
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

def abod_outlier_detection(data, ground_truth):
    """
    Performs ABOD outlier detection on the dataset and evaluates it using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        contamination (float): Proportion of the dataset expected to be outliers.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize the ABOD model
    abod = ABOD()
    
    # Fit the model
    abod.fit(data_std)

    # Get outlier scores and predictions
    outlier_scores = abod.decision_scores_  # Outlier scores (higher is more anomalous)
    predictions = abod.predict(data_std)   # Predictions (1 = outlier, 0 = normal)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, outlier_scores)

    return  precision, recall, auc


In [110]:
# ABOD
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0715307582260372
Recall: 0.36764705882352944
AUC: 0.7733870967741935


In [111]:
# ABOD
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.034482758620689655
Recall: 0.3333333333333333
AUC: 0.7909836065573771


In [112]:
# ABOD
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.053763440860215055
Recall: 0.3125
AUC: 0.7116125377643505


In [113]:
# ABOD
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.7333333333333334


In [114]:
# ABOD
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.09090909090909091
Recall: 0.3333333333333333
AUC: 0.7070707070707071


In [115]:
# ABOD
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.3333333333333333
Recall: 1.0
AUC: 0.9716312056737588


In [116]:
# ABOD
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.16666666666666666
Recall: 1.0
AUC: 0.9583333333333334


In [117]:
# ABOD
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.03571428571428571
Recall: 0.2222222222222222
AUC: 0.6888888888888889


KNN LOF ABOD with avg

In [118]:
import numpy as np
import pandas as pd
from pyod.models.abod import ABOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler

def knn_lof_abod_avg_outlier_detection(data, ground_truth, contamination=0.1, n_neighbors=20):
    """
    Combines ABOD, KNN, and LOF to calculate outlier scores and evaluates using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        contamination (float): Proportion of the dataset expected to be outliers.
        n_neighbors (int): Number of neighbors for KNN and LOF.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize and fit ABOD
    abod = ABOD(contamination=contamination)
    abod.fit(data_std)
    abod_scores = abod.decision_scores_  # Outlier scores from ABOD

    # Initialize and fit KNN
    knn = KNN(n_neighbors=n_neighbors, contamination=contamination)
    knn.fit(data_std)
    knn_scores = knn.decision_scores_  # Outlier scores from KNN

    # Initialize and fit LOF
    lof = LOF(n_neighbors=n_neighbors, contamination=contamination)
    lof.fit(data_std)
    lof_scores = lof.decision_scores_  # Outlier scores from LOF

    # Normalize the scores for ABOD, KNN, and LOF
    scaler = MinMaxScaler()
    abod_scores_norm = scaler.fit_transform(abod_scores.reshape(-1, 1)).flatten()
    knn_scores_norm = scaler.fit_transform(knn_scores.reshape(-1, 1)).flatten()
    lof_scores_norm = scaler.fit_transform(lof_scores.reshape(-1, 1)).flatten()

    # Combine the scores by averaging
    combined_scores = (abod_scores_norm + knn_scores_norm + lof_scores_norm) / 3

    # Predictions based on combined scores
    threshold = np.percentile(combined_scores, 100 * (1 - contamination))  # Threshold for outliers
    predictions = (combined_scores > threshold).astype(int)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, combined_scores)

    return precision,recall, auc


In [119]:
# KNN LOF ABOD with avg
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0661764705882353
Recall: 0.33088235294117646
AUC: 0.7179658885309563


In [120]:
# KNN LOF ABOD with avg
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.04
Recall: 0.3333333333333333
AUC: 0.8620218579234972


In [121]:
# KNN LOF ABOD with avg
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.07692307692307693
Recall: 0.40625
AUC: 0.7845543806646524


In [122]:
# KNN LOF ABOD with avg
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.75


In [123]:
# KNN LOF ABOD with avg
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2857142857142857
Recall: 0.6666666666666666
AUC: 0.8232323232323231


In [124]:
# KNN LOF ABOD with avg
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.4
Recall: 1.0
AUC: 0.9929078014184397


In [125]:
# KNN LOF ABOD with avg
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [126]:
# KNN LOF ABOD with avg
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_avg_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6519999999999999


KNN LOF ABOD with max

In [127]:
import numpy as np
import pandas as pd
from pyod.models.abod import ABOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

def knn_lof_abod_max_outlier_detection(data, ground_truth, contamination=0.1, n_neighbors=20):
    """
    Combines normalized outlier scores from ABOD, KNN, and LOF by taking the maximum score
    and evaluates the result using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        contamination (float): Proportion of the dataset expected to be outliers.
        n_neighbors (int): Number of neighbors for KNN and LOF.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize and fit ABOD
    abod = ABOD(contamination=contamination)
    abod.fit(data_std)
    abod_scores = abod.decision_scores_

    # Initialize and fit KNN
    knn = KNN(n_neighbors=n_neighbors, contamination=contamination)
    knn.fit(data_std)
    knn_scores = knn.decision_scores_

    # Initialize and fit LOF
    lof = LOF(n_neighbors=n_neighbors, contamination=contamination)
    lof.fit(data_std)
    lof_scores = lof.decision_scores_

    # Normalize the scores for ABOD, KNN, and LOF
    scaler = MinMaxScaler()
    abod_scores_norm = scaler.fit_transform(abod_scores.reshape(-1, 1)).flatten()
    knn_scores_norm = scaler.fit_transform(knn_scores.reshape(-1, 1)).flatten()
    lof_scores_norm = scaler.fit_transform(lof_scores.reshape(-1, 1)).flatten()

    # Combine the scores by taking the maximum
    combined_scores = np.maximum.reduce([abod_scores_norm, knn_scores_norm, lof_scores_norm])

    # Predictions based on combined scores
    threshold = np.percentile(combined_scores, 100 * (1 - contamination))  # Threshold for outliers
    predictions = (combined_scores > threshold).astype(int)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, combined_scores)

    return precision, recall, auc


In [128]:
# KNN LOF ABOD with max
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.06764705882352941
Recall: 0.3382352941176471
AUC: 0.7733870967741935


In [129]:
# KNN LOF ABOD with max
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.04
Recall: 0.3333333333333333
AUC: 0.7930327868852459


In [130]:
# KNN LOF ABOD with max
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.05917159763313609
Recall: 0.3125
AUC: 0.711631419939577


In [131]:
# KNN LOF ABOD with max
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.0
Recall: 0.0
AUC: 0.7333333333333334


In [132]:
# KNN LOF ABOD with max
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.7070707070707071


In [133]:
# KNN LOF ABOD with max
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.3333333333333333
Recall: 0.8333333333333334
AUC: 0.9728132387706856


In [134]:
# KNN LOF ABOD with max
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [135]:
# KNN LOF ABOD with max
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_max_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0392156862745098
Recall: 0.2222222222222222
AUC: 0.6888888888888889


KNN LOF ABOD with min

In [136]:
import numpy as np
import pandas as pd
from pyod.models.abod import ABOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

def knn_lof_abod_min_outlier_detection(data, ground_truth, contamination=0.1, n_neighbors=20):
    """
    Combines normalized outlier scores from ABOD, KNN, and LOF by taking the maximum score
    and evaluates the result using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        contamination (float): Proportion of the dataset expected to be outliers.
        n_neighbors (int): Number of neighbors for KNN and LOF.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize and fit ABOD
    abod = ABOD(contamination=contamination)
    abod.fit(data_std)
    abod_scores = abod.decision_scores_

    # Initialize and fit KNN
    knn = KNN(n_neighbors=n_neighbors, contamination=contamination)
    knn.fit(data_std)
    knn_scores = knn.decision_scores_

    # Initialize and fit LOF
    lof = LOF(n_neighbors=n_neighbors, contamination=contamination)
    lof.fit(data_std)
    lof_scores = lof.decision_scores_

    # Normalize the scores for ABOD, KNN, and LOF
    scaler = MinMaxScaler()
    abod_scores_norm = scaler.fit_transform(abod_scores.reshape(-1, 1)).flatten()
    knn_scores_norm = scaler.fit_transform(knn_scores.reshape(-1, 1)).flatten()
    lof_scores_norm = scaler.fit_transform(lof_scores.reshape(-1, 1)).flatten()

    # Combine the scores by taking the minimum
    combined_scores = np.minimum.reduce([abod_scores_norm, knn_scores_norm, lof_scores_norm])

    # Predictions based on combined scores
    threshold = np.percentile(combined_scores, 100 * (1 - contamination))  # Threshold for outliers
    predictions = (combined_scores > threshold).astype(int)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, combined_scores)

    return precision, recall, auc


In [137]:
# KNN LOF ABOD with min
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.06323529411764706
Recall: 0.3161764705882353
AUC: 0.7571797361104983


In [138]:
# KNN LOF ABOD with min
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.04
Recall: 0.3333333333333333
AUC: 0.8510928961748634


In [139]:
# KNN LOF ABOD with min
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.047337278106508875
Recall: 0.25
AUC: 0.7763783987915408


In [140]:
# KNN LOF ABOD with min
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.0
Recall: 0.0
AUC: 0.6366666666666666


In [141]:
# KNN LOF ABOD with min
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2857142857142857
Recall: 0.6666666666666666
AUC: 0.8333333333333331


In [142]:
# KNN LOF ABOD with min
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.4
Recall: 1.0
AUC: 0.9905437352245863


In [143]:
# KNN LOF ABOD with min
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)

Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [144]:
# KNN LOF ABOD with min
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_lof_abod_min_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6073333333333333
