KNN

In [6]:
import numpy as np
import pandas as pd
from pyod.models.knn import KNN
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

def knn_outlier_detection(data, ground_truth, n_neighbors=20):
    """
    Performs KNN outlier detection on the dataset and evaluates it using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        n_neighbors (int): Number of neighbors for KNN.
        contamination (float): Proportion of the dataset expected to be outliers.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize the KNN model
    knn = KNN(n_neighbors=n_neighbors)
    
    # Fit the model
    knn.fit(data_std)

    # Get outlier scores and predictions
    outlier_scores = knn.decision_scores_  # Outlier scores (higher is more anomalous)
    predictions = knn.predict(data_std)   # Predictions (1 = outlier, 0 = normal)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, outlier_scores)

    return precision,recall, auc



In [9]:
# KNN
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05389221556886228
Recall: 0.2647058823529412
AUC: 0.6709285777326686


In [8]:
# KNN
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.04
Recall: 0.3333333333333333
AUC: 0.8565573770491803


In [10]:
# KNN
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05421686746987952
Recall: 0.28125
AUC: 0.7612537764350453


In [11]:
# KNN
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.7966666666666667


In [12]:
# KNN
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.3333333333333333
Recall: 0.6666666666666666
AUC: 0.8484848484848484


In [14]:
# KNN
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.4
Recall: 1.0
AUC: 0.9976359338061466


In [15]:
# KNN
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [16]:
# KNN
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = knn_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6482222222222221


LOF

In [32]:
import numpy as np
import pandas as pd
from pyod.models.lof import LOF
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

def lof_outlier_detection(data, ground_truth, n_neighbors=20):
    """
    Performs LOF outlier detection on the dataset and evaluates it using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        n_neighbors (int): Number of neighbors for LOF.
        contamination (float): Proportion of the dataset expected to be outliers.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize the LOF model
    lof = LOF(n_neighbors=n_neighbors)
    
    # Fit the model
    lof.fit(data_std)

    # Get outlier scores and predictions
    outlier_scores = lof.decision_scores_  # Outlier scores (higher is more anomalous)
    predictions = lof.predict(data_std)   # Predictions (1 = outlier, 0 = normal)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, outlier_scores)

    return precision,recall, auc
    
    

In [33]:
# LOF
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.06856187290969899
Recall: 0.3014705882352941
AUC: 0.7558470499977936


In [34]:
# LOF
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05
Recall: 0.3333333333333333
AUC: 0.8510928961748634


In [35]:
# LOF
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.05442176870748299
Recall: 0.25
AUC: 0.7757930513595166


In [36]:
# LOF
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6366666666666667


In [38]:
# LOF
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.14285714285714285
Recall: 0.3333333333333333
AUC: 0.8080808080808081


In [39]:
# LOF
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.42857142857142855
Recall: 1.0
AUC: 0.9893617021276595


In [40]:
# LOF
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.2
Recall: 1.0
AUC: 0.9583333333333334


In [42]:
# LOF
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = lof_outlier_detection(data, labels, n_neighbors=20)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.6048888888888888


ABOD

In [43]:
import numpy as np
import pandas as pd
from pyod.models.abod import ABOD
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

def abod_outlier_detection(data, ground_truth):
    """
    Performs ABOD outlier detection on the dataset and evaluates it using precision, recall, and AUC.

    Parameters:
        data (numpy array): Input dataset of shape (n_samples, n_features).
        ground_truth (numpy array or pd.Series): Ground truth labels (1 = outlier, 0 = normal).
        contamination (float): Proportion of the dataset expected to be outliers.

    Returns:
        dict: Precision, recall, and AUC scores.
    """
    # Standardize the dataset
    scaler = StandardScaler()
    data_std = scaler.fit_transform(data)

    # Initialize the ABOD model
    abod = ABOD()
    
    # Fit the model
    abod.fit(data_std)

    # Get outlier scores and predictions
    outlier_scores = abod.decision_scores_  # Outlier scores (higher is more anomalous)
    predictions = abod.predict(data_std)   # Predictions (1 = outlier, 0 = normal)

    # Calculate evaluation metrics
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, outlier_scores)

    return  precision, recall, auc


In [44]:
# ABOD
# dataset: Annthyroid_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0715307582260372
Recall: 0.36764705882352944
AUC: 0.7733870967741935


In [46]:
# ABOD
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.034482758620689655
Recall: 0.3333333333333333
AUC: 0.7909836065573771


In [47]:
# ABOD
# dataset: Cardiotocography_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.053763440860215055
Recall: 0.3125
AUC: 0.7116125377643505


In [48]:
# ABOD
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.0
Recall: 0.0
AUC: 0.7333333333333334


In [49]:
# ABOD
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.09090909090909091
Recall: 0.3333333333333333
AUC: 0.7070707070707071


In [50]:
# ABOD
# dataset: Lymphography_withoutdupl_norm_1ofn.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-2].values
    
    # Separate labels (last column)
labels = df.iloc[:, -2].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.3333333333333333
Recall: 1.0
AUC: 0.9716312056737588


In [51]:
# ABOD
# dataset: Parkinson_withoutdupl_norm_05_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.16666666666666666
Recall: 1.0
AUC: 0.9583333333333334


In [52]:
# ABOD
# dataset: Pima_withoutdupl_norm_02_v01.csv

file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)
    
    # Separate data (all columns except the last)
data = df.iloc[:, :-1].values
    
    # Separate labels (last column)
labels = df.iloc[:, -1].values

# Evaluate the outlier detection performance
precision, recall, auc = abod_outlier_detection(data, labels)

print("Precision:", precision)
print("Recall:", recall)
print("AUC:", auc)


Precision: 0.03571428571428571
Recall: 0.2222222222222222
AUC: 0.6888888888888889
