In [3]:
#ECOD 2022
import numpy as np
import pandas as pd
from pyod.models.ecod import ECOD
from sklearn.metrics import precision_score, recall_score, roc_auc_score

def calculate_ecod_outlier_scores(data):
    """
    Fit a default-configured ECOD detector and return its outlier scores.

    A new ``ECOD`` instance is created on every call and fitted on ``data``;
    the ``decision_scores_`` attribute of the fitted detector is returned
    unchanged.

    Args:
        data: Input dataset (numpy array or pandas DataFrame).

    Returns:
        outlier_scores: Array of outlier scores; a higher score means the
            sample is considered more of an outlier.
    """
    detector = ECOD()
    detector.fit(data)

    # The scores live on the fitted detector (higher score -> more outlier).
    return detector.decision_scores_

def evaluate_ecod_outlier_detection(outlier_scores, ground_truth, threshold_percentile=90):
    """
    Evaluate ECOD outlier detection results using precision, recall, and AUC.

    Samples whose score is at or above the ``threshold_percentile``-th
    percentile of ``outlier_scores`` are predicted as outliers (1), all
    others as inliers (0). Precision and recall are computed from these
    binary predictions; AUC is computed from the raw scores and therefore
    does not depend on the threshold.

    Args:
        outlier_scores: Array-like of outlier scores (higher -> more outlier).
        ground_truth: Ground truth labels (numpy array or pandas Series),
            passed unchanged to the scikit-learn metric functions.
        threshold_percentile: Percentile (0-100) of the scores used as the
            outlier cut-off. Defaults to 90, which flags roughly the top
            10% of the samples.

    Returns:
        precision: Precision of the model.
        recall: Recall of the model.
        auc: AUC of the model.
    """
    # Coerce once so plain Python lists support the element-wise comparison below.
    scores = np.asarray(outlier_scores)

    threshold = np.percentile(scores, threshold_percentile)
    predictions = (scores >= threshold).astype(int)  # 1 = outlier, 0 = inlier

    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, scores)

    return precision, recall, auc



In [4]:
# ECOD
# dataset: Annthyroid_norm_02_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Annthyroid dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Annthyroid\\Annthyroid_norm_02_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (80% train, 20% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.0511
Recall: 0.2593
AUC: 0.7380


In [5]:
# ECOD
# dataset: Arrhythmia_withoutdupl_norm_02_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Arrhythmia dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Arrhythmia\\Arrhythmia_withoutdupl_norm_02_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (80% train, 20% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.0000
Recall: 0.0000
AUC: 0.2653


  return np.nan_to_num(skew_sp(X, axis=axis))
  return np.nan_to_num(skew_sp(X, axis=axis))


In [6]:
# ECOD
# dataset: Cardiotocography_norm_02_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Cardiotocography dataset: load the CSV, split it, score
# both splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Cardiotocography\\Cardiotocography_norm_02_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (80% train, 20% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.0588
Recall: 0.3333
AUC: 0.8680


In [8]:
# ECOD
# dataset: HeartDisease_withoutdupl_norm_02_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the HeartDisease dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\HeartDisease\\HeartDisease_withoutdupl_norm_02_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (70% train, 30% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.3, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.2000
Recall: 1.0000
AUC: 0.9111


In [12]:
# ECOD
# dataset: Hepatitis_withoutdupl_norm_05_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Hepatitis dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Hepatitis\\Hepatitis_withoutdupl_norm_05_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (80% train, 20% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.5000
Recall: 1.0000
AUC: 1.0000


In [13]:
# ECOD
# dataset: Lymphography_withoutdupl_norm_1ofn.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Lymphography dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Lymphography\\Lymphography_withoutdupl_norm_1ofn.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last two, as a NumPy array
data = df.iloc[:, :-2].values

# Labels: the second-to-last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
# NOTE(review): the last column is neither a feature nor the label here;
# presumably it is an id column -- verify against the CSV header.
labels = df.iloc[:, -2].values

# Stratified split into training and validation sets (80% train, 20% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.3333
Recall: 1.0000
AUC: 1.0000


In [15]:
# ECOD
# dataset: Parkinson_withoutdupl_norm_05_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Parkinson dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Parkinson\\Parkinson_withoutdupl_norm_05_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (70% train, 30% validation);
# random_state fixes the shuffle so the split is reproducible.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: The least populated class in y has only 1 member", i.e. at least
# one value in `labels` occurs only once, which a stratified split cannot handle.
# Presumably the last column of this file is not the binary outlier label (the
# Lymphography cell reads its label from the second-to-last column) -- verify
# the column layout of the CSV.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.3, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [16]:
# ECOD
# dataset: Pima_withoutdupl_norm_02_v01.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Evaluate ECOD on the Pima dataset: load the CSV, split it, score both
# splits, and print precision / recall / AUC for the validation split.

# Absolute path to the dataset CSV (Windows drive G:).
# NOTE(review): hard-coded local path; the cell only runs where this file exists.
file_path = "G:\\Nazanin\\B project\\code\\dataset\\Pima\\Pima_withoutdupl_norm_02_v01.csv"

# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Feature matrix: every column except the last, as a NumPy array
data = df.iloc[:, :-1].values

# Labels: the last column
# (assumes it holds the binary outlier label with 1 = outlier -- TODO confirm)
labels = df.iloc[:, -1].values

# Stratified split into training and validation sets (80% train, 20% validation);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state=42)

# Fit ECOD on the training split and keep its scores.
# NOTE(review): predicted_outlier_scores is not read anywhere else in this cell.
predicted_outlier_scores = calculate_ecod_outlier_scores(X_train)
        
# Fit a second, separate ECOD model on the validation split and take its scores.
# NOTE(review): calculate_ecod_outlier_scores creates and fits a new detector on
# whatever it is given, so these scores do not come from the model fitted on X_train.
val_outlier_scores = calculate_ecod_outlier_scores(X_val)
        
# Precision / recall / AUC of the validation scores against the validation labels
precision, recall, auc = evaluate_ecod_outlier_detection(val_outlier_scores, y_val)

# Report the metrics with four decimal places
print("Evaluation Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"AUC: {auc:.4f}")



Evaluation Metrics:
Precision: 0.0000
Recall: 0.0000
AUC: 0.5000


In [19]:
#COPOD 2020
import numpy as np
import pandas as pd
from pyod.models.copod import COPOD
from sklearn.metrics import precision_score, recall_score, roc_auc_score

def calculate_copod_outlier_scores(data):
    """
    Fit a default-configured COPOD detector and return its outlier scores.

    A new ``COPOD`` instance is created on every call and fitted on ``data``;
    the ``decision_scores_`` attribute of the fitted detector is returned
    unchanged.

    Args:
        data: Input dataset (numpy array or pandas DataFrame).

    Returns:
        outlier_scores: Array of outlier scores; a higher score means the
            sample is considered more of an outlier.
    """
    detector = COPOD()
    detector.fit(data)

    # The scores live on the fitted detector (higher score -> more outlier).
    return detector.decision_scores_

def evaluate_copod_outlier_detection(outlier_scores, ground_truth, threshold_percentile=90):
    """
    Evaluate COPOD outlier detection results using precision, recall, and AUC.

    Samples whose score is at or above the ``threshold_percentile``-th
    percentile of ``outlier_scores`` are predicted as outliers (1), all
    others as inliers (0). Precision and recall are computed from these
    binary predictions; AUC is computed from the raw scores and therefore
    does not depend on the threshold.

    Args:
        outlier_scores: Array-like of outlier scores (higher -> more outlier).
        ground_truth: Ground truth labels (numpy array or pandas Series),
            passed unchanged to the scikit-learn metric functions.
        threshold_percentile: Percentile (0-100) of the scores used as the
            outlier cut-off. Defaults to 90, which flags roughly the top
            10% of the samples.

    Returns:
        precision: Precision of the model.
        recall: Recall of the model.
        auc: AUC of the model.
    """
    # Coerce once so plain Python lists support the element-wise comparison below.
    scores = np.asarray(outlier_scores)

    threshold = np.percentile(scores, threshold_percentile)
    predictions = (scores >= threshold).astype(int)  # 1 = outlier, 0 = inlier

    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, scores)

    return precision, recall, auc



In [20]:
#ROD 2020
import numpy as np
import pandas as pd
from pyod.models.rod import ROD
from sklearn.metrics import precision_score, recall_score, roc_auc_score

def calculate_rod_outlier_scores(data):
    """
    Fit a default-configured ROD detector and return its outlier scores.

    A new ``ROD`` instance is created on every call and fitted on ``data``;
    the ``decision_scores_`` attribute of the fitted detector is returned
    unchanged.

    Args:
        data: Input dataset (numpy array or pandas DataFrame).

    Returns:
        outlier_scores: Array of outlier scores; a higher score means the
            sample is considered more of an outlier.
    """
    detector = ROD()
    detector.fit(data)

    # The scores live on the fitted detector (higher score -> more outlier).
    return detector.decision_scores_

def evaluate_rod_outlier_detection(outlier_scores, ground_truth, threshold_percentile=90):
    """
    Evaluate ROD outlier detection results using precision, recall, and AUC.

    Samples whose score is at or above the ``threshold_percentile``-th
    percentile of ``outlier_scores`` are predicted as outliers (1), all
    others as inliers (0). Precision and recall are computed from these
    binary predictions; AUC is computed from the raw scores and therefore
    does not depend on the threshold.

    Args:
        outlier_scores: Array-like of outlier scores (higher -> more outlier).
        ground_truth: Ground truth labels (numpy array or pandas Series),
            passed unchanged to the scikit-learn metric functions.
        threshold_percentile: Percentile (0-100) of the scores used as the
            outlier cut-off. Defaults to 90, which flags roughly the top
            10% of the samples.

    Returns:
        precision: Precision of the model.
        recall: Recall of the model.
        auc: AUC of the model.
    """
    # Coerce once so plain Python lists support the element-wise comparison below.
    scores = np.asarray(outlier_scores)

    threshold = np.percentile(scores, threshold_percentile)
    predictions = (scores >= threshold).astype(int)  # 1 = outlier, 0 = inlier

    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, scores)

    return precision, recall, auc



In [22]:
#DIF 2023
import numpy as np
import pandas as pd
from pyod.models.iforest import IForest
from sklearn.metrics import precision_score, recall_score, roc_auc_score

def calculate_dif_outlier_scores(data):
    """
    Calculate outlier scores using the DIF (Deep Isolation Forest) model.

    A new ``DIF`` instance with default settings is created on every call
    and fitted on ``data``; the ``decision_scores_`` attribute of the fitted
    detector is returned unchanged.

    Args:
        data: Input dataset (numpy array or pandas DataFrame).

    Returns:
        outlier_scores: Array of outlier scores.
    """
    # Imported locally: the import block of this cell only brings in IForest,
    # so the name DIF would otherwise be undefined when the function is called.
    from pyod.models.dif import DIF

    # Apply DIF from pyod
    dif = DIF()
    dif.fit(data)

    # Get the outlier scores
    outlier_scores = dif.decision_scores_  # higher score -> more outlier

    return outlier_scores

def evaluate_dif_outlier_detection(outlier_scores, ground_truth, threshold_percentile=90):
    """
    Evaluate DIF outlier detection results using precision, recall, and AUC.

    Samples whose score is at or above the ``threshold_percentile``-th
    percentile of ``outlier_scores`` are predicted as outliers (1), all
    others as inliers (0). Precision and recall are computed from these
    binary predictions; AUC is computed from the raw scores and therefore
    does not depend on the threshold.

    Args:
        outlier_scores: Array-like of outlier scores (higher -> more outlier).
        ground_truth: Ground truth labels (numpy array or pandas Series),
            passed unchanged to the scikit-learn metric functions.
        threshold_percentile: Percentile (0-100) of the scores used as the
            outlier cut-off. Defaults to 90, which flags roughly the top
            10% of the samples.

    Returns:
        precision: Precision of the model.
        recall: Recall of the model.
        auc: AUC of the model.
    """
    # Coerce once so plain Python lists support the element-wise comparison below.
    scores = np.asarray(outlier_scores)

    threshold = np.percentile(scores, threshold_percentile)
    predictions = (scores >= threshold).astype(int)  # 1 = outlier, 0 = inlier

    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, scores)

    return precision, recall, auc



In [25]:
#SUOD 2021
import numpy as np
import pandas as pd
from pyod.models.suod import SUOD
from pyod.models.iforest import IForest
from sklearn.metrics import precision_score, recall_score, roc_auc_score

def calculate_suod_outlier_scores(data):
    """
    Fit a default-configured SUOD detector and return its outlier scores.

    A new ``SUOD`` instance is created on every call and fitted on ``data``;
    the ``decision_scores_`` attribute of the fitted detector is returned
    unchanged.

    Args:
        data: Input dataset (numpy array or pandas DataFrame).

    Returns:
        outlier_scores: Array of outlier scores; a higher score means the
            sample is considered more of an outlier.
    """
    detector = SUOD()
    detector.fit(data)

    # The scores live on the fitted detector (higher score -> more outlier).
    return detector.decision_scores_

def evaluate_suod_outlier_detection(outlier_scores, ground_truth, threshold_percentile=90):
    """
    Evaluate SUOD outlier detection results using precision, recall, and AUC.

    Samples whose score is at or above the ``threshold_percentile``-th
    percentile of ``outlier_scores`` are predicted as outliers (1), all
    others as inliers (0). Precision and recall are computed from these
    binary predictions; AUC is computed from the raw scores and therefore
    does not depend on the threshold.

    Args:
        outlier_scores: Array-like of outlier scores (higher -> more outlier).
        ground_truth: Ground truth labels (numpy array or pandas Series),
            passed unchanged to the scikit-learn metric functions.
        threshold_percentile: Percentile (0-100) of the scores used as the
            outlier cut-off. Defaults to 90, which flags roughly the top
            10% of the samples.

    Returns:
        precision: Precision of the model.
        recall: Recall of the model.
        auc: AUC of the model.
    """
    # Coerce once so plain Python lists support the element-wise comparison below.
    scores = np.asarray(outlier_scores)

    threshold = np.percentile(scores, threshold_percentile)
    predictions = (scores >= threshold).astype(int)  # 1 = outlier, 0 = inlier

    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, scores)

    return precision, recall, auc



In [27]:
#LUNAR 2022
import numpy as np
import pandas as pd
from pyod.models.lunar import LUNAR
from sklearn.metrics import precision_score, recall_score, roc_auc_score

def calculate_lunar_outlier_scores(data):
    """
    Fit a default-configured LUNAR detector and return its outlier scores.

    A new ``LUNAR`` instance is created on every call and fitted on ``data``;
    the ``decision_scores_`` attribute of the fitted detector is returned
    unchanged.

    Args:
        data: Input dataset (numpy array or pandas DataFrame).

    Returns:
        outlier_scores: Array of outlier scores; a higher score means the
            sample is considered more of an outlier.
    """
    detector = LUNAR()
    detector.fit(data)

    # The scores live on the fitted detector (higher score -> more outlier).
    return detector.decision_scores_

def evaluate_lunar_outlier_detection(outlier_scores, ground_truth, threshold_percentile=90):
    """
    Evaluate LUNAR outlier detection results using precision, recall, and AUC.

    Samples whose score is at or above the ``threshold_percentile``-th
    percentile of ``outlier_scores`` are predicted as outliers (1), all
    others as inliers (0). Precision and recall are computed from these
    binary predictions; AUC is computed from the raw scores and therefore
    does not depend on the threshold.

    Args:
        outlier_scores: Array-like of outlier scores (higher -> more outlier).
        ground_truth: Ground truth labels (numpy array or pandas Series),
            passed unchanged to the scikit-learn metric functions.
        threshold_percentile: Percentile (0-100) of the scores used as the
            outlier cut-off. Defaults to 90, which flags roughly the top
            10% of the samples.

    Returns:
        precision: Precision of the model.
        recall: Recall of the model.
        auc: AUC of the model.
    """
    # Coerce once so plain Python lists support the element-wise comparison below.
    scores = np.asarray(outlier_scores)

    threshold = np.percentile(scores, threshold_percentile)
    predictions = (scores >= threshold).astype(int)  # 1 = outlier, 0 = inlier

    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    auc = roc_auc_score(ground_truth, scores)

    return precision, recall, auc

