In [1]:
import sys
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
import librosa
import numpy as np
import pandas as pd
from pyod.models.abod import ABOD
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from deepod.metrics import ts_metrics
from deepod.metrics import point_adjustment 
import random
import torch
sys.path.append("/home/iot/GSVDD/compaison/traditional_method/SVDD-Python")
from src.BaseSVDD import BaseSVDD
%matplotlib inline

In [18]:
def load_data(dataset):
    if dataset == 0:
        train_data = np.load('../../../TranAD/processed/SWaT/train.npy')
        test_data = np.load('../../../TranAD/processed/SWaT/test.npy')
        labels = np.load('../../../TranAD/processed/SWaT/labels.npy')
    elif dataset == 1:
        train_data = np.load('/home/iot/GSVDD/TranAD/processed/SMAP/P-1_train.npy')
        test_data = np.load('/home/iot/GSVDD/TranAD/processed/SMAP/P-1_test.npy')
        labels = np.load('/home/iot/GSVDD/TranAD/processed/SMAP/P-1_labels.npy')[:, 0]
    elif dataset == 2:
        train_data = np.load('/home/iot/GSVDD/TranAD/processed/MSL/C-1_train.npy')
        test_data = np.load('/home/iot/GSVDD/TranAD/processed/MSL/C-1_test.npy')
        labels = np.load('/home/iot/GSVDD/TranAD/processed/MSL/C-1_labels.npy')[:, 0]
    return train_data, test_data, labels

# List of models to evaluate
# models = {
#     "IsolationForest": IsolationForest(),
#     "OneClassSVM": OneClassSVM(),
#     # "BaseSVDD":BaseSVDD(gamma=3,)
# }

# Results storage
results_table = []

# Repeat for 3 datasets
for dataset_idx in range(3):
    train_data, test_data, labels = load_data(dataset_idx)
    print(f"Dataset {dataset_idx}: Running experiments...")

    # Train, evaluate, and repeat 10 times for all models
    for model_name in ["IsolationForest","OneClassSVM","BaseSVDD"]:
        print(f"Running model {model_name}...")
        raw_metrics_list = []
        adj_metrics_list = []
        for run in range(10):
            print(f"Run {run + 1}/10 for {model_name}...")

            if model_name=='BaseSVDD':
                model = BaseSVDD(gamma=np.random.uniform(0.1, 5))
                model.fit(train_data)
                scores = model.get_distance(test_data)
                scores = np.array(scores)
            # Prediction and evaluation
            elif model_name=="IsolationForest":
                model = IsolationForest(random_state=42+run)
                model.fit(train_data)
                scores = model.decision_function(test_data)
            elif model_name=="OneClassSVM":
                nu_value = np.random.uniform(0.01, 0.5)
                gamma_value = np.random.uniform(0.01, 1.0)
                model = OneClassSVM(kernel='rbf', nu=nu_value, gamma=gamma_value)
                model.fit(train_data)
                scores = model.decision_function(test_data)
                
            raw_eval_metrics = ts_metrics(labels, scores)
            adjusted_metrics = ts_metrics(labels, point_adjustment(labels, scores))
            print(adjusted_metrics)
            # Collect raw and adjusted metrics
            raw_metrics_list.append([
                raw_eval_metrics[0],  # Raw AUC
                raw_eval_metrics[2],  # Raw F1
                raw_eval_metrics[3],  # Raw Precision
                raw_eval_metrics[4],  # Raw Recall
            ])
            adj_metrics_list.append([
                adjusted_metrics[0],  # Adjusted AUC
                adjusted_metrics[2],  # Adjusted F1
                adjusted_metrics[3],  # Adjusted Precision
                adjusted_metrics[4],  # Adjusted Recall
            ])

        # Calculate mean and standard deviation for both raw and adjusted metrics
        raw_metrics_array = np.array(raw_metrics_list)
        adj_metrics_array = np.array(adj_metrics_list)

        raw_means, raw_stds = np.mean(raw_metrics_array, axis=0), np.std(raw_metrics_array, axis=0)
        adj_means, adj_stds = np.mean(adj_metrics_array, axis=0), np.std(adj_metrics_array, axis=0)

        # Append the results for table format
        results_table.append([
            f"Dataset {dataset_idx}",
            model_name,
            f"{raw_means[0]:.3f} ± {raw_stds[0]:.3f}",  # Raw AUC
            f"{raw_means[1]:.3f} ± {raw_stds[1]:.3f}",  # Raw F1
            f"{raw_means[2]:.3f} ± {raw_stds[2]:.3f}",  # Raw Precision
            f"{raw_means[3]:.3f} ± {raw_stds[3]:.3f}",  # Raw Recall
            f"{adj_means[0]:.3f} ± {adj_stds[0]:.3f}",  # Adjusted AUC
            f"{adj_means[1]:.3f} ± {adj_stds[1]:.3f}",  # Adjusted F1
            f"{adj_means[2]:.3f} ± {adj_stds[2]:.3f}",  # Adjusted Precision
            f"{adj_means[3]:.3f} ± {adj_stds[3]:.3f}",  # Adjusted Recall
        ])

# Convert results to DataFrame
columns = [
    "Dataset", "Model", 
    "Raw AUC (Mean ± Std)", "Raw F1 (Mean ± Std)", 
    "Raw Precision (Mean ± Std)", "Raw Recall (Mean ± Std)",
    "Adjusted AUC (Mean ± Std)", "Adjusted F1 (Mean ± Std)", 
    "Adjusted Precision (Mean ± Std)", "Adjusted Recall (Mean ± Std)"
]
df_results = pd.DataFrame(results_table, columns=columns)

# Save results to a CSV file
df_results.to_csv("time_series_anomaly_metrics_summary_traiditional.csv", index=False)

# Display the table
print("\nResults Summary (Mean ± Std):")
print(df_results.to_string(index=False))

Dataset 0: Running experiments...
Running model IsolationForest...
Run 1/10 for IsolationForest...
(0.4292316159905092, 0.13131975346265684, 0.2598197457942873, 0.17513513513513512, 0.5031055900621118)
Run 2/10 for IsolationForest...
(0.4537772559017618, 0.14421693386768566, 0.25884204597900357, 0.18816067653276955, 0.41459627329192544)
Run 3/10 for IsolationForest...
(0.5282239033474212, 0.2298789448499829, 0.40256116424815463, 0.41625207296849087, 0.38975155279503104)
Run 4/10 for IsolationForest...
(0.39095161810082757, 0.11689276571206887, 0.24250338737026683, 0.15532286212914484, 0.5527950310559007)
Run 5/10 for IsolationForest...
(0.4505627634333168, 0.14371110029928108, 0.2702926863554518, 0.19840116279069767, 0.42391304347826086)
Run 6/10 for IsolationForest...
(0.4373385178721147, 0.13236774379301197, 0.2720283533401137, 0.18359587180879958, 0.5248447204968945)
Run 7/10 for IsolationForest...
(0.43845659445955887, 0.134267942350686, 0.23642852744299378, 0.1621465666474322, 0.4