In [None]:
import os
from outliers_detection import OutliersDetection
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.auto_encoder import AutoEncoder
from pyod.models.knn import KNN
from pyod.models.ocsvm import OCSVM
from pyod.models.vae import VAE

In [2]:
# Configuration cell: the detectors to evaluate and the sub-datasets to run them on.
#
# `models` maps a display name to a configured PyOD estimator. Dict insertion
# order is the order in which the models are run, and each key is interpolated
# into the results directory path by the training loop, so renaming a key also
# renames its output folder.
#
# NOTE(review): the two neural detectors use contamination=0.01 while the
# Isolation Forest uses 0.1 -- confirm this difference is intentional, since it
# presumably makes the share of points each model flags not directly comparable.
models = {
    "AutoEncoder": AutoEncoder(
        hidden_neuron_list=[64, 32],
        hidden_activation_name='relu',
        dropout_rate=0.2,
        lr=0.001,
        epoch_num=20,
        batch_size=64,
        contamination=0.01,
        # None lets PyOD pick the device itself (CUDA when available, CPU
        # otherwise) instead of hard-failing on machines without a GPU.
        device=None,
        verbose=1
    ),
    "Variational AutoEncoder (VAE)": VAE(
        # Decoder widths mirror the encoder widths in reverse.
        encoder_neuron_list=[128, 64, 32],
        decoder_neuron_list=[32, 64, 128],
        latent_dim=2,
        hidden_activation_name='relu',
        output_activation_name='sigmoid',
        dropout_rate=0.2,
        lr=0.001,
        epoch_num=30,
        batch_size=64,
        contamination=0.01,
        # Same automatic device selection as the AutoEncoder above.
        device=None,
        verbose=1
    ),
    "Isolation Forest": IForest(
        contamination=0.1,
        random_state=42
    )
}

# Sub-dataset identifiers; each one is passed to OutliersDetection and used as
# a path component of the results directory.
sub_datasets = [
    "Health_and_Personal_Care",
    "Magazine_Subscriptions",
    "Subscription_Boxes"
]

In [None]:
# Run every configured model against every sub-dataset. The pairs are produced
# lazily, dataset-major: all models for the first sub-dataset, then all models
# for the second, and so on.
for sub_dataset, (model_name, model_conf) in (
    (dataset, entry) for dataset in sub_datasets for entry in models.items()
):
    # One results folder per (sub-dataset, model) combination.
    save_dir = f"fast_api/all_results/{sub_dataset}/{model_name}_with_numerical_data"
    os.makedirs(save_dir, exist_ok=True)

    print(f"\nTraining {model_name} on {sub_dataset} dataset...")

    # NOTE(review): no explicit fit call appears in this cell, so constructing
    # OutliersDetection presumably loads the data and trains the model --
    # confirm against the class definition.
    detector = OutliersDetection(
        model_conf=model_conf,
        model_name=model_name,
        sub_dataset=sub_dataset,
        add_numerical_data=True,
        save_dir=save_dir
    )

    # Always score the shift between the detector's train and test splits;
    # the output is presumably written to save_dir as "distribution_shift.png".
    detector.distribution_shift_scoring(
        detector.X_train,
        detector.X_test,
        save_dir=save_dir,
        filename="distribution_shift.png",
    )

    print(f"Finished training {model_name} on {sub_dataset}.\n")
