In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import torch

from src.datamanager.dataset import IDS2017Dataset
from src.model.one_class import DeepSVDD
from src.model.reconstruction import DAGMM
from src.trainer.ids import DeepSVDDIDSTrainer, DAGMMIDSTrainer
from src.utils import metrics

# Filesystem locations, both relative to the notebook's working directory.
export_path = "./"  # where exported artifacts are written
dataset_path = "../data/IDS2017/ids2017.csv"  # CSV read by the dataset loader

In [2]:
# Load the dataset from the CSV path configured in the first cell.
dataset = IDS2017Dataset(path=dataset_path)

# General arguments (consumed by the model/trainer cells below)
batch_size = 1024
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines where CUDA is not available.
device = "cuda" if torch.cuda.is_available() else "cpu"
act_fn = "relu"
n_layers = 4
compression_factor = 2
lr = 1e-4
n_epochs = 1

# DAGMM-specific arguments
n_mixtures = 4
latent_dim = 1
lambda_1 = 0.005
lambda_2 = 0.1
reg_covar = 1e-6

print("data loaded")

data loaded


## DeepSVDD Training

In [2]:
# Build the DeepSVDD network from the dataset dimensions and the
# hyperparameters defined in the configuration cell.
model = DeepSVDD(
    device=device,
    in_features=dataset.in_features,
    n_instances=dataset.n_instances,
    act_fn=act_fn,
    n_layers=n_layers,
    compression_factor=compression_factor,
)

# Wrap the model in its dedicated trainer.
trainer = DeepSVDDIDSTrainer(
    model=model,
    device=device,
    n_epochs=n_epochs,
    batch_size=batch_size,
    lr=lr,
)

print("DeepSVDD model and trainer ready")

data loaded, model and trainers are ready


In [3]:
# Train on the training loader, score the test loader, then compute the
# metrics returned by `estimate_optimal_threshold`.
# NOTE(review): the threshold appears to be selected using the test labels
# themselves, which would make the reported metrics optimistic — confirm.
train_ldr, test_ldr = dataset.loaders(batch_size=batch_size, seed=42)
# The return value of train() is not used; it is deliberately left unbound so
# that IPython's `_` output-history variable is not overwritten.
trainer.train(train_ldr)
y_test_true, test_scores, test_labels = trainer.test(test_ldr)
results = metrics.estimate_optimal_threshold(test_scores, y_test_true)
# Last expression: displayed as the cell output.
results

  1%|          | 12/1110 [00:00<00:15, 72.73it/s, epoch=1, loss=0.114]

Started training


100%|██████████| 1110/1110 [00:10<00:00, 103.00it/s, epoch=1, loss=0.662]
100%|██████████| 1110/1110 [00:10<00:00, 104.14it/s, epoch=2, loss=0.001]


{'Precision': 0.661385870748257,
 'Recall': 0.762798062559738,
 'F1-Score': 0.7084813260592394,
 'AUPR': 0.7682288212786377,
 'AUROC': 0.8381214112711038,
 'Thresh_star': 6.686015581180982e-07,
 'Quantile_star': 62.081617244691174}

In [17]:
# Binarize the scores: samples scoring at or above the selected threshold
# ("Thresh_star") are predicted as 1, all others as 0.
y_pred = (test_scores >= results["Thresh_star"]).astype(int)

In [18]:
# Keep only the samples whose ground-truth label is 1 and count how many of
# them were predicted 1 versus 0 by the thresholded scores.
# NOTE(review): label 1 is presumably the attack/anomalous class — confirm.
mask = y_test_true == 1.
df = pd.DataFrame(y_pred[mask])
# Column 0 holds the predictions; the counts are shown as the cell output.
df[0].value_counts()

1    425369
0    132274
Name: 0, dtype: int64

## DAGMM Training

In [3]:
# Build the DAGMM network from the dataset dimensions, the general
# hyperparameters and the DAGMM-specific ones from the configuration cell.
model = DAGMM(
    in_features=dataset.in_features,
    n_instances=dataset.n_instances,
    device=device,
    n_mixtures=n_mixtures,
    latent_dim=latent_dim,
    lambda_1=lambda_1,
    lambda_2=lambda_2,
    reg_covar=reg_covar,
    n_layers=n_layers,
    compression_factor=compression_factor,
    # Both activations follow the shared `act_fn` setting so that changing it
    # in the configuration cell affects DAGMM the same way it affects DeepSVDD.
    ae_act_fn=act_fn,
    gmm_act_fn=act_fn,
)

# Wrap the model in its dedicated trainer.
trainer = DAGMMIDSTrainer(
    device=device,
    model=model,
    batch_size=batch_size,
    lr=lr,
    n_epochs=n_epochs,
)
print("DAGMM model and trainer ready")

DAGMM model and trainer ready


In [5]:
# Rebuild the loaders with the same batch size and seed used for the
# DeepSVDD run, then train the current trainer on the training loader.
train_ldr, test_ldr = dataset.loaders(batch_size=batch_size, seed=42)
trainer.train(train_ldr)
# Note: the next two lines rebind y_test_true, test_scores, test_labels and
# results, replacing the values produced by the DeepSVDD evaluation cell.
y_test_true, test_scores, test_labels = trainer.test(test_ldr)
results = metrics.estimate_optimal_threshold(test_scores, y_test_true)
# Last expression: displayed as the cell output.
results

  1%|          | 7/1110 [00:00<00:30, 36.36it/s, epoch=1, loss=202181.664]

Started training


100%|██████████| 1110/1110 [00:16<00:00, 67.05it/s, epoch=1, loss=1784025.856]


{'Precision': 0.5148341358450046,
 'Recall': 0.4367794449136813,
 'F1-Score': 0.4726056303183826,
 'AUPR': 0.5040549808583397,
 'AUROC': 0.5553845883742977,
 'Thresh_star': 0.4080483317375183,
 'Quantile_star': 72.08161724469117}