In [1]:
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [2]:
# Notebook-wide configuration: the seed shared by the split/sampling cells and
# the location of the input parquet file (directory prefix + file name).
RANDOM_SEED = 42
DATA_PATH, DATA_FILE = "data/", "processed_traffic.parquet"

In [3]:
# Read the full table, then hold out 10% of the rows as the test split.
# The fixed seed keeps the split identical across runs.
data = pd.read_parquet(f"{DATA_PATH}{DATA_FILE}")
data_train, data_test = train_test_split(
    data,
    test_size=0.1,
    random_state=RANDOM_SEED,
)
(data_train.shape, data_test.shape)

((55960, 49), (6218, 49))

In [4]:
# Seeded sub-samples for quick experiments: 1% of the training split and
# 10% of the test split.
data_train_smol, data_test_smol = (
    frame.sample(frac=fraction, random_state=RANDOM_SEED)
    for frame, fraction in ((data_train, 0.01), (data_test, 0.1))
)
(data_train_smol.shape, data_test_smol.shape)

((560, 49), (622, 49))

In [5]:
# Wrap every split as a TabularDataset with the "Attack Name" column removed,
# so the predictor only ever sees "Label" plus the remaining columns.
train_dataset, test_dataset, train_dataset_smol, test_dataset_smol = (
    TabularDataset(split.drop(columns=["Attack Name"]))
    for split in (data_train, data_test, data_train_smol, data_test_smol)
)

# Training

In [6]:
# Binary classifier on the "Label" column, scored by accuracy.
mitra = TabularPredictor(label="Label", problem_type="binary", eval_metric="accuracy")

# Fit on the small training sample with only the MITRA model configured:
# fine-tuning switched off and one GPU requested for fitting.
# NOTE(review): time_limit is presumably in seconds (10 minutes) - confirm against the AutoGluon docs.
mitra.fit(
    train_data=train_dataset_smol,
    time_limit=600,
    presets=["high_quality"],
    hyperparameters={
        "MITRA": dict(fine_tune=False, ag_args_fit=dict(num_gpus=1)),
    },
)

No path specified. Models will be saved in: "AutogluonModels/ag-20251126_020445"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #202403110203~1715181801~22.04~aba43ee SMP PREEMPT_DYNAMIC Wed M
CPU Count:          12
Memory Avail:       9.80 GB / 15.46 GB (63.4%)
Disk Space Avail:   380.33 GB / 911.02 GB (41.7%)
Presets specified: ['high_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluo

KeyboardInterrupt: 



# Inference

In [6]:
# Load a previously trained predictor from disk instead of retraining.
# Other saved runs that have been used here:
#   "AutogluonModels/Colab/ag-20251125_034417"
#   "AutogluonModels/ag-20251125_152348"
MODEL_PATH = "AutogluonModels/ag-20251125_164251"
model = TabularPredictor.load(MODEL_PATH)

In [7]:
# Find the decision threshold (the probability cut-off between class 0 and class 1)
# that maximizes accuracy. The call returns the chosen threshold, which is shown as
# the cell output; it does not appear to apply it to the predictor, which is why a
# separate set_decision_threshold call follows.
# Author's observation: calibrating for precision or recall of the malicious class (1)
# instead pushes the threshold heavily towards 1 or 0 and costs the other metrics.
model.calibrate_decision_threshold(metric="accuracy")

0.484

In [8]:
# Apply the accuracy-calibrated threshold to the loaded predictor.
# The value is taken straight from calibration rather than pasted in as a literal
# (previously 0.484), so it stays correct when a different saved predictor is loaded.
# This runs calibration once more; for the same predictor it is expected to
# reproduce the same threshold.
model.set_decision_threshold(model.calibrate_decision_threshold(metric="accuracy"))

In [9]:
# Score the full test split. The label column is removed once and the same
# feature frame is used for both hard predictions and class probabilities.
test_features = test_dataset.drop(columns=["Label"])
preds = model.predict(test_features)
probs = model.predict_proba(test_features)

In [12]:
# Attach the predictions to a copy of the test rows (which still carry "Attack Name")
# so that errors can be broken down per attack type further below.
c = data_test.copy()
c['Predicted Label'] = preds

# A custom decision threshold is set on the predictor, so the predicted class is not
# always the highest-probability class. Pick each probability by the predicted label
# instead of max/min so both columns stay consistent with 'Predicted Label'.
is_predicted_class = pd.DataFrame({cls: preds == cls for cls in probs.columns})
# min_count=1 leaves NaN (rather than 0) if a row has no matching class column.
c['Predicted Probability'] = probs.where(is_predicted_class).sum(axis=1, min_count=1)
c['Other Class Probability'] = probs.mask(is_predicted_class).sum(axis=1, min_count=1)

In [13]:
# Per-class precision / recall / F1 of the thresholded predictions against the true
# labels. classification_report returns plain text, hence the print.
# (classification_report is imported in the notebook's top import cell.)
print(classification_report(y_true=c["Label"], y_pred=c["Predicted Label"]))

              precision    recall  f1-score   support

           0       0.82      0.95      0.88      3264
           1       0.93      0.78      0.85      2954

    accuracy                           0.87      6218
   macro avg       0.88      0.86      0.86      6218
weighted avg       0.87      0.87      0.86      6218



In [14]:
# Count misclassified test rows per attack type (rows where the true and
# predicted labels disagree).
c.loc[c["Label"] != c["Predicted Label"], "Attack Name"].value_counts()

Attack Name
Benign Traffic              174
DDoS ICMP Flood             152
DoS ICMP Flood              152
DoS UDP Flood               143
DDoS UDP Flood              122
MITM ARP Spoofing            50
MQTT Malformed               21
MQTT DoS Publish Flood       15
Recon Vulnerability Scan      3
Recon Ping Sweep              2
Name: count, dtype: int64

# Feature Importances

In [33]:
# Feature importance on the small test sample ("Attack Name" removed, "Label" kept
# as ground truth). The logged run used permutation shuffling and took ~14 minutes.
importances = model.feature_importance(
    data=data_test_smol.drop(columns=["Attack Name"]),
)

These features in provided data are not utilized by the predictor and will be ignored: ['ECE Flag Count']
Computing feature importance via permutation shuffling for 46 features using 622 rows with 5 shuffle sets...
	1115.63s	= Expected runtime (223.13s per shuffle set)
	827.71s	= Actual runtime (Completed 5 of 5 shuffle sets)


In [35]:
# Show floats in fixed-point form with eight decimals, then display the table.
pd.options.display.float_format = '{:.8f}'.format
importances

Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
Src Port,0.0170418,0.0038719,0.00029888,5,0.02501409,0.00906951
Fwd IAT Mean,0.01189711,0.00403534,0.00137116,5,0.02020592,0.00358829
Flow IAT Std,0.01061093,0.00183308,0.00010276,5,0.01438527,0.0068366
Average Packet Size,0.00900322,0.00183308,0.00019529,5,0.01277755,0.00522888
ACK Flag Count,0.00836013,0.00366616,0.00349369,5,0.0159088,0.00081146
Flow IAT Mean,0.00836013,0.00071899,6.5e-06,5,0.00984054,0.00687971
Packet Length Mean,0.00707395,0.00183308,0.00049584,5,0.01084829,0.00329962
Flow Packets/s,0.00643087,0.00196904,0.00093475,5,0.01048516,0.00237658
Packet Length Std,0.00610932,0.00209621,0.00143111,5,0.01042544,0.00179321
Fwd Act Data Pkts,0.00578778,0.00419241,0.01834099,5,0.01442002,-0.00284446
