In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    roc_auc_score,
    confusion_matrix
)

# Single seed shared by every RNG the notebook relies on, so that weight
# initialisation and DataLoader shuffling are repeatable on a fresh kernel.
SEED = 42

np.random.seed(SEED)
torch.manual_seed(SEED)

In [2]:
train_path = "../raw_data/UNSW_NB15_training-set.csv"
test_path  = "../raw_data/UNSW_NB15_testing-set.csv"

# Check both files before loading either one, and fail with an error that
# names the missing path. A bare `assert` carries no message and is removed
# entirely when Python runs with -O, so it is not a reliable guard.
for _csv_path in (train_path, test_path):
    if not os.path.exists(_csv_path):
        raise FileNotFoundError(
            f"Dataset CSV not found: {os.path.abspath(_csv_path)}"
        )

train_df = pd.read_csv(train_path)
test_df  = pd.read_csv(test_path)

In [3]:
# Name of the column used as the binary training target.
target_col = "label"

# Columns removed from the model input in addition to the target.
# NOTE(review): presumably an identifier, the multi-class attack name and
# non-numeric categoricals -- confirm against the dataset schema. Whatever
# remains is passed straight to StandardScaler, so it must be numeric.
drop_cols = ["id", "attack_cat", "proto", "service", "state"]

# Everything that must not appear in the feature matrix.
non_feature_cols = [target_col] + drop_cols

X_train, y_train = train_df.drop(columns=non_feature_cols), train_df[target_col]
X_test,  y_test  = test_df.drop(columns=non_feature_cols),  test_df[target_col]

In [4]:
# Standardisation statistics are learned from the training split only and
# then reused unchanged on the test split.
scaler = StandardScaler()
X_train_np = scaler.fit(X_train).transform(X_train).astype(np.float32)
X_test_np  = scaler.transform(X_test).astype(np.float32)

# Labels are cast to float32 as well; the training cell feeds them to
# BCEWithLogitsLoss together with the float32 logits.
y_train_np, y_test_np = (
    labels.values.astype(np.float32) for labels in (y_train, y_test)
)


In [5]:
# Wrap the NumPy arrays as tensors. The targets get a trailing unit axis so
# their shape (N, 1) matches the single-logit output of the network.
X_train_t = torch.from_numpy(X_train_np)
y_train_t = torch.from_numpy(y_train_np)[:, None]

# Mini-batches of 256 rows, reshuffled on every pass over the data.
train_ds = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(dataset=train_ds, batch_size=256, shuffle=True)


In [6]:
class IDSNet(nn.Module):
    """Fully connected binary classifier that emits one raw logit per row.

    Architecture: input_dim -> 128 -> 64 -> 1, with a ReLU after each of the
    two hidden layers. No sigmoid is applied inside the module; callers apply
    it themselves (or use a logits-based loss).

    Args:
        input_dim: number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_wide, hidden_narrow = 128, 64
        # Layers are created in forward order, so the indices inside the
        # Sequential (and therefore the state_dict keys) are 0, 2 and 4 for
        # the three Linear layers.
        stack = [
            nn.Linear(input_dim, hidden_wide),
            nn.ReLU(),
            nn.Linear(hidden_wide, hidden_narrow),
            nn.ReLU(),
            nn.Linear(hidden_narrow, 1),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the logits of shape (batch, 1) for a (batch, input_dim) input."""
        raw_logits = self.net(x)
        return raw_logits


In [7]:
# The network width is taken from the scaled training matrix (one input per column).
model = IDSNet(input_dim=X_train_np.shape[1])


In [8]:
# Class counts in the training labels (label 1 = positive, label 0 = negative).
n_pos = (y_train_np == 1).sum()
n_neg = (y_train_np == 0).sum()

# Positive-class loss weight: ratio of negatives to positives.
pos_weight = torch.tensor([n_neg / n_pos], dtype=torch.float32)

# The loss works on raw logits, matching the sigmoid-free output of IDSNet.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [10]:
# Number of full passes over train_loader.
EPOCHS = 10

# NOTE(review): this cell trains whatever `model` currently holds; it does not
# re-create the network. Re-running it without re-running the model/optimizer
# cells continues training from the previous state instead of starting fresh.
model.train()
for epoch in range(EPOCHS):
    # Running sum of the per-batch loss values for this epoch.
    total_loss = 0
    for xb, yb in train_loader:
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        # .item() extracts a Python float so no autograd graph is kept alive.
        total_loss += loss.item()

    # Reported value is the sum of batch losses divided by the number of
    # batches, i.e. every batch counts equally regardless of its size.
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {total_loss/len(train_loader):.4f}")


Epoch 1/10 - Loss: 0.0829
Epoch 2/10 - Loss: 0.0822
Epoch 3/10 - Loss: 0.0817
Epoch 4/10 - Loss: 0.0810
Epoch 5/10 - Loss: 0.0805
Epoch 6/10 - Loss: 0.0798
Epoch 7/10 - Loss: 0.0794
Epoch 8/10 - Loss: 0.0788
Epoch 9/10 - Loss: 0.0786
Epoch 10/10 - Loss: 0.0777


In [11]:
# Score the whole test matrix in one forward pass, with gradients disabled.
model.eval()
with torch.no_grad():
    logits = model(torch.from_numpy(X_test_np))
    # Drop only the trailing unit dimension of the (N, 1) logits. A bare
    # .squeeze() would also collapse N == 1 to a 0-d array, which breaks the
    # len()/indexing done by second_layer_decision on the scores.
    y_scores = torch.sigmoid(logits).squeeze(1).numpy()


In [12]:
# First-layer decision: a single fixed cut-off on the sigmoid score.
# NOTE(review): how 0.7333 was chosen is not shown here -- document its origin.
# y_pred_layer1 is not consumed by any later cell in this section.
THRESHOLD = 0.7333
y_pred_layer1 = np.where(y_scores >= THRESHOLD, 1, 0)


In [13]:
# Score band boundaries: below T_LOW is always class 0, at or above T_HIGH is
# always class 1, and anything in between is decided from recent context.
T_LOW  = 0.60
T_HIGH = 0.80

# Context rule for the in-between band.
WINDOW_SIZE = 20             # how many preceding samples are looked at
MIN_SUSPECT_EVENTS = 3       # window samples >= t_low needed to flag
MEAN_SCORE_THRESHOLD = 0.72  # alternative trigger: mean score of the window


def second_layer_decision(
    scores,
    t_low=None,
    t_high=None,
    window_size=None,
    min_suspect_events=None,
    mean_score_threshold=None,
):
    """Turn a sequence of scores into 0/1 decisions using a trailing window.

    For each position ``i``:

    * ``scores[i] >= t_high``  -> 1
    * ``scores[i] <  t_low``   -> 0
    * otherwise the decision uses the window ``scores[max(0, i - window_size) : i + 1]``
      (the current sample plus up to ``window_size`` preceding ones). The
      result is 1 when at least ``min_suspect_events`` window entries are
      ``>= t_low`` or when the window mean is ``>= mean_score_threshold``;
      otherwise 0. The current sample is itself inside the window, so it
      counts towards both the suspect count and the mean.

    The outcome depends on the order of ``scores``: the window only ever looks
    backwards along the sequence as given.

    Args:
        scores: 1-D array-like of scores (e.g. sigmoid outputs).
        t_low, t_high, window_size, min_suspect_events, mean_score_threshold:
            optional overrides. When left as ``None`` the module-level
            constants ``T_LOW``, ``T_HIGH``, ``WINDOW_SIZE``,
            ``MIN_SUSPECT_EVENTS`` and ``MEAN_SCORE_THRESHOLD`` are read at
            call time, which is exactly the original behaviour.

    Returns:
        Integer NumPy array with the same shape as ``scores`` holding 0/1.

    Raises:
        ValueError: if ``window_size`` is negative.
    """
    # Resolve defaults at call time so edits to the notebook constants are
    # picked up without redefining the function.
    t_low = T_LOW if t_low is None else t_low
    t_high = T_HIGH if t_high is None else t_high
    window_size = WINDOW_SIZE if window_size is None else window_size
    min_suspect_events = (
        MIN_SUSPECT_EVENTS if min_suspect_events is None else min_suspect_events
    )
    mean_score_threshold = (
        MEAN_SCORE_THRESHOLD if mean_score_threshold is None else mean_score_threshold
    )

    if window_size < 0:
        # A negative size would produce an empty window and a NaN mean.
        raise ValueError("window_size must be non-negative")

    # Accept lists/tuples as well as arrays; the dtype is left untouched so
    # the window mean is computed exactly as it would be on the input array.
    scores = np.asarray(scores)
    final_pred = np.zeros_like(scores, dtype=int)

    for i in range(len(scores)):
        current = scores[i]

        # Confident positive.
        if current >= t_high:
            final_pred[i] = 1
            continue

        # Confident negative (entry is already 0).
        if current < t_low:
            continue

        # Ambiguous band: consult the trailing window, current sample included.
        start = max(0, i - window_size)
        window_scores = scores[start:i + 1]

        suspect_events = np.sum(window_scores >= t_low)
        mean_score = np.mean(window_scores)

        if suspect_events >= min_suspect_events or mean_score >= mean_score_threshold:
            final_pred[i] = 1

    return final_pred


In [14]:
# Apply the windowed second-layer rule to the test-set scores. The rule looks
# back over preceding entries, so the result depends on the row order of
# X_test_np (the order in which y_scores was produced).
y_pred_layer2 = second_layer_decision(y_scores)


In [15]:
# Unpack the 2x2 confusion matrix row by row: first row is the true-0 class
# (tn, fp), second row the true-1 class (fn, tp).
(tn, fp), (fn, tp) = confusion_matrix(y_test_np, y_pred_layer2)

# Threshold-dependent metrics are computed from the second-layer decisions.
accuracy  = accuracy_score(y_test_np, y_pred_layer2)
precision = precision_score(y_test_np, y_pred_layer2)
recall    = recall_score(y_test_np, y_pred_layer2)
fpr       = fp / (tn + fp)

# ROC AUC is threshold-free, so it uses the raw scores of the network rather
# than the two-layer decisions.
auc       = roc_auc_score(y_test_np, y_scores)

print("\nRESULTADOS FINAIS — SISTEMA EM DUAS CAMADAS")
for metric_name, metric_value in (
    ("Accuracy  ", accuracy),
    ("Precision ", precision),
    ("Recall    ", recall),
    ("FPR       ", fpr),
    ("ROC AUC   ", auc),
):
    print(f"{metric_name}: {metric_value*100:.2f}%")



RESULTADOS FINAIS — SISTEMA EM DUAS CAMADAS
Accuracy  : 88.42%
Precision : 87.02%
Recall    : 92.81%
FPR       : 16.96%
ROC AUC   : 96.38%
