In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    roc_auc_score
)

In [5]:
# =========================
# 1. GENERAL CONFIGURATION
# =========================
SEED = 42

# Training hyperparameters.
BATCH_SIZE = 256
EPOCHS = 10
LEARNING_RATE = 1e-3

# Operating threshold applied to the predicted scores at evaluation time.
# NOTE(review): how 0.7333 was selected is not shown in this notebook — confirm
# it was tuned on held-out validation data rather than on the test set.
THRESHOLD = 0.7333

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seed both libraries. The NumPy call stays last because it returns None,
# so the cell displays no output.
torch.manual_seed(SEED)
np.random.seed(SEED)

In [6]:
# =========================
# 2. DATA LOADING
# =========================
train_path = "../raw_data/UNSW_NB15_training-set.csv"
test_path  = "../raw_data/UNSW_NB15_testing-set.csv"

# Read both CSV files into DataFrames, training file first.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

In [7]:
# =========================
# 3. X / y SPLIT
# =========================
target_col = "label"

# Remove the non-numeric columns and the row identifier.
cols_to_drop = ["id", "proto", "service", "state", "attack_cat"]

# errors="ignore" makes this cell safe to re-run: train_df / test_df are
# overwritten here, so on a second execution the columns are already gone and
# a plain drop() would raise KeyError. The trade-off is that a column that is
# missing on the first run is skipped silently.
train_df = train_df.drop(columns=cols_to_drop, errors="ignore")
test_df  = test_df.drop(columns=cols_to_drop, errors="ignore")

# Features are every remaining column except the target.
X_train = train_df.drop(columns=[target_col])
y_train = train_df[target_col]

X_test = test_df.drop(columns=[target_col])
y_test = test_df[target_col]

# Both splits must expose the same feature columns in the same order, because
# the scaler is fitted on one split and applied to the other.
assert list(X_train.columns) == list(X_test.columns), (
    "train/test feature columns differ"
)

In [8]:
# =========================
# 4. STANDARDIZATION
# =========================
scaler = StandardScaler()

# Fit the scaler on the training features only, then apply that same fitted
# transform to both splits so no test-set statistics are used.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test  = scaler.transform(X_test)

In [9]:
# =========================
# 5. PYTORCH TENSORS
# =========================
def _to_float32(array_like):
    """Copy array-like data into a new float32 tensor."""
    return torch.tensor(array_like, dtype=torch.float32)


# Features come from the scaled matrices, labels from the underlying values of
# the label Series; everything is stored as float32.
X_train_tensor, X_test_tensor = (_to_float32(features) for features in (X_train, X_test))
y_train_tensor, y_test_tensor = (_to_float32(labels.values) for labels in (y_train, y_test))

# Only the training split is wrapped in a DataLoader; it is reshuffled each
# time the loader is iterated.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [10]:
# =========================
# 6. MLP MODEL
# =========================
class MLP(nn.Module):
    """Fully connected network that maps each sample to a single raw logit.

    Layout: input_dim -> 128 -> 64 -> 1, with a ReLU after each of the two
    hidden linear layers. No sigmoid is applied inside the model.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 128, 64)

        # Hidden stack: one Linear + ReLU pair per consecutive pair of widths.
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())

        # Output head: a single logit.
        layers.append(nn.Linear(widths[-1], 1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for `x` with the trailing size-1 dimension removed."""
        logits = self.net(x)
        return logits.squeeze(1)

# Size the input layer from the feature matrix's column count, then move the
# model to the configured device.
model = MLP(input_dim=X_train.shape[1]).to(device=DEVICE)

In [12]:
# =========================
# 7. COST-SENSITIVE LOSS
# =========================
# Summing the label column counts the positives (assumes labels are 0/1);
# every other row is treated as a negative.
n_positive = y_train.sum()
n_negative = y_train.shape[0] - n_positive

# Weight applied to the positive-class term of the loss: negatives per positive.
pos_weight = torch.tensor([n_negative / n_positive], device=DEVICE)

optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

In [13]:
# =========================
# 8. TRAINING
# =========================
def _train_step(features, targets):
    """Run one optimizer update on a single mini-batch and return its loss as a float."""
    features = features.to(DEVICE)
    targets = targets.to(DEVICE)

    optimizer.zero_grad()
    batch_loss = criterion(model(features), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.item()


model.train()

for epoch in range(EPOCHS):
    # Accumulate the per-batch losses in loader order, starting from 0.0.
    epoch_loss = sum((_train_step(xb, yb) for xb, yb in train_loader), 0.0)

    # Reported value is the mean of the per-batch losses.
    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {avg_loss:.4f}")

Epoch 1/10 - Loss: 0.1232
Epoch 2/10 - Loss: 0.0934
Epoch 3/10 - Loss: 0.0902
Epoch 4/10 - Loss: 0.0885
Epoch 5/10 - Loss: 0.0875
Epoch 6/10 - Loss: 0.0863
Epoch 7/10 - Loss: 0.0856
Epoch 8/10 - Loss: 0.0847
Epoch 9/10 - Loss: 0.0841
Epoch 10/10 - Loss: 0.0830


In [14]:
# =========================
# 9. EVALUATION
# =========================
model.eval()

# Score the whole test set in one forward pass with gradient tracking disabled.
with torch.no_grad():
    test_inputs = X_test_tensor.to(DEVICE)
    logits_test = model(test_inputs)
    probabilities = torch.sigmoid(logits_test)

# Sigmoid scores as a NumPy array on the host.
y_scores = probabilities.cpu().numpy()

# A sample is predicted positive when its score reaches THRESHOLD.
y_pred = np.greater_equal(y_scores, THRESHOLD).astype(int)

In [15]:
# =========================
# 10. METRICS
# =========================
# Unpack the 2x2 confusion matrix row by row: first row is (tn, fp),
# second row is (fn, tp).
(tn, fp), (fn, tp) = confusion_matrix(y_test, y_pred)

# Threshold-free ranking quality, computed from the raw scores.
roc_auc   = roc_auc_score(y_test, y_scores)

# Metrics computed from the thresholded predictions.
accuracy  = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall    = recall_score(y_test, y_pred)

# False-positive rate: share of true negatives that were flagged as positive.
fpr       = fp / (tn + fp)

print("\nRESULTADOS FINAIS")
print(f"Threshold : {THRESHOLD:.4f}")
print(f"Accuracy  : {accuracy*100:.2f}%")
print(f"Precision : {precision*100:.2f}%")
print(f"Recall    : {recall*100:.2f}%")
print(f"FPR       : {fpr*100:.2f}%")
print(f"ROC AUC   : {roc_auc*100:.2f}%")


RESULTADOS FINAIS
Threshold : 0.7333
Accuracy  : 88.78%
Precision : 93.50%
Recall    : 85.58%
FPR       : 7.29%
ROC AUC   : 96.66%
