# Introdução aos Ataques DDoS no Dataset CICDDoS2019

O dataset contém múltiplos cenários de ataques, registrados em arquivos CSV, com detalhes sobre tráfego malicioso e legítimo.


# Pré-Processamento UEL - Gerando dados para treinamento

In [None]:
import pandas as pd
from itertools import cycle
import random

# 1. Load the four pre-sorted, semicolon-delimited CSV splits
treino_normal = pd.read_csv('data/cic_puro/treino_sem_ataque_ordenado.csv', sep=';')
treino_ataque = pd.read_csv('data/cic_puro/treino_ataque_ordenado.csv', sep=';')
teste_normal = pd.read_csv('data/cic_puro/teste_sem_ataque_ordenado.csv', sep=';')
teste_ataque = pd.read_csv('data/cic_puro/teste_ataque_ordenado.csv', sep=';')


# 2. Stack normal rows followed by attack rows for each split, with a fresh 0..n-1 index
treino_full = pd.concat((treino_normal, treino_ataque), ignore_index=True)
teste_full = pd.concat((teste_normal, teste_ataque), ignore_index=True)

# 3. Separar normais e ataques
def prepare_data(df, max_per_attack=1000, max_normal=5000, random_state=None):
    """Split a labelled frame into capped, shuffled normal rows and per-attack samples.

    Args:
        df: DataFrame with a ``label`` column (0 = normal, 1 = attack) and an
            ``attack_name`` column identifying the attack type.
        max_per_attack: Maximum number of rows kept for each attack type.
            ``None`` keeps every row of every type.
        max_normal: Maximum number of normal rows kept. ``None`` keeps them all
            (still shuffled).
        random_state: Forwarded to ``DataFrame.sample`` so the selection can be
            reproduced. ``None`` (the default) keeps the previous unseeded behaviour.

    Returns:
        ``(normal, attack_types)`` where ``normal`` is a shuffled DataFrame of
        normal rows and ``attack_types`` maps each attack name to a DataFrame of
        sampled rows. Every returned frame has a fresh 0..n-1 index.
    """
    normal = df[df['label'] == 0]
    attacks = df[df['label'] == 1]

    # Cap each attack type independently so frequent attacks do not dominate.
    attack_types = {}
    for name, group in attacks.groupby('attack_name'):
        n_rows = len(group) if max_per_attack is None else min(len(group), max_per_attack)
        attack_types[name] = group.sample(n=n_rows, random_state=random_state).reset_index(drop=True)

    # Sampling n rows without replacement already shuffles them, so a single
    # sample() call both caps and shuffles the normal rows.
    n_normal = len(normal) if max_normal is None else min(len(normal), max_normal)
    normal = normal.sample(n=n_normal, random_state=random_state).reset_index(drop=True)

    return normal, attack_types

# Sample each split: the training split keeps up to 1000 rows per attack type and
# 10000 normal rows; the test split keeps up to 500 per attack type and 5000 normal rows.
train_normal, train_attacks = prepare_data(treino_full, max_per_attack=1000, max_normal=10000)
test_normal, test_attacks = prepare_data(teste_full, max_per_attack=500, max_normal=5000)

# 4. Função para criar sequências aleatórias
def _take_run(df, start, length):
    """Return ``(rows, next_start, exhausted)`` for a run of up to ``length`` rows.

    ``exhausted`` is True when the run asked for more rows than were left.
    """
    stop = start + max(length, 0)
    exhausted = stop > len(df)
    stop = min(stop, len(df))
    return df.iloc[start:stop], stop, exhausted


def create_random_sequences(normal_df, attack_dict, min_seq=30, max_seq=150, seed=None):
    """Interleave runs of normal rows and attack rows into a single DataFrame.

    Rows are consumed in order from ``normal_df`` and from each frame in
    ``attack_dict``. At every step one kind of source (normal or attack) is chosen
    at random among the kinds that still have rows, a run length is drawn
    uniformly from ``[min_seq, max_seq]``, and that many consecutive rows (fewer
    if the source runs out) are appended to the output. Attack types are visited
    round-robin; when a type runs out it is dropped and the round-robin restarts
    over the remaining types.

    Args:
        normal_df: DataFrame of normal rows.
        attack_dict: Mapping ``attack name -> DataFrame`` of attack rows. May be empty.
        min_seq: Smallest run length.
        max_seq: Largest run length.
        seed: Optional seed for a private random generator. ``None`` (default)
            uses the global ``random`` module state, as before.

    Returns:
        DataFrame with every input row exactly once, keeping the row labels and
        dtypes of the source frames.

    Raises:
        ValueError: If ``max_seq < 1`` (no run could ever consume a row, which
            previously looped forever) or ``min_seq > max_seq``.
    """
    if max_seq < 1 or min_seq > max_seq:
        raise ValueError(
            f"invalid run-length range [{min_seq}, {max_seq}]: need min_seq <= max_seq and max_seq >= 1"
        )

    rng = random.Random(seed) if seed is not None else random

    # Runs are collected as positional slices and concatenated once at the end;
    # this avoids iterrows(), which is slow and does not preserve column dtypes.
    chunks = []

    normal_pos = 0
    attack_pos = {name: 0 for name in attack_dict}
    attack_cycle = cycle(list(attack_pos))

    normal_remaining = True
    # With no attack frames there is nothing to cycle over; next() on an empty
    # cycle would raise StopIteration.
    attack_remaining = bool(attack_pos)

    while normal_remaining or attack_remaining:
        # Only draw between kinds that still have rows, so the loop never spins
        # on an exhausted source.
        if normal_remaining and attack_remaining:
            choice = rng.choice(['normal', 'attack'])
        else:
            choice = 'normal' if normal_remaining else 'attack'

        seq_len = rng.randint(min_seq, max_seq)

        if choice == 'normal':
            rows, normal_pos, exhausted = _take_run(normal_df, normal_pos, seq_len)
            if exhausted:
                normal_remaining = False
        else:
            attack_type = next(attack_cycle)
            rows, attack_pos[attack_type], exhausted = _take_run(
                attack_dict[attack_type], attack_pos[attack_type], seq_len
            )
            if exhausted:
                # This attack type ran out: drop it and rebuild the cycle.
                del attack_pos[attack_type]
                if attack_pos:
                    attack_cycle = cycle(list(attack_pos))
                else:
                    attack_remaining = False

        if len(rows):
            chunks.append(rows)

    if not chunks:
        return normal_df.iloc[0:0].copy()
    return pd.concat(chunks)

# 5. Build the interleaved train/test datasets
train_final = create_random_sequences(train_normal, train_attacks, min_seq=30, max_seq=120)
test_final = create_random_sequences(test_normal, test_attacks, min_seq=30, max_seq=120)

# 6. Save into data/cic_puro/: the model cells further down read the generated
# files from that directory, so writing them to the working directory would
# leave the models training on a stale (or missing) copy.
train_final.to_csv('data/cic_puro/treino_final_estratificado_random.csv', sep=';', index=False)
test_final.to_csv('data/cic_puro/teste_final_estratificado_random.csv', sep=';', index=False)

print('Arquivos treino_final_estratificado_random.csv e teste_final_estratificado_random.csv gerados com sequências aleatórias!')

Arquivos treino_final_estratificado_random.csv e teste_final_estratificado_random.csv gerados com sequências aleatórias!


In [None]:
# Count how many rows each 'attack_name' value has in the generated sets
attack_counts_train = train_final['attack_name'].value_counts()
attack_counts_test = test_final['attack_name'].value_counts()

# Show the results. 'Tamanho' is the number of distinct classes; the total row
# count is printed separately on the following line.
print('Tamanho:', len(attack_counts_train), 'Treino:', attack_counts_train)
print('Total de linhas no conjunto de treino:', len(train_final))

print('Tamanho:', len(attack_counts_test), 'Teste:', attack_counts_test)
print('Total de linhas no conjunto de teste:', len(test_final))

Tamanho: 13 Treino: attack_name
normal           8074
DrDoS_DNS        1000
DrDoS_NTP        1000
DrDoS_SNMP       1000
DrDoS_UDP        1000
TFTP             1000
UDP-lag           885
DrDoS_SSDP        822
DrDoS_NetBIOS     726
DrDoS_MSSQL       687
DrDoS_LDAP        592
Syn               237
WebDDoS           125
Name: count, dtype: int64
Tamanho: 8 Teste: attack_name
normal     5000
LDAP        500
MSSQL       500
NetBIOS     500
Syn         500
UDP         500
UDPLag      470
Portmap     449
Name: count, dtype: int64


# Modelos

## LSTM

In [4]:
from models.LSTM.ModelLSTM import LSTM
from models.Sequence import SequenceDataset
from models.LSTM.TrainerLSTM import TrainerLSTM
from torch.utils.data import DataLoader
import torch

# Seed torch's RNG and pick the GPU when one is available
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# LSTM network parameters
input_size = 9         # Number of features per time step (size of the input vector)
hidden_size = 256       # Hidden-state size (hidden units per LSTM cell)
num_layers = 3         # Number of stacked LSTM layers
output_size = 2        # Classes: normal (0), anomaly (1)
batch_size = 128        # Samples per batch
sequence_length = 10   # Length of each input sequence fed to the LSTM
column_to_remove = 'attack_name'  # Column dropped by the dataset loader

Usando dispositivo: cuda


In [None]:
# Build the train/test sequence datasets from the generated CSVs.
# NOTE(review): each dataset is built with normalize=True on its own file; confirm in
# SequenceDataset whether the test split should reuse the training statistics.
train_dataset = SequenceDataset('data/cic_puro/treino_final_estratificado_random.csv', sequence_length, column_to_remove=column_to_remove, normalize=True, mode='lstm')
test_dataset = SequenceDataset('data/cic_puro/teste_final_estratificado_random.csv', sequence_length, column_to_remove, normalize=True, mode='lstm')

print(f"Total de amostras no conjunto de treino: {len(train_dataset)}")
print(f"Total de amostras no conjunto de teste: {len(test_dataset)}")
print("Train Dataset Shape:", train_dataset.sequences.shape)
print("Test Dataset Shape:", test_dataset.sequences.shape)

# Build the DataLoaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

print(f"Total de batches no conjunto de treino: {len(train_loader)}")
print(f"Total de batches no conjunto de teste: {len(test_loader)}")

# Instantiate the model on the selected device
model = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size).to(device)
print(model)

Total de amostras no conjunto de treino: 17139
Total de amostras no conjunto de teste: 8410
Train Dataset Shape: torch.Size([17139, 10, 9])
Test Dataset Shape: torch.Size([8410, 10, 9])


In [None]:
# Train the LSTM for 100 epochs; the test loader is passed as the validation set
# and checkpoints go to output/LSTM.
trainer = TrainerLSTM(dir_save="output/LSTM", num_epochs=100)
trainer.fit(model, train_loader, test_loader, device)

Epoch [1/100] Train Loss: 0.4143 Val Loss:   0.3635 Accuracy:   0.8725
🔖 Melhor modelo salvo!
Epoch [2/100] Train Loss: 0.1607 Val Loss:   0.3591 Accuracy:   0.8926
🔖 Melhor modelo salvo!
Epoch [3/100] Train Loss: 0.1231 Val Loss:   0.3107 Accuracy:   0.9001
🔖 Melhor modelo salvo!
Epoch [4/100] Train Loss: 0.0989 Val Loss:   0.2797 Accuracy:   0.8907
🔖 Melhor modelo salvo!
Epoch [5/100] Train Loss: 0.0911 Val Loss:   0.4735 Accuracy:   0.8895
Epoch [6/100] Train Loss: 0.0964 Val Loss:   0.2631 Accuracy:   0.9043
🔖 Melhor modelo salvo!
Epoch [7/100] Train Loss: 0.0857 Val Loss:   0.3939 Accuracy:   0.8484
Epoch [8/100] Train Loss: 0.0873 Val Loss:   0.2725 Accuracy:   0.8898
Epoch [9/100] Train Loss: 0.0873 Val Loss:   0.2713 Accuracy:   0.9100
Epoch [10/100] Train Loss: 0.0909 Val Loss:   0.2565 Accuracy:   0.8930
🔖 Melhor modelo salvo!
Epoch [11/100] Train Loss: 0.0752 Val Loss:   0.2610 Accuracy:   0.9074
Epoch [12/100] Train Loss: 0.0775 Val Loss:   0.2517 Accuracy:   0.9127
🔖 Melho

## CNN

In [1]:
from models.CNN.ModelCNN import CNN
from models.Sequence import SequenceDataset
from models.CNN.TrainerCNN import TrainerCNN
from torch.utils.data import DataLoader
import torch

# Seed torch's RNG and pick the GPU when one is available
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# 1-D CNN hyperparameters
input_size = 9  # number of features, passed to the model as input channels
sequence_length = 5  # window length, passed to the model as input length
output_size = 2  # number of classes
batch_size = 128
num_epochs = 50
learning_rate = 0.0001
column_to_remove = 'attack_name'  # column dropped by the dataset loader

Usando dispositivo: cuda


In [None]:
# Build the train/test datasets in 'cnn1d' mode.
# NOTE(review): each dataset is built with normalize=True on its own file; confirm in
# SequenceDataset whether the test split should reuse the training statistics.
train_dataset = SequenceDataset('data/cic_puro/treino_final_estratificado_random.csv', sequence_length, column_to_remove, normalize=True, mode='cnn1d')
test_dataset = SequenceDataset('data/cic_puro/teste_final_estratificado_random.csv', sequence_length, column_to_remove, normalize=True, mode='cnn1d')

print(f"Total de amostras no conjunto de treino: {len(train_dataset)}")
print(f"Total de amostras no conjunto de teste: {len(test_dataset)}")
print("Train Dataset Shape:", train_dataset.sequences.shape)
print("Test Dataset Shape:", test_dataset.sequences.shape)

# DataLoaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

print(f"Total de batches no conjunto de treino: {len(train_loader)}")
print(f"Total de batches no conjunto de teste: {len(test_loader)}")

# Model, created on the selected device
model = CNN(input_channels=input_size, input_length=sequence_length, num_classes=output_size).to(device)
# The model was already moved above; as the last expression of the cell, this
# call's return value is what the notebook displays.
model.to(device)

Total de amostras no conjunto de treino: 17144
Total de amostras no conjunto de teste: 8415
Train Dataset Shape: torch.Size([17144, 9, 5])
Test Dataset Shape: torch.Size([8415, 9, 5])


In [None]:
# Train the 1-D CNN. The epoch count comes from the `num_epochs` constant of the
# configuration cell, so changing it there actually takes effect here.
trainer_cnn  = TrainerCNN(dir_save="output/CNN", num_epochs=num_epochs)
trainer_cnn.fit(model, train_loader, test_loader, device)

Epoch [1/50] Train Loss: 0.1655 Val Loss:   0.3660 Accuracy:   0.8696
🔖 Melhor modelo salvo em: output/UEL/CNN\CNN_Epoca-1_Acc-0.87.pth
Epoch [2/50] Train Loss: 0.1161 Val Loss:   0.2794 Accuracy:   0.8695
🔖 Melhor modelo salvo em: output/UEL/CNN\CNN_Epoca-2_Acc-0.87.pth
Epoch [3/50] Train Loss: 0.1100 Val Loss:   0.4129 Accuracy:   0.8719
Epoch [4/50] Train Loss: 0.1049 Val Loss:   0.3277 Accuracy:   0.8734
Epoch [5/50] Train Loss: 0.1028 Val Loss:   0.3409 Accuracy:   0.8276
Epoch [6/50] Train Loss: 0.0964 Val Loss:   0.5265 Accuracy:   0.8618
Epoch [7/50] Train Loss: 0.0934 Val Loss:   0.2951 Accuracy:   0.8806
Epoch [8/50] Train Loss: 0.0877 Val Loss:   0.2777 Accuracy:   0.8864
🔖 Melhor modelo salvo em: output/UEL/CNN\CNN_Epoca-8_Acc-0.89.pth
Epoch [9/50] Train Loss: 0.0849 Val Loss:   0.3299 Accuracy:   0.8739
Epoch [10/50] Train Loss: 0.0807 Val Loss:   0.2871 Accuracy:   0.8788
Epoch [11/50] Train Loss: 0.0800 Val Loss:   0.4255 Accuracy:   0.8639
Epoch [12/50] Train Loss: 0.07

## CNN 2D

In [1]:
from models.CNN_2D.ModelCNN_2D import CNN_2D
from models.Sequence import SequenceDataset
from models.CNN_2D.TrainerCNN_2D import TrainerCNN_2D
from torch.utils.data import DataLoader
import torch

# Seed torch's RNG and pick the GPU when one is available
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# 2-D CNN hyperparameters
input_size = 9  # number of features, passed to the model as input channels
sequence_length = 5  # window length, passed to the model as input length
output_size = 2  # number of classes
batch_size = 128
num_epochs = 50
learning_rate = 0.0001
column_to_remove = 'attack_name'  # column dropped by the dataset loader

Usando dispositivo: cuda


In [None]:
# Build the train/test datasets in 'cnn2d' mode.
# NOTE(review): each dataset is built with normalize=True on its own file; confirm in
# SequenceDataset whether the test split should reuse the training statistics.
train_dataset = SequenceDataset('data/cic_puro/treino_final_estratificado_random.csv', sequence_length, column_to_remove, normalize=True, mode='cnn2d')
test_dataset = SequenceDataset('data/cic_puro/teste_final_estratificado_random.csv', sequence_length, column_to_remove, normalize=True, mode='cnn2d')

print(f"Total de amostras no conjunto de treino: {len(train_dataset)}")
print(f"Total de amostras no conjunto de teste: {len(test_dataset)}")
print("Train Dataset Shape:", train_dataset.sequences.shape)
print("Test Dataset Shape:", test_dataset.sequences.shape)

# DataLoaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

print(f"Total de batches no conjunto de treino: {len(train_loader)}")
print(f"Total de batches no conjunto de teste: {len(test_loader)}")

# CNN_2D model, created on the selected device
model = CNN_2D(input_channels=input_size, input_length=sequence_length, num_classes=output_size).to(device)
# The model was already moved above; as the last expression of the cell, this
# call's return value is what the notebook displays.
model.to(device)

Total de amostras no conjunto de treino: 17144
Total de amostras no conjunto de teste: 8415
Train Dataset Shape: torch.Size([17144, 9, 5, 1])
Test Dataset Shape: torch.Size([8415, 9, 5, 1])


In [None]:
# Train the CNN_2D. The epoch count comes from the `num_epochs` constant of the
# configuration cell, so changing it there actually takes effect here.
trainer_cnn  = TrainerCNN_2D(dir_save="output/CNN_2D", num_epochs=num_epochs)
trainer_cnn.fit(model, train_loader, test_loader, device)

Epoch [1/50] Train Loss: 0.1535 Val Loss:   0.2818 Accuracy:   0.8742
🔖 Melhor modelo salvo em: output/UEL/CNN_2D\CNN_Epoca-1_Acc-0.87.pth
Epoch [2/50] Train Loss: 0.1194 Val Loss:   0.3322 Accuracy:   0.8781
Epoch [3/50] Train Loss: 0.1090 Val Loss:   0.2977 Accuracy:   0.8751
Epoch [4/50] Train Loss: 0.1045 Val Loss:   0.2516 Accuracy:   0.8837
🔖 Melhor modelo salvo em: output/UEL/CNN_2D\CNN_Epoca-4_Acc-0.88.pth
Epoch [5/50] Train Loss: 0.0987 Val Loss:   0.2976 Accuracy:   0.8759
Epoch [6/50] Train Loss: 0.1008 Val Loss:   0.4745 Accuracy:   0.8689
Epoch [7/50] Train Loss: 0.0929 Val Loss:   0.4517 Accuracy:   0.8764
Epoch [8/50] Train Loss: 0.0893 Val Loss:   0.2662 Accuracy:   0.8843
Epoch [9/50] Train Loss: 0.0874 Val Loss:   0.2693 Accuracy:   0.8844
Epoch [10/50] Train Loss: 0.0859 Val Loss:   0.3042 Accuracy:   0.8816
Epoch [11/50] Train Loss: 0.0868 Val Loss:   0.3121 Accuracy:   0.8875
Epoch [12/50] Train Loss: 0.0811 Val Loss:   0.3228 Accuracy:   0.8799
Epoch [13/50] Train

## Hybrid 

In [None]:
import torch
from torch.utils.data import DataLoader
from models.Sequence import SequenceDataset
from models.Hybrid.ModelHybrid import ModelHybrid
from models.Hybrid.TrainerHybrid import TrainerHybrid

# Seed and device
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# Hyperparameters
input_size      = 9    # n_features
hidden_size     = 256  # LSTM hidden size
num_layers      = 3    # LSTM layers
output_size     = 2    # classes (normal, anomaly)
batch_size      = 128
sequence_length = 10
column_to_remove = 'attack_name'

# Datasets, windowed in 'lstm' mode.
# NOTE(review): each dataset is built with normalize=True on its own file; confirm in
# SequenceDataset whether the test split should reuse the training statistics.
train_dataset = SequenceDataset(
    path               = 'data/cic_puro/treino_final_estratificado_random.csv',
    sequence_length    = sequence_length,
    column_to_remove   = column_to_remove,
    normalize          = True,
    mode               = 'lstm'
)
test_dataset = SequenceDataset(
    path               = 'data/cic_puro/teste_final_estratificado_random.csv',
    sequence_length    = sequence_length,
    column_to_remove   = column_to_remove,
    normalize          = True,
    mode               = 'lstm'
)

print(f"Total de amostras no conjunto de treino: {len(train_dataset)}")
print(f"Total de amostras no conjunto de teste: {len(test_dataset)}")
print("Train Dataset Shape:", train_dataset.sequences.shape)
print("Test Dataset Shape:", test_dataset.sequences.shape)

# DataLoaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size)

print(f"Total de batches no treino: {len(train_loader)}")
print(f"Total de batches no teste:  {len(test_loader)}")

# Hybrid model, created on the selected device
model = ModelHybrid(
    seq_len           = sequence_length,
    n_features        = input_size,
    lstm_hidden_size  = hidden_size,
    lstm_num_layers   = num_layers,
    num_classes       = output_size
).to(device)
print(model)


Usando dispositivo: cuda
Total de amostras no conjunto de treino: 17139
Total de amostras no conjunto de teste: 8410
Train Dataset Shape: torch.Size([17139, 10, 9])
Test Dataset Shape: torch.Size([8410, 10, 9])
Total de batches no treino: 134
Total de batches no teste:  66
ModelHybrid(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=(2, 1), strid

In [None]:
# Train the Hybrid model for 50 epochs; the test loader is passed as the
# validation set and checkpoints go to output/Hybrid.
trainer = TrainerHybrid(dir_save="output/Hybrid", num_epochs=50)
trainer.fit(model, train_loader, test_loader, device)

In [2]:
# Jupyter cell: carregar checkpoint do Hybrid e treinar SVM

import os
import torch
import numpy as np
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader

from models.Sequence import SequenceDataset
from models.Hybrid.ModelHybrid import ModelHybrid

# --- 1) Settings ---
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# These values are used below to rebuild the ModelHybrid that the checkpoint is loaded into
sequence_length = 10
column_to_remove = 'attack_name'
batch_size = 128
input_size = 9
hidden_size = 256
num_layers = 3
output_size = 2

# --- 2) Datasets & loaders (same 'lstm' windowing) ---
train_ds = SequenceDataset(
    path             = 'data/cic_puro/treino_final_estratificado_random.csv',
    sequence_length  = sequence_length,
    column_to_remove = column_to_remove,
    normalize        = True,
    mode             = 'lstm'
)
test_ds = SequenceDataset(
    path             = 'data/cic_puro/teste_final_estratificado_random.csv',
    sequence_length  = sequence_length,
    column_to_remove = column_to_remove,
    normalize        = True,
    mode             = 'lstm'
)
# Shuffling the training loader is harmless here: labels are collected batch by
# batch together with the features.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size)

# --- 3) Instantiate the Hybrid model (weights are loaded from a checkpoint next) ---
model = ModelHybrid(
    seq_len           = sequence_length,
    n_features        = input_size,
    lstm_hidden_size  = hidden_size,
    lstm_num_layers   = num_layers,
    num_classes       = output_size
)
ckpt_dir = "output/Hybrid"
# Select the most recently saved .pth. A plain lexicographic sort of the file
# names is not chronological ("..._Ep3_..." sorts after "..._Ep12_..."), so the
# file modification time is used instead.
ckpts = [f for f in os.listdir(ckpt_dir) if f.endswith(".pth")]
if not ckpts:
    raise FileNotFoundError(
        f"No .pth checkpoint found in {ckpt_dir!r}; train the Hybrid model first."
    )
ckpt_path = max((os.path.join(ckpt_dir, f) for f in ckpts), key=os.path.getmtime)
checkpoint = torch.load(ckpt_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# eval() so the network runs in inference mode while features are extracted
model.to(device).eval()
print("Checkpoint carregado:", ckpt_path)

# --- 4) Extrai features do train e do test ---
def extract_all(loader):
    """Run every batch of ``loader`` through ``model.extract_features``.

    Uses the notebook-level ``model`` and ``device``. Gradients are disabled
    while the batches are processed.

    Returns:
        ``(features, labels)``: the per-batch feature arrays stacked with
        ``np.vstack`` and the per-batch label arrays joined with ``np.hstack``.
    """
    feature_chunks = []
    label_chunks = []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_feats = model.extract_features(batch_x.to(device))
            feature_chunks.append(batch_feats.cpu().numpy())
            label_chunks.append(batch_y.numpy())
    stacked_features = np.vstack(feature_chunks)
    stacked_labels = np.hstack(label_chunks)
    return stacked_features, stacked_labels

# Extract features and labels for both splits with the loaded Hybrid model
X_train, y_train = extract_all(train_loader)
X_test,  y_test  = extract_all(test_loader)

# --- 5) (Optional) PCA to reduce dimensionality ---
# Fitted on the training features only, then applied unchanged to the test features.
pca = PCA(n_components=128, random_state=SEED)
X_train_p = pca.fit_transform(X_train)
X_test_p  = pca.transform (X_test)

# --- 6) Train the SVM and evaluate on the test features ---
svm = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=SEED)
svm.fit(X_train_p, y_train)
y_pred = svm.predict(X_test_p)
print("SVM Accuracy:", accuracy_score(y_test, y_pred))


Checkpoint carregado: output/Hybrid\Hybrid_Ep3_Acc0.89.pth
SVM Accuracy: 0.8990487514863258


In [None]:
# Notebook cell

import torch
from torch.utils.data import DataLoader

from models.Sequence import SequenceDataset
from models.Hybrid.ModelHybrid import ModelHybrid
from models.Hybrid.TrainerHybridSVM import TrainerHybridSVM

# Seed & device
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Parameters
sequence_length = 10
column_to_remove = 'attack_name'
batch_size      = 128
input_size      = 9
hidden_size     = 256
num_layers      = 3
output_size     = 2

# Datasets & loaders ('lstm' windowing); only the training loader shuffles
train_ds = SequenceDataset(
    'data/cic_puro/treino_final_estratificado_random.csv',
    sequence_length,
    column_to_remove=column_to_remove,
    normalize=True,
    mode='lstm'
)
test_ds = SequenceDataset(
    'data/cic_puro/teste_final_estratificado_random.csv',
    sequence_length,
    column_to_remove=column_to_remove,
    normalize=True,
    mode='lstm'
)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size)

# Model & trainer.
# NOTE(review): the model is not moved to `device` here; presumably
# TrainerHybridSVM.fit does it with the `device` it receives — confirm.
model   = ModelHybrid(
    seq_len           = sequence_length,
    n_features        = input_size,
    lstm_hidden_size  = hidden_size,
    lstm_num_layers   = num_layers,
    num_classes       = output_size
)
# NOTE(review): in the recorded run with these settings, validation accuracy
# collapsed to 0.4065 from epoch 11 onward; C and lr may need tuning.
trainer = TrainerHybridSVM(
    dir_save   = "output/HybridSVM",
    num_epochs = 50,
    C          = 1000,
    margin     = 0.5,
    lr         = 1e-4
)




In [2]:
# Run training; depends on `trainer`, `model` and the loaders defined in the previous cell
trainer.fit(model, train_loader, test_loader, device)

[1/50] TrainLoss=0.1252 ValLoss=0.2794 ValAcc=0.8549
🔖 Salvo: output/HybridSVM\HybridSVM_Ep1_Val0.2794.pth
[2/50] TrainLoss=0.0911 ValLoss=0.3805 ValAcc=0.8765
[3/50] TrainLoss=0.0778 ValLoss=0.3045 ValAcc=0.8678
[4/50] TrainLoss=0.0796 ValLoss=0.3018 ValAcc=0.8816
[5/50] TrainLoss=0.0822 ValLoss=0.4925 ValAcc=0.4075
[6/50] TrainLoss=0.0833 ValLoss=0.7966 ValAcc=0.4065
[7/50] TrainLoss=0.0857 ValLoss=0.1424 ValAcc=0.8636
🔖 Salvo: output/HybridSVM\HybridSVM_Ep7_Val0.1424.pth
[8/50] TrainLoss=0.0948 ValLoss=0.4233 ValAcc=0.8836
[9/50] TrainLoss=0.1004 ValLoss=0.7050 ValAcc=0.4065
[10/50] TrainLoss=0.1579 ValLoss=0.4022 ValAcc=0.5357
[11/50] TrainLoss=0.4153 ValLoss=0.5044 ValAcc=0.4065
[12/50] TrainLoss=0.4985 ValLoss=0.5053 ValAcc=0.4065
[13/50] TrainLoss=0.4984 ValLoss=0.5053 ValAcc=0.4065
[14/50] TrainLoss=0.4984 ValLoss=0.5056 ValAcc=0.4065
[15/50] TrainLoss=0.4984 ValLoss=0.5054 ValAcc=0.4065
[16/50] TrainLoss=0.4983 ValLoss=0.5056 ValAcc=0.4065
[17/50] TrainLoss=0.4983 ValLoss=0.50

KeyboardInterrupt: 

In [1]:
import torch
from torch.utils.data import DataLoader

# 1) Imports do seu projeto
from models.Sequence import SequenceDataset
from models.Hybrid.ModelHybridAttnSVM   import ModelHybridAttnSVM
from models.Hybrid.TrainerHybridAttnSVM import TrainerHybridAttnSVM

# 2) Seed and device
SEED = 42
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando dispositivo: {device}")

# 3) Hyperparameters
sequence_length  = 10
column_to_remove = 'attack_name'
batch_size       = 128
n_features       = 9
hidden_size      = 256
num_layers       = 3
num_classes      = 2
num_epochs       = 50

# 4) Build the datasets (same variable names as in the earlier sections)
train_dataset = SequenceDataset(
    path             = 'data/cic_puro/treino_final_estratificado_random.csv',
    sequence_length  = sequence_length,
    column_to_remove = column_to_remove,
    normalize        = True,
    mode             = 'lstm'
)
test_dataset = SequenceDataset(
    path             = 'data/cic_puro/teste_final_estratificado_random.csv',
    sequence_length  = sequence_length,
    column_to_remove = column_to_remove,
    normalize        = True,
    mode             = 'lstm'
)

# 5) Build the DataLoaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size)

print(f"Total de batches no treino: {len(train_loader)}")
print(f"Total de batches no teste:  {len(test_loader)}")

# 6) Instantiate the model and the trainer.
# NOTE(review): the model is not moved to `device` here; presumably
# TrainerHybridAttnSVM.fit does it with the `device` it receives — confirm.
model = ModelHybridAttnSVM(
    seq_len       = sequence_length,
    n_features    = n_features,
    lstm_hidden   = hidden_size,
    lstm_layers   = num_layers,
    num_classes   = num_classes
)
trainer = TrainerHybridAttnSVM(
    dir_save   = "output/HybridAttnSVM",
    num_epochs = num_epochs,
    C          = 10.0,
    margin     = 0.5,
    lr         = 1e-4
)

# 7) Run training; the test loader is passed as the validation set
trainer.fit(model, train_loader, test_loader, device)


Usando dispositivo: cuda
Total de batches no treino: 134
Total de batches no teste:  66
[1/50] TrLoss=0.1272 ValLoss=0.0903 ValAcc=0.8459
🔖 Salvo: output/HybridAttnSVM\HybridAttnSVM_Ep1_Val0.0903.pth
[2/50] TrLoss=0.0661 ValLoss=0.0817 ValAcc=0.8536
🔖 Salvo: output/HybridAttnSVM\HybridAttnSVM_Ep2_Val0.0817.pth
[3/50] TrLoss=0.0548 ValLoss=0.0768 ValAcc=0.8629
🔖 Salvo: output/HybridAttnSVM\HybridAttnSVM_Ep3_Val0.0768.pth
[4/50] TrLoss=0.0480 ValLoss=0.0738 ValAcc=0.8690
🔖 Salvo: output/HybridAttnSVM\HybridAttnSVM_Ep4_Val0.0738.pth
[5/50] TrLoss=0.0422 ValLoss=0.0792 ValAcc=0.8521
[6/50] TrLoss=0.0379 ValLoss=0.0749 ValAcc=0.8805
[7/50] TrLoss=0.0346 ValLoss=0.0700 ValAcc=0.8809
🔖 Salvo: output/HybridAttnSVM\HybridAttnSVM_Ep7_Val0.0700.pth
[8/50] TrLoss=0.0329 ValLoss=0.0782 ValAcc=0.8579
[9/50] TrLoss=0.0306 ValLoss=0.1010 ValAcc=0.8798
[10/50] TrLoss=0.0302 ValLoss=0.0819 ValAcc=0.8546
[11/50] TrLoss=0.0311 ValLoss=0.0676 ValAcc=0.8728
🔖 Salvo: output/HybridAttnSVM\HybridAttnSVM_Ep11_V