In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import time
from tqdm import tqdm

In [2]:
# 定義圖像增強與標準化
transform = {
    'train': transforms.Compose([
        transforms.ToTensor(),  # 轉為 Tensor
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
    ])
}

# 資料集路徑
train_data_dir = 'data/image/cicids2018/train_224/'
val_data_dir = 'data/image/cicids2018/val_224/'
batch_size = 128

# 使用 ImageFolder 自動根據子資料夾讀取標籤
train_dataset = datasets.ImageFolder(root=train_data_dir, transform=transform['train'])
val_dataset = datasets.ImageFolder(root=val_data_dir, transform=transform['val'])

# 建立 DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)

# 檢查類別數量
print(f"訓練集的類別數：{len(train_dataset.classes)}")
print(f"類別標籤對應：{train_dataset.class_to_idx}")


訓練集的類別數：5
類別標籤對應：{'Benign': 0, 'Botnets': 1, 'Brute-force': 2, 'DoS': 3, 'Web-attacks': 4}


In [3]:
# 加載 ResNet 模型，使用預訓練權重
model = models.resnet18(pretrained=True)

# 替換最後一層全連接層
num_ftrs = model.fc.in_features  # ResNet 最後一層的輸入特徵數量
num_classes = len(train_dataset.classes)  # 六大類別
model.fc = nn.Linear(num_ftrs, num_classes)  # 替換為我們的輸出層

# 將模型移至 GPU（如果可用）
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# 定義損失函數（交叉熵損失）
criterion = nn.CrossEntropyLoss()

# 定義優化器（使用 Adam）
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 學習率調整器
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


In [4]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10, save_path='./best_model.pth'):
    since = time.time()
    best_acc = 0.0  # 紀錄最佳準確率

    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch + 1}/{num_epochs}')
        print('-' * 40)

        # 訓練與驗證
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # 訓練模式
                dataloader = train_loader
            else:
                model.eval()  # 驗證模式
                dataloader = val_loader

            running_loss = 0.0
            running_corrects = 0

            # 使用 tqdm 顯示進度條
            progress_bar = tqdm(dataloader, desc=f'{phase.capitalize()} Progress', leave=False)

            for inputs, labels in progress_bar:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()  # 清空梯度

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()  # 反向傳播
                        optimizer.step()  # 更新權重

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                # 更新進度條的描述
                progress_bar.set_postfix(loss=f"{running_loss / len(dataloader.dataset):.4f}", acc=f"{running_corrects.double() / len(dataloader.dataset):.4f}")

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = running_corrects.double() / len(dataloader.dataset)

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # 學習率調整
            if phase == 'train':
                scheduler.step()

            # 保存最佳模型
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), save_path)
                print(f'>> Epoch {epoch + 1}: Validation accuracy improved to {best_acc:.4f}, model saved to {save_path}')

    time_elapsed = time.time() - since
    print(f'\n訓練完成，耗時 {time_elapsed // 60:.0f} 分鐘 {time_elapsed % 60:.0f} 秒')
    print(f'最佳驗證準確率：{best_acc:.4f}')
    return model

# 開始訓練
trained_model = train_model(model, criterion, optimizer, scheduler, num_epochs=3)
# torch.save(trained_model.state_dict(), 'resnet_classifier.pth')
# print("模型權重已保存")


Epoch 1/3
----------------------------------------


                                                                                            

Train Loss: 0.0784 Acc: 0.9734


                                                                                        

Val Loss: 0.0632 Acc: 0.9789
>> Epoch 1: Validation accuracy improved to 0.9789, model saved to ./best_model.pth

Epoch 2/3
----------------------------------------


                                                                                            

Train Loss: 0.0641 Acc: 0.9772


                                                                                        

Val Loss: 0.0584 Acc: 0.9803
>> Epoch 2: Validation accuracy improved to 0.9803, model saved to ./best_model.pth

Epoch 3/3
----------------------------------------


                                                                                            

Train Loss: 0.0613 Acc: 0.9779


                                                                                        

Val Loss: 0.0569 Acc: 0.9808
>> Epoch 3: Validation accuracy improved to 0.9808, model saved to ./best_model.pth

訓練完成，耗時 13 分鐘 50 秒
最佳驗證準確率：0.9808


In [5]:
# 確保 CICIDS2018 目錄正確
cicids2017_data_dir = "data/image/cicids2017/train_224"

# 讀取測試數據
cicids2017_dataset = datasets.ImageFolder(root=cicids2017_data_dir, transform=transform['val'])
cicids2017_loader = DataLoader(cicids2017_dataset, batch_size=32, shuffle=False, num_workers=4)

# 檢查類別標籤對應
print(f"CICIDS2017 類別數量: {len(cicids2017_dataset.classes)}")
print(f"CICIDS2018 類別對應: {train_dataset.class_to_idx}")
print(f"CICIDS2017 類別對應: {cicids2017_dataset.class_to_idx}")

CICIDS2017 類別數量: 6
CICIDS2018 類別對應: {'Benign': 0, 'Botnets': 1, 'Brute-force': 2, 'DoS': 3, 'Web-attacks': 4}
CICIDS2017 類別對應: {'Benign': 0, 'Botnets': 1, 'Brute-force': 2, 'DoS': 3, 'Port-scan': 4, 'Web-attacks': 5}


In [6]:
# 標籤對應表 (將 CICIDS2018 的標籤對應回 CICIDS2017)
label_mapping = {
    0: 0,  # Benign -> Benign
    1: 1,  # Botnets -> Botnets
    2: 2,  # Brute-force -> Brute-force
    3: 3,  # DoS -> DoS
    4: 5,  # Port-scan (CICIDS2017:4) -> none
    5: 4   # Web-attacks (CICIDS2017:5) -> 4
}

In [7]:
from tqdm import tqdm

def evaluate_model(model, dataloader, dataset, label_mapping):
    model.eval()
    correct = 0
    total = 0

    # 進度條初始化
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc="Evaluating", unit="batch", leave=True)

    with torch.no_grad():
        for batch_idx, (images, labels) in pbar:
            batch_size = images.size(0)
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            # 轉換測試標籤至訓練標籤對應
            mapped_labels = torch.tensor([label_mapping[label.item()] for label in labels], device=device)

            # 計算正確率
            correct += (preds == mapped_labels).sum().item()
            total += labels.size(0)

            # 更新進度條：顯示目前處理到第幾個 batch、圖片數、當前準確率
            pbar.set_postfix(batch=f"{batch_idx+1}/{len(dataloader)}", processed=f"{total}/{len(dataset)}", acc=f"{correct / total:.4f}")

    acc = correct / total
    print(f"\nCICIDS2018 測試集準確率: {acc:.4f}")

# 執行測試
evaluate_model(trained_model, cicids2017_loader, cicids2017_dataset, label_mapping)

Evaluating: 100%|██████████| 456/456 [00:10<00:00, 43.74batch/s, acc=0.4866, batch=456/456, processed=14592/14592]


CICIDS2018 測試集準確率: 0.4866



