In [1]:
# ===================================================================
# Ensemble_Hybrid 完整 PyTorch 訓練與部署文件打包腳本
#
# 結合了 Ensemble_Hybrid 的高性能模型與生產級的模型文件打包功能。
#
# 主要特點:
# 1. 使用 Ensemble_Hybrid 複雜模型進行訓練。
# 2. 包含完整的數據清洗、特徵工程與模型穩定性修復。
# 3. 在訓練結束後，自動打包所有部署所需文件：
#    - model.pth (模型權重)
#    - scaler.pkl (數據縮放器)
#    - label_encoder.pkl (標籤編碼器)
#    - feature_selector.pkl (特徵選擇器)
#    - selected_features.json (所選特徵列表)
#    - model_info.json (模型元數據)
# ===================================================================

# 1. 導入必要的庫
import numpy as np
import pandas as pd
import os
import gc
import json
import joblib
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torch.amp
from torch.amp import GradScaler

# 引入 sklearn 相關函式庫
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif, VarianceThreshold
from sklearn.metrics import classification_report, confusion_matrix

# 引入 tqdm 函式庫來顯示進度條
from tqdm import tqdm

# ===================================================================
# 2. 模型組件定義
# ===================================================================

class ResidualBlock(nn.Module):
    """Two-layer fully connected residual block.

    The inner stack is Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear ->
    BatchNorm1d, all at width ``dim``. The block input is added back to the
    stack output, then dropout and ReLU are applied to the sum.

    Args:
        dim: Width of both the input and the output.
        dropout_rate: Probability used by both dropout layers.
    """

    def __init__(self, dim, dropout_rate=0.2):
        super().__init__()
        layers = [
            nn.Linear(dim, dim),
            nn.BatchNorm1d(dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(dim, dim),
            nn.BatchNorm1d(dim),
        ]
        self.block = nn.Sequential(*layers)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return ``relu(dropout(block(x) + x))``."""
        summed = self.block(x) + x
        return F.relu(self.dropout(summed))

class SelfAttentionBranch(nn.Module):
    """Classifier branch that gates a value projection with a scalar score.

    Each sample is projected to query, key and value vectors of size
    ``attention_dim = min(64, input_dim)``. The query/key dot product is
    squashed with a sigmoid into one gate value per sample, which scales the
    value vector. The gated vector is projected back to ``input_dim`` and fed
    to a small MLP classifier.

    Args:
        input_dim: Number of input features.
        num_classes: Number of output logits.
        dropout_rate: Dropout probability inside the classifier.
    """

    def __init__(self, input_dim, num_classes, dropout_rate=0.2):
        super().__init__()
        self.attention_dim = min(64, input_dim)
        # Registration order is kept as query, key, value, projection,
        # classifier so seeded initialisation stays reproducible.
        self.query = nn.Linear(input_dim, self.attention_dim)
        self.key = nn.Linear(input_dim, self.attention_dim)
        self.value = nn.Linear(input_dim, self.attention_dim)
        self.output_projection = nn.Linear(self.attention_dim, input_dim)
        head = [
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, num_classes),
        ]
        self.classifier = nn.Sequential(*head)
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise every Linear weight and zero every Linear bias."""
        linear_layers = (m for m in self.modules() if isinstance(m, nn.Linear))
        for layer in linear_layers:
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is None:
                continue
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return class logits for a 2-D batch ``x``."""
        # One raw score per row: the dot product of its query and key vectors.
        scores = torch.sum(self.query(x) * self.key(x), dim=1, keepdim=True)
        gate = torch.clamp(torch.sigmoid(scores), min=1e-8, max=1.0)
        gated_values = gate * self.value(x)
        return self.classifier(self.output_projection(gated_values))

class FeatureInteractionBranch(nn.Module):
    """Classifier branch that augments the input with pairwise interactions.

    The input is projected to ``interaction_dim = min(8, input_dim // 8)``
    tanh-squashed embedding columns. Every unordered pair of columns
    ``(i, j)`` with ``i < j`` is multiplied element-wise, and the products are
    concatenated to the raw input before a small MLP classifier. When fewer
    than two embedding columns exist, a single all-zero interaction column is
    used so the classifier input width stays well defined.

    Args:
        input_dim: Number of input features.
        num_classes: Number of output logits.
        dropout_rate: Dropout probability inside the classifier.
    """

    def __init__(self, input_dim, num_classes, dropout_rate=0.2):
        super().__init__()
        self.interaction_dim = min(8, input_dim // 8)
        self.feature_embeddings = nn.Linear(input_dim, self.interaction_dim)
        self.interaction_output_dim = (self.interaction_dim * (self.interaction_dim - 1)) // 2
        if self.interaction_output_dim == 0:
            # Fewer than two embedding columns: forward() emits one zero column.
            self.interaction_output_dim = 1
        self.classifier = nn.Sequential(
            nn.Linear(input_dim + self.interaction_output_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(128, num_classes)
        )

        # Precompute the (i, j) column pairs with i < j once, in row-major
        # order, so forward() can form all products with a single gather
        # instead of a Python double loop. The buffers are non-persistent so
        # state_dict keys are unchanged and existing checkpoints still load.
        if self.interaction_dim > 1:
            pairs = torch.triu_indices(self.interaction_dim, self.interaction_dim, offset=1)
            left, right = pairs[0], pairs[1]
        else:
            left = torch.empty(0, dtype=torch.long)
            right = torch.empty(0, dtype=torch.long)
        self.register_buffer('_pair_left', left, persistent=False)
        self.register_buffer('_pair_right', right, persistent=False)

        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise every Linear weight and zero every Linear bias."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return class logits for a 2-D batch ``x``."""
        embeddings = torch.tanh(self.feature_embeddings(x))

        if self._pair_left.numel() > 0:
            # Column k holds embeddings[:, i_k] * embeddings[:, j_k].
            interaction_features = embeddings[:, self._pair_left] * embeddings[:, self._pair_right]
        else:
            # Match the dtype and device of x so the concatenation below
            # never mixes precisions.
            interaction_features = x.new_zeros(x.size(0), 1)

        combined_features = torch.cat([x, interaction_features], dim=1)
        return self.classifier(combined_features)

class Ensemble_Hybrid(nn.Module):
    """Five-branch ensemble classifier with learned fusion.

    Five branches (deep MLP, wide MLP, residual MLP, self-attention, feature
    interaction) each map the input to ``num_classes`` logits. The branch
    logits are combined in two ways:

    * a weighted sum, mixing per-sample weights from ``weight_net`` (60%)
      with one learned global weight vector (40%);
    * a fusion MLP applied to the concatenated branch logits.

    The returned logits are ``0.7 * fusion + 0.3 * weighted_sum``.

    Args:
        input_dim: Number of input features.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used throughout the network.
    """

    def __init__(self, input_dim, num_classes=15, dropout_rate=0.2):
        super().__init__()
        self.input_dim = input_dim
        self.num_classes = num_classes

        self.deep_branch = nn.Sequential(
            nn.Linear(input_dim, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(128, num_classes)
        )
        self.wide_branch = nn.Sequential(
            nn.Linear(input_dim, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(256, num_classes)
        )
        self.res_branch = nn.Sequential(
            nn.Linear(input_dim, 128), nn.BatchNorm1d(128), nn.ReLU(),
            ResidualBlock(128, dropout_rate),
            ResidualBlock(128, dropout_rate),
            nn.Linear(128, num_classes)
        )
        self.attention_branch = SelfAttentionBranch(input_dim, num_classes, dropout_rate)
        self.interaction_branch = FeatureInteractionBranch(input_dim, num_classes, dropout_rate)

        # Per-sample softmax weights over the five branches.
        self.weight_net = nn.Sequential(
            nn.Linear(input_dim, 32), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(32, 5), nn.Softmax(dim=1)
        )
        # MLP over the concatenated logits of all five branches.
        self.final_fusion = nn.Sequential(
            nn.Linear(num_classes * 5, 128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(128, num_classes)
        )
        # Sample-independent branch weights; softmax-normalised in forward().
        self.global_weights = nn.Parameter(torch.ones(5) / 5)
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise Linear layers and reset BatchNorm to identity."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm1d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    @staticmethod
    def _sanitize(tensor):
        """Replace NaN with 0, +inf with 1 and -inf with -1.

        Applied unconditionally: finite values pass through unchanged, and
        skipping the ``isnan().any()`` / ``isinf().any()`` pre-check avoids a
        host/device synchronisation on every call.
        """
        return torch.nan_to_num(tensor, nan=0.0, posinf=1.0, neginf=-1.0)

    def forward(self, x, return_intermediate=False):
        """Compute the ensemble logits.

        Args:
            x: 2-D input batch with ``input_dim`` columns.
            return_intermediate: When true, return a dict with the keys
                ``'ensemble'``, ``'final_fusion'``, ``'weighted'``,
                ``'branches'`` (tuple of the five branch logits) and
                ``'weights'`` (``(adaptive_weights, global_weights)``)
                instead of just the ensemble logits.

        Returns:
            The ensemble logits, or the dict described above.
        """
        x = self._sanitize(x)

        branches = (
            self.deep_branch, self.wide_branch, self.res_branch,
            self.attention_branch, self.interaction_branch,
        )
        outputs = [self._sanitize(branch(x)) for branch in branches]

        adaptive_weights = torch.clamp(self.weight_net(x), min=1e-8, max=1.0)
        global_weights = torch.clamp(F.softmax(self.global_weights, dim=0), min=1e-8, max=1.0)

        # (batch, num_classes, 5): the last axis indexes the branch.
        outputs_stack = torch.stack(outputs, dim=2)

        weighted_output_adaptive = torch.sum(outputs_stack * adaptive_weights.unsqueeze(1), dim=2)
        weighted_output_global = torch.sum(outputs_stack * global_weights.unsqueeze(0).unsqueeze(0), dim=2)
        weighted_output = 0.6 * weighted_output_adaptive + 0.4 * weighted_output_global

        concatenated = torch.cat(outputs, dim=1)
        final_output = self.final_fusion(concatenated)

        ensemble_output = self._sanitize(0.7 * final_output + 0.3 * weighted_output)

        if return_intermediate:
            return {
                'ensemble': ensemble_output, 'final_fusion': final_output,
                'weighted': weighted_output, 'branches': tuple(outputs),
                'weights': (adaptive_weights, global_weights)
            }
        return ensemble_output

# ===================================================================
# 3. 損失函數
# ===================================================================

class EnsembleLoss(nn.Module):
    """Label-smoothed cross-entropy with optional auxiliary ensemble terms.

    For a plain logits tensor this is ordinary label-smoothed cross-entropy.
    For the dict returned by the model's ``return_intermediate=True`` mode the
    loss is::

        alpha * CE(ensemble)
        + (1 - alpha) * 0.7 * mean(CE(branch) over finite branches)
        + (1 - alpha) * 0.3 * CE(final_fusion)

    Any of the three terms that evaluates to NaN is replaced by zero.

    Args:
        num_classes: Accepted for interface compatibility; not used here.
        alpha: Weight of the main ensemble term.
        label_smoothing: Smoothing factor passed to ``nn.CrossEntropyLoss``.
    """

    def __init__(self, num_classes, alpha=0.7, label_smoothing=0.1):
        super().__init__()
        self.alpha = alpha
        self.criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)

    def forward(self, outputs, targets):
        """Return the scalar loss for ``outputs`` (tensor or dict) and ``targets``."""
        if not isinstance(outputs, dict):
            return self.criterion(outputs, targets)

        def zero():
            return torch.tensor(0.0, device=targets.device)

        def nan_to_zero(term):
            return zero() if torch.isnan(term) else term

        ensemble_term = self.criterion(outputs['ensemble'], targets)

        # Branches containing NaN or inf are left out of the auxiliary mean.
        branch_terms = [
            self.criterion(logits, targets)
            for logits in outputs['branches']
            if torch.isfinite(logits).all()
        ]
        if branch_terms:
            branch_term = torch.stack(branch_terms).mean()
        else:
            branch_term = zero()

        fusion_term = self.criterion(outputs['final_fusion'], targets)

        ensemble_term = nan_to_zero(ensemble_term)
        branch_term = nan_to_zero(branch_term)
        fusion_term = nan_to_zero(fusion_term)

        remainder = 1 - self.alpha
        return (self.alpha * ensemble_term
                + remainder * 0.7 * branch_term
                + remainder * 0.3 * fusion_term)

# ===================================================================
# 4. 訓練與評估函數
# ===================================================================

def train_ensemble_model(model, train_loader, val_loader, device, num_epochs, patience, use_multi_gpu):
    """Train the model with early stopping and checkpoint the best epoch.

    Uses AdamW (lr 1e-4, weight decay 1e-5), ReduceLROnPlateau on the
    validation loss, gradient-norm clipping at 0.5, and ``EnsembleLoss``.
    During the first 10 epochs the model is called with
    ``return_intermediate=True`` so the auxiliary branch and fusion terms
    contribute to the training loss; afterwards only the ensemble logits are
    used. Validation always uses the plain ensemble logits.

    Losses are averaged over the batches that actually contributed. A batch
    skipped for a NaN loss or NaN outputs no longer dilutes the average, and
    an epoch whose validation batches were all skipped gets an infinite
    validation loss instead of 0.0, so it can never be saved as the best one.

    Args:
        model: The network, optionally wrapped in ``nn.DataParallel``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        device: Device the batches are moved to.
        num_epochs: Maximum number of epochs.
        patience: Epochs without validation-loss improvement before stopping.
        use_multi_gpu: True when ``model`` is a ``DataParallel`` wrapper; the
            checkpoint then stores ``model.module.state_dict()``.

    Returns:
        Path of the best-checkpoint file. The file is written only when some
        epoch improves on the initial infinite validation loss.
    """
    print("\n--- 開始訓練 Ensemble_Hybrid 模型 ---")
    actual_model = model.module if use_multi_gpu else model
    optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3)
    criterion = EnsembleLoss(num_classes=actual_model.num_classes, label_smoothing=0.1)
    best_val_loss = float('inf')
    patience_counter = 0
    model_output_dir = "/kaggle/working/ensemble_hybrid_model_checkpoint"
    os.makedirs(model_output_dir, exist_ok=True)
    best_model_path = os.path.join(model_output_dir, "best_ensemble_model.pth")

    for epoch in range(num_epochs):
        # ---- Training pass ----
        model.train()
        total_loss = 0.0
        train_batches = 0
        train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [訓練]", leave=False)

        for inputs, labels in train_pbar:
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            if torch.isnan(inputs).any():
                inputs = torch.nan_to_num(inputs, nan=0.0)

            optimizer.zero_grad(set_to_none=True)
            # Auxiliary losses are only active during the first 10 epochs.
            outputs = model(inputs, return_intermediate=(epoch < 10))
            loss = criterion(outputs, labels)

            # Skip the update entirely rather than back-propagate a NaN.
            if torch.isnan(loss):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
            optimizer.step()

            loss_value = loss.item()
            total_loss += loss_value
            train_batches += 1
            train_pbar.set_postfix({'Loss': f'{loss_value:.4f}'})

        # Average over the batches that produced a usable loss.
        avg_train_loss = total_loss / train_batches if train_batches > 0 else float('nan')

        # ---- Validation pass ----
        model.eval()
        total_val_loss, val_batches, val_correct, val_total = 0.0, 0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                if torch.isnan(inputs).any():
                    inputs = torch.nan_to_num(inputs, nan=0.0)
                outputs = model(inputs)
                if isinstance(outputs, dict):
                    outputs = outputs['ensemble']
                if torch.isnan(outputs).any():
                    continue

                loss = criterion(outputs, labels)
                if not torch.isnan(loss):
                    total_val_loss += loss.item()
                    val_batches += 1

                predicted = torch.max(outputs, 1).indices
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        # No usable validation batch means "no evidence of improvement".
        avg_val_loss = total_val_loss / val_batches if val_batches > 0 else float('inf')
        val_accuracy = 100 * val_correct / val_total if val_total > 0 else 0
        scheduler.step(avg_val_loss)

        print(f"Epoch {epoch+1}/{num_epochs} | 訓練損失: {avg_train_loss:.4f} | 驗證損失: {avg_val_loss:.4f} | 驗證準確率: {val_accuracy:.2f}%")

        if avg_val_loss < best_val_loss and not np.isnan(avg_val_loss):
            best_val_loss = avg_val_loss
            patience_counter = 0
            # Save the unwrapped weights so keys carry no 'module.' prefix.
            model_state_dict = model.module.state_dict() if use_multi_gpu else model.state_dict()
            torch.save({
                'model_state_dict': model_state_dict, 'epoch': epoch,
                'val_loss': avg_val_loss, 'val_accuracy': val_accuracy
            }, best_model_path)
            print(f"  ✓ 驗證損失降低，模型已保存")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"\n早停機制觸發：在 {patience} 個週期內無改進。")
                break

    print("\n訓練完成！")
    return best_model_path

def evaluate_ensemble_model(model, test_loader, device, label_encoder):
    """Run the model over the test loader and build a classification report.

    Args:
        model: Network to evaluate; switched to eval mode here.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the inputs are moved to before the forward pass.
        label_encoder: Fitted encoder whose ``classes_`` supply the row names.

    Returns:
        The text report produced by ``classification_report`` with four
        decimal digits and ``zero_division=0``.
    """
    print("\n--- 正在評估 Ensemble_Hybrid 模型 ---")
    model.eval()
    predictions = []
    ground_truth = []

    with torch.no_grad():
        for features, targets in tqdm(test_loader, desc="評估測試集"):
            features = features.to(device, non_blocking=True)
            if torch.isnan(features).any():
                features = torch.nan_to_num(features, nan=0.0)

            logits = model(features)
            # Accept the dict form returned in return_intermediate mode.
            if isinstance(logits, dict):
                logits = logits['ensemble']
            if torch.isnan(logits).any():
                logits = torch.nan_to_num(logits, nan=0.0)

            # Index 1 of torch.max(..., dim) holds the arg-max class ids.
            batch_predictions = torch.max(logits, 1)[1]
            predictions.extend(batch_predictions.cpu().numpy())
            ground_truth.extend(targets.cpu().numpy())

    class_names = [str(cls) for cls in label_encoder.classes_]
    return classification_report(
        ground_truth, predictions,
        target_names=class_names,
        zero_division=0, digits=4
    )

# ===================================================================
# 5. 新增：保存所有模型文件的函數
# ===================================================================
def save_model_files(model, scaler, label_encoder, feature_selector, selected_features, 
                    model_dir='/kaggle/working/ensemble_hybrid_model_package'):
    """Write every artifact needed for deployment into ``model_dir``.

    Files written:
        model.pth: state dict plus ``input_dim`` / ``num_classes`` metadata.
        scaler.pkl, label_encoder.pkl, feature_selector.pkl: joblib dumps.
        selected_features.json: the selected feature names.
        model_info.json: dataset, model and preprocessing metadata.

    Args:
        model: Trained network; a ``DataParallel`` wrapper is unwrapped first.
        scaler: Fitted scaler object.
        label_encoder: Fitted label encoder (provides ``classes_``).
        feature_selector: Fitted feature selector object.
        selected_features: List of selected feature names.
        model_dir: Output directory; created if missing.
    """
    print(f"\n--- 正在保存完整的模型文件包至 {model_dir}/ ---")
    os.makedirs(model_dir, exist_ok=True)

    def target(filename):
        return os.path.join(model_dir, filename)

    def write_json(payload, filename):
        with open(target(filename), 'w') as handle:
            json.dump(payload, handle, indent=2)

    # Strip the DataParallel wrapper so saved keys have no 'module.' prefix.
    if isinstance(model, nn.DataParallel):
        actual_model = model.module
    else:
        actual_model = model

    # 1. PyTorch weights together with the constructor arguments.
    weights_payload = {
        'model_state_dict': actual_model.state_dict(),
        'model_architecture': 'Ensemble_Hybrid',
        'input_dim': actual_model.input_dim,
        'num_classes': actual_model.num_classes,
    }
    torch.save(weights_payload, target('model.pth'))
    print("✓ 已保存 model.pth")

    # 2. scikit-learn preprocessing objects.
    joblib.dump(scaler, target('scaler.pkl'))
    print("✓ 已保存 scaler.pkl")

    joblib.dump(label_encoder, target('label_encoder.pkl'))
    print("✓ 已保存 label_encoder.pkl")

    joblib.dump(feature_selector, target('feature_selector.pkl'))
    print("✓ 已保存 feature_selector.pkl")

    # 3. Selected feature names as JSON.
    write_json(selected_features, 'selected_features.json')
    print("✓ 已保存 selected_features.json")

    # 4. Model metadata. The variance threshold is a literal written here,
    #    not a value read back from a fitted object.
    feature_count = len(selected_features)
    preprocessing_steps = {
        'variance_threshold': 0.001,
        'feature_selection_method': 'SelectKBest',
        'k_features': feature_count,
        'scaling_method': 'StandardScaler'
    }
    model_info = {
        'dataset': 'CIC-IDS2017',
        'model_type': 'PyTorch Ensemble_Hybrid',
        'num_features_selected': feature_count,
        'num_classes': len(label_encoder.classes_),
        'classes': label_encoder.classes_.tolist(),
        'preprocessing_steps': preprocessing_steps
    }
    write_json(model_info, 'model_info.json')
    print("✓ 已保存 model_info.json")

# ===================================================================
# 6. 主執行函數 (修改版 - 整合所有功能)
# ===================================================================
def main():
    """Run the full pipeline: load, preprocess, train, evaluate, package.

    Steps:
        1. Seed NumPy and PyTorch and load every ``.parquet`` file under
           ``/kaggle/input/cicids2017``.
        2. Clean the data: inf/NaN to 0, clip numeric columns to their
           1st-99th percentile range, drop duplicate rows.
        3. Merge labels with fewer than 1000 samples into ``'Rare_Attack'``.
        4. Drop low-variance features, encode labels, and split into
           train / validation / test sets (stratified).
        5. Select up to 64 features with ``SelectKBest`` and standardise,
           fitting both on the training split only.
        6. Train ``Ensemble_Hybrid``, reload the best checkpoint, print the
           test-set report, and save the deployment artifacts.

    Raises:
        FileNotFoundError: If the data directory is missing or holds no
            ``.parquet`` files.
    """
    # --- Setup and environment checks ---
    pd.set_option('display.max_columns', None)
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # --- Data loading ---
    data_path = '/kaggle/input/cicids2017'
    print(f"正在從路徑讀取數據: {data_path}")
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"錯誤：找不到數據路徑 '{data_path}'。")
    
    parquet_files = [os.path.join(data_path, f) for f in os.listdir(data_path) if f.endswith('.parquet')]
    if not parquet_files:
        raise FileNotFoundError(f"錯誤：路徑 '{data_path}' 中找不到 .parquet 檔案。")

    df_list = [pd.read_parquet(file) for file in tqdm(parquet_files, desc="正在載入 Parquet 檔案")]
    df = pd.concat(df_list, ignore_index=True)
    del df_list; gc.collect()
    print(f"數據載入完成，原始形狀: {df.shape}")

    # --- Preprocessing ---
    # Column names are stripped of surrounding whitespace, so the label
    # column is always addressed as 'Label' afterwards.
    df.rename(columns={col: col.strip() for col in df.columns}, inplace=True)
    label_column = 'Label'

    print("\n--- 正在進行數據清洗 ---")
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)
    
    # NOTE(review): the clipping quantiles are computed on the full dataset
    # before the train/validation/test split and are not saved with the
    # deployment package, so the test rows influence preprocessing and
    # inference cannot reproduce this step exactly. Consider fitting the
    # bounds on the training split and persisting them.
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    for col in numeric_columns:
        if col != label_column:
            q99 = df[col].quantile(0.99)
            q01 = df[col].quantile(0.01)
            df[col] = df[col].clip(lower=q01, upper=q99)

    initial_rows = df.shape[0]
    df.drop_duplicates(inplace=True)
    print(f"移除了 {initial_rows - df.shape[0]} 筆重複記錄。")
    df.reset_index(drop=True, inplace=True)

    X = df.drop(columns=[label_column])
    y = df[label_column].copy()
    del df; gc.collect()

    # Normalise label text, then merge rare classes into a single bucket.
    y = y.str.replace(r'[^a-zA-Z0-9\s-]', '', regex=True).str.strip()
    label_counts = y.value_counts()
    threshold = 1000
    rare_labels = label_counts[label_counts < threshold].index
    if not rare_labels.empty:
        print(f"將 {len(rare_labels)} 個稀有類別合併為 'Rare_Attack'")
        y.replace(rare_labels, 'Rare_Attack', inplace=True)
    
    print("\n--- 正在移除低方差/常數特徵 ---")
    var_selector = VarianceThreshold(threshold=0.001)
    X_filtered = var_selector.fit_transform(X)
    retained_columns = X.columns[var_selector.get_support()]
    X = pd.DataFrame(X_filtered, index=X.index, columns=retained_columns)
    print(f"移除低方差特徵後，剩餘 {X.shape[1]} 個特徵。")

    X = X.fillna(0)
    X = X.replace([np.inf, -np.inf], 0)

    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    num_classes = len(label_encoder.classes_)
    print(f"\n總共有 {num_classes} 個類別")

    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=seed, stratify=y_encoded
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=0.2, random_state=seed, stratify=y_train_val
    )

    # Feature selection, fitted on the training split only.
    K_FEATURES = min(64, X_train.shape[1])
    selector = SelectKBest(f_classif, k=K_FEATURES)
    X_train_selected = selector.fit_transform(X_train, y_train)
    X_val_selected = selector.transform(X_val)
    X_test_selected = selector.transform(X_test)
    
    selected_features_mask = selector.get_support()
    selected_features = X_train.columns[selected_features_mask].tolist()
    print(f"\n成功選取了 {len(selected_features)} 個特徵進行訓練。")

    # Standardisation, fitted on the training split only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_selected)
    X_val_scaled = scaler.transform(X_val_selected)
    X_test_scaled = scaler.transform(X_test_selected)
    
    X_train_scaled = np.nan_to_num(X_train_scaled, nan=0.0, posinf=1.0, neginf=-1.0)
    X_val_scaled = np.nan_to_num(X_val_scaled, nan=0.0, posinf=1.0, neginf=-1.0)
    X_test_scaled = np.nan_to_num(X_test_scaled, nan=0.0, posinf=1.0, neginf=-1.0)
    
    print("\n所有數據預處理與特徵工程已完成！")

    # --- Device and DataLoader preparation ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"\n使用設備: {device}")

    batch_size = 512
    # os.cpu_count() may return None when the count cannot be determined.
    num_workers = min(2, os.cpu_count() or 1)
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        # Pinned host memory only helps when batches are copied to a GPU.
        pin_memory=(device.type == 'cuda'),
        persistent_workers=num_workers > 0,
    )
    
    train_dataset = TensorDataset(torch.from_numpy(X_train_scaled).float(), torch.from_numpy(y_train).long())
    val_dataset = TensorDataset(torch.from_numpy(X_val_scaled).float(), torch.from_numpy(y_val).long())
    test_dataset = TensorDataset(torch.from_numpy(X_test_scaled).float(), torch.from_numpy(y_test).long())
    
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    # --- Model initialisation and training ---
    model = Ensemble_Hybrid(input_dim=K_FEATURES, num_classes=num_classes, dropout_rate=0.2)
    
    use_multi_gpu = torch.cuda.device_count() > 1
    if use_multi_gpu:
        print(f"\n檢測到 {torch.cuda.device_count()} 個GPU，啟用並行訓練。")
        model = nn.DataParallel(model)
    model.to(device)
    
    print(f"模型總參數數量: {sum(p.numel() for p in model.parameters()):,}")

    best_model_path = train_ensemble_model(
        model=model, train_loader=train_loader, val_loader=val_loader,
        device=device, num_epochs=30, patience=8, use_multi_gpu=use_multi_gpu
    )

    # --- Evaluation and artifact packaging ---
    del model, train_loader, val_loader; gc.collect()
    if torch.cuda.is_available(): torch.cuda.empty_cache()

    print("\n--- 載入最佳模型進行最終評估 ---")
    final_model = Ensemble_Hybrid(input_dim=K_FEATURES, num_classes=num_classes, dropout_rate=0.2)
    checkpoint = torch.load(best_model_path, map_location=device)
    
    # The weights are loaded into the bare (unwrapped) model, so the keys
    # must never carry DataParallel's 'module.' prefix, whatever the number
    # of GPUs. Strip it if present; never add it.
    prefix = 'module.'
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in checkpoint['model_state_dict'].items()
    }
    final_model.load_state_dict(state_dict)

    if use_multi_gpu:
        final_model = nn.DataParallel(final_model)
    final_model.to(device)
    print(f"成功載入來自 Epoch {checkpoint['epoch']+1} 的最佳模型。")

    report = evaluate_ensemble_model(
        final_model, test_loader, device, label_encoder
    )

    print("\n🎉🎉🎉 --- 最終評估報告 (測試集) --- 🎉🎉🎉")
    print(report)

    # --- Save every file needed for deployment ---
    save_model_files(
        model=final_model,
        scaler=scaler,
        label_encoder=label_encoder,
        feature_selector=selector,
        selected_features=selected_features
    )
    
    print("\n✅ 訓練與模型文件打包全部完成！")


# ===================================================================
# 7. 程序入口
# ===================================================================
if __name__ == '__main__':
    # Script entry point: run the pipeline and report the outcome. Any
    # Exception is printed with its traceback instead of propagating.
    import traceback

    try:
        main()
        print("\n🎉 腳本執行成功！")
    except Exception as e:
        print(f"\n❌ 執行過程中出現錯誤: {str(e)}")
        traceback.print_exc()

正在從路徑讀取數據: /kaggle/input/cicids2017


正在載入 Parquet 檔案: 100%|██████████| 8/8 [00:00<00:00,  8.55it/s]


數據載入完成，原始形狀: (2313810, 78)

--- 正在進行數據清洗 ---
移除了 84674 筆重複記錄。
將 4 個稀有類別合併為 'Rare_Attack'

--- 正在移除低方差/常數特徵 ---
移除低方差特徵後，剩餘 65 個特徵。

總共有 12 個類別

成功選取了 64 個特徵進行訓練。

所有數據預處理與特徵工程已完成！

使用設備: cuda
模型總參數數量: 351,130

--- 開始訓練 Ensemble_Hybrid 模型 ---


                                                                                   

Epoch 1/30 | 訓練損失: 0.6440 | 驗證損失: 0.5526 | 驗證準確率: 99.17%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 2/30 | 訓練損失: 0.5721 | 驗證損失: 0.5466 | 驗證準確率: 99.24%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 3/30 | 訓練損失: 0.5621 | 驗證損失: 0.5429 | 驗證準確率: 99.43%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 4/30 | 訓練損失: 0.5565 | 驗證損失: 0.5414 | 驗證準確率: 99.48%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 5/30 | 訓練損失: 0.5519 | 驗證損失: 0.5401 | 驗證準確率: 99.49%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 6/30 | 訓練損失: 0.5497 | 驗證損失: 0.5395 | 驗證準確率: 99.51%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 7/30 | 訓練損失: 0.5482 | 驗證損失: 0.5390 | 驗證準確率: 99.52%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 8/30 | 訓練損失: 0.5471 | 驗證損失: 0.5385 | 驗證準確率: 99.53%
  ✓ 驗證損失降低，模型已保存


                                                                                   

Epoch 9/30 | 訓練損失: 0.5463 | 驗證損失: 0.5383 | 驗證準確率: 99.51%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 10/30 | 訓練損失: 0.5455 | 驗證損失: 0.5378 | 驗證準確率: 99.56%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 11/30 | 訓練損失: 0.5411 | 驗證損失: 0.5376 | 驗證準確率: 99.55%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 12/30 | 訓練損失: 0.5401 | 驗證損失: 0.5373 | 驗證準確率: 99.57%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 13/30 | 訓練損失: 0.5396 | 驗證損失: 0.5383 | 驗證準確率: 99.47%


                                                                                    

Epoch 14/30 | 訓練損失: 0.5392 | 驗證損失: 0.5369 | 驗證準確率: 99.57%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 15/30 | 訓練損失: 0.5388 | 驗證損失: 0.5369 | 驗證準確率: 99.57%


                                                                                    

Epoch 16/30 | 訓練損失: 0.5385 | 驗證損失: 0.5366 | 驗證準確率: 99.58%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 17/30 | 訓練損失: 0.5383 | 驗證損失: 0.5367 | 驗證準確率: 99.58%


                                                                                    

Epoch 18/30 | 訓練損失: 0.5381 | 驗證損失: 0.5364 | 驗證準確率: 99.61%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 19/30 | 訓練損失: 0.5378 | 驗證損失: 0.5363 | 驗證準確率: 99.61%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 20/30 | 訓練損失: 0.5378 | 驗證損失: 0.5362 | 驗證準確率: 99.62%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 21/30 | 訓練損失: 0.5376 | 驗證損失: 0.5360 | 驗證準確率: 99.62%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 22/30 | 訓練損失: 0.5375 | 驗證損失: 0.5362 | 驗證準確率: 99.60%


                                                                                    

Epoch 23/30 | 訓練損失: 0.5373 | 驗證損失: 0.5359 | 驗證準確率: 99.63%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 24/30 | 訓練損失: 0.5372 | 驗證損失: 0.5359 | 驗證準確率: 99.61%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 25/30 | 訓練損失: 0.5371 | 驗證損失: 0.5358 | 驗證準確率: 99.63%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 26/30 | 訓練損失: 0.5370 | 驗證損失: 0.5357 | 驗證準確率: 99.64%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 27/30 | 訓練損失: 0.5369 | 驗證損失: 0.5356 | 驗證準確率: 99.64%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 28/30 | 訓練損失: 0.5368 | 驗證損失: 0.5355 | 驗證準確率: 99.63%
  ✓ 驗證損失降低，模型已保存


                                                                                    

Epoch 29/30 | 訓練損失: 0.5367 | 驗證損失: 0.5357 | 驗證準確率: 99.64%


                                                                                    

Epoch 30/30 | 訓練損失: 0.5366 | 驗證損失: 0.5360 | 驗證準確率: 99.59%

訓練完成！

--- 載入最佳模型進行最終評估 ---
成功載入來自 Epoch 28 的最佳模型。

--- 正在評估 Ensemble_Hybrid 模型 ---


評估測試集: 100%|██████████| 871/871 [00:06<00:00, 144.47it/s]



🎉🎉🎉 --- 最終評估報告 (測試集) --- 🎉🎉🎉
                         precision    recall  f1-score   support

                 Benign     0.9978    0.9979    0.9979    378533
                    Bot     0.6020    0.8432    0.7025       287
                   DDoS     0.9983    0.9991    0.9987     25603
          DoS GoldenEye     0.9898    0.9893    0.9895      2054
               DoS Hulk     0.9874    0.9918    0.9896     34569
       DoS Slowhttptest     0.8907    0.9895    0.9375      1046
          DoS slowloris     0.9861    0.9870    0.9865      1077
            FTP-Patator     0.9899    0.9933    0.9916      1186
               PortScan     0.9248    0.9437    0.9342       391
            Rare_Attack     1.0000    0.0278    0.0541       144
            SSH-Patator     0.9983    0.9208    0.9580       644
Web Attack  Brute Force     1.0000    0.0986    0.1796       294

               accuracy                         0.9962    445828
              macro avg     0.9471    0.8152    0.8100    