## __Check first before starting__

In [1]:
import os

# Switch the kernel's working directory to the project root and echo it,
# so the resolved location is visible in the cell output.
# NOTE(review): this is a machine-specific absolute path; it must be edited before
# running the notebook on another host.
Working_directory = os.path.normpath("/mnt/mydisk/Continual_Learning_JL/Continual_Learning/")
os.chdir(Working_directory)
print(f"Working directory: {os.getcwd()}")

Working directory: /mnt/mydisk/Continual_Learning_JL/Continual_Learning


## __All imports__

In [49]:
# Operating system and file management
import os
import shutil
import contextlib
import traceback
import gc
import copy
from collections import defaultdict
import subprocess
import time
import re, pickle
import scipy.io
from scipy.io import loadmat
from glob import glob
from math import ceil

# Jupyter notebook widgets and display
import ipywidgets as widgets
from IPython.display import display

# Data manipulation and analysis
import pandas as pd
import numpy as np

# Plotting and visualization
import matplotlib.pyplot as plt
from mpl_interactions import zoom_factory, panhandler

# Machine learning and preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pickle
from ta import trend, momentum, volatility, volume

# Mathematical and scientific computing
import math
from scipy.ndimage import gaussian_filter1d

# Type hinting
from typing import Callable, Tuple

# Deep learning with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision.models import resnet18
from sklearn.utils.class_weight import compute_class_weight

## __📁 Path Settings and Constants__
This cell defines essential paths and constants for the CPSC2018 ECG dataset processing:
- `BASE_DIR`: Root directory of the project.
- `save_dir`: Path to the preprocessed `.npy` files (one set of `X_*`/`y_*` train and test arrays per continual learning period, e.g. `X_train_p4.npy`).
- `ECG_PATH`: Directory containing original `.mat` and `.hea` files.
- `MAX_LEN`: Length of each ECG sample, fixed to 5000 time steps (i.e., 10 seconds at 500Hz).

In [67]:
# Root directory of the CPSC class-incremental experiment.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
BASE_DIR = "/mnt/mydisk/Continual_Learning_JL/Continual_Learning/Class_Incremental_CL/CPSC_CIL"
# Folder holding the preprocessed .npy arrays.
save_dir = os.path.join(BASE_DIR, "processed")
# Folder holding the original .mat / .hea recordings.
ECG_PATH = os.path.join(BASE_DIR, "datas")
# Fixed number of time steps per ECG sample (10 s at 500 Hz).
MAX_LEN = 5000

## __🏷️ Label Mapping and Period Configuration__

This section defines:
- `snomed_map`: Mapping from SNOMED CT codes to readable class names for 9 major ECG conditions.
- `period_label_map`: Incremental learning task structure across four periods.  
  Class `1` is reserved for "OTHER" abnormalities until Period 4 when all 9 classes are explicitly categorized.
- `print_class_distribution()`: Helper function to show class-wise data distribution.


In [4]:
# SNOMED CT to readable names
snomed_map = {
    "426783006": "NSR",    # Normal sinus rhythm
    "270492004": "I-AVB",  # First-degree atrioventricular block
    "164889003": "AF",     # Atrial fibrillation
    "164909002": "LBBB",   # Left bundle branch block
    "59118001":  "RBBB",   # Right bundle branch block
    "284470004": "PAC",    # Premature atrial contraction
    "164884008": "PVC",    # Premature ventricular contraction
    "429622005": "STD",    # ST-segment depression
    "164931005": "STE"     # ST-segment elevation
}

# Period class mapping (class 1 is fixed as "other abnormalities" until it is removed in P4).
# Keys are period numbers; each value maps a class name to its integer label id.
# Ids already assigned in an earlier period keep the same value in later periods.
period_label_map = {
    1: {"NSR": 0, "OTHER": 1},
    2: {"NSR": 0, "I-AVB": 2, "AF": 3, "OTHER": 1},
    3: {"NSR": 0, "I-AVB": 2, "AF": 3, "LBBB": 4, "RBBB": 5, "OTHER": 1},
    4: {"NSR": 0, "I-AVB": 2, "AF": 3, "LBBB": 4, "RBBB": 5, "PAC": 6, "PVC": 7, "STD": 8, "STE": 9}
}

def print_class_distribution(y, label_map):
    """
    Print how many samples fall into each class of ``label_map``.

    Args:
        y: Array-like of integer label ids; any shape, it is flattened first.
        label_map: Dict mapping class name -> integer label id
            (e.g. one entry of ``period_label_map``).

    One row is printed per distinct label id, in ascending id order, showing the
    id, the first class name mapped to it, the sample count and its percentage.
    An empty ``y`` prints 0 samples / 0.00% for every class instead of dividing
    by zero.
    """
    y = np.asarray(y).ravel()
    total = len(y)

    # Reverse lookup built once; setdefault keeps the first name seen for an id.
    name_by_label = {}
    for name, lbl in label_map.items():
        name_by_label.setdefault(lbl, name)

    print("\n📊 Class Distribution")
    # Iterating the reverse map (not label_map.values()) prints each id only once.
    for lbl in sorted(name_by_label):
        count = np.sum(y == lbl)
        name = name_by_label[lbl]
        percent = (count / total) * 100 if total else 0.0
        print(f"  ├─ Label {lbl:<2} ({name:<10}) → {count:>5} samples ({percent:5.2f}%)")

def ensure_folder(folder_path: str) -> None:
    """
    Create ``folder_path`` (including missing parent directories) if it is absent.

    Calling it on a directory that already exists is a no-op.
    """
    os.makedirs(name=folder_path, exist_ok=True)


## 📦 EX. Load Example (Period 4) Data and View Format

This example demonstrates how to load preprocessed `.npy` data for **Period 4**, and inspect the dataset shapes and label distribution.  
Use this format as a reference when loading data in other methods (e.g., EWC, PNN, DynEx-CLoRA).

Each ECG sample:
- Has shape `(5000, 12)` → represents 10 seconds (at 500Hz) across 12-lead channels.
- The corresponding label is an integer ID in the range 0–9, as defined by `period_label_map[4]` (ID `1`, reserved for "OTHER" in earlier periods, is not used in Period 4).

In [6]:
# Example: load the Period 4 split, print array shapes and class distributions,
# then free the arrays again.
save_dir = os.path.join(BASE_DIR, "processed")
X_train = np.load(os.path.join(save_dir, "X_train_p4.npy"))
y_train = np.load(os.path.join(save_dir, "y_train_p4.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p4.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p4.npy"))

print("✅ Loaded")
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
print_class_distribution(y_train, period_label_map[4])
print_class_distribution(y_test, period_label_map[4])

# Drop the references so the example arrays do not stay alive in the kernel.
del X_train, y_train, X_test, y_test


✅ Loaded
X_train shape: (5493, 5000, 12)
y_train shape: (5493,)
X_test shape: (1374, 5000, 12)
y_test shape: (1374,)

📊 Class Distribution
  ├─ Label 0  (NSR       ) →   734 samples (13.36%)
  ├─ Label 2  (I-AVB     ) →   577 samples (10.50%)
  ├─ Label 3  (AF        ) →   976 samples (17.77%)
  ├─ Label 4  (LBBB      ) →   158 samples ( 2.88%)
  ├─ Label 5  (RBBB      ) →  1337 samples (24.34%)
  ├─ Label 6  (PAC       ) →   434 samples ( 7.90%)
  ├─ Label 7  (PVC       ) →   501 samples ( 9.12%)
  ├─ Label 8  (STD       ) →   628 samples (11.43%)
  ├─ Label 9  (STE       ) →   148 samples ( 2.69%)

📊 Class Distribution
  ├─ Label 0  (NSR       ) →   184 samples (13.39%)
  ├─ Label 2  (I-AVB     ) →   144 samples (10.48%)
  ├─ Label 3  (AF        ) →   244 samples (17.76%)
  ├─ Label 4  (LBBB      ) →    40 samples ( 2.91%)
  ├─ Label 5  (RBBB      ) →   335 samples (24.38%)
  ├─ Label 6  (PAC       ) →   108 samples ( 7.86%)
  ├─ Label 7  (PVC       ) →   125 samples ( 9.10%)
  ├─ La

## __Check GPU, CUDA, PyTorch__

In [7]:
!nvidia-smi

Mon May  5 13:04:45 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.86.15              Driver Version: 570.86.15      CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:2A:00.0 Off |                  Off |
| 51%   77C    P2            294W /  300W |   11188MiB /  49140MiB |    100%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A6000               Off |   00

### CUDA Details

In [8]:
def check_gpu_config():
    """
    Print a formatted report about GPU availability and, if present, CUDA device 0.

    The report always contains the PyTorch version and whether a GPU is usable.
    With a GPU it adds the name of device 0, the GPU count, the current device
    index, the compute capability, an estimated CUDA core count, and the
    total / allocated / reserved memory of device 0 in GB. Without a GPU it
    states that execution falls back to the CPU. Nothing is returned.
    """
    width = 50
    heavy_rule = "=" * width
    light_rule = "-" * width

    def show(label, value):
        # Labels are left-aligned in a 25-character column.
        print(f"{label:<25}: {value}")

    has_gpu = torch.cuda.is_available()

    # Banner
    print(heavy_rule)
    print("GPU Configuration Check".center(width))
    print(heavy_rule)

    # Information that is available with or without a GPU
    show('PyTorch Version', torch.__version__)
    show('GPU Available', 'Yes' if has_gpu else 'No')

    if not has_gpu:
        print(light_rule)
        print("No GPU detected. Running on CPU.".center(width))
        print(light_rule)
        print(heavy_rule)
        return

    print(light_rule)
    print("GPU Details".center(width))
    print(light_rule)

    # Device identity (name is always queried for device 0)
    show('Device Name', torch.cuda.get_device_name(0))
    show('Number of GPUs', torch.cuda.device_count())
    show('Current Device Index', torch.cuda.current_device())

    # Compute capability and a rough core count
    props = torch.cuda.get_device_properties(0)
    show('Compute Capability', f"{props.major}.{props.minor}")
    # Approximation: assumes 128 cores per streaming multiprocessor.
    show('Total CUDA Cores', props.multi_processor_count * 128)

    # Memory figures, converted from bytes to GB
    bytes_per_gb = 1024 ** 3
    total_memory = props.total_memory / bytes_per_gb
    memory_allocated = torch.cuda.memory_allocated(0) / bytes_per_gb
    memory_reserved = torch.cuda.memory_reserved(0) / bytes_per_gb
    show('Total Memory (GB)', f"{total_memory:.2f}")
    show('Allocated Memory (GB)', f"{memory_allocated:.2f}")
    show('Reserved Memory (GB)', f"{memory_reserved:.2f}")

    print(heavy_rule)

if __name__ == "__main__":
    check_gpu_config()

             GPU Configuration Check              
PyTorch Version          : 2.5.1
GPU Available            : Yes
--------------------------------------------------
                   GPU Details                    
--------------------------------------------------
Device Name              : NVIDIA RTX A6000
Number of GPUs           : 3
Current Device Index     : 0
Compute Capability       : 8.6
Total CUDA Cores         : 10752
Total Memory (GB)        : 47.41
Allocated Memory (GB)    : 0.00
Reserved Memory (GB)     : 0.00


### PyTorch Details

In [9]:
def print_torch_config():
    """
    Print the PyTorch / CUDA build configuration and seed the global torch RNG.

    Besides printing, this function has a side effect: it calls
    ``torch.manual_seed(42)``. Nothing is returned.
    """
    width = 50
    heavy_rule = "=" * width

    def show(label, value):
        # Labels are left-aligned in a 25-character column.
        print(f"{label:<25}: {value}")

    print(heavy_rule)
    print("PyTorch Configuration".center(width))
    print(heavy_rule)

    # Build / runtime information
    show('PyTorch Version', torch.__version__)
    show('CUDA Compiled Version', torch.version.cuda)
    cuda_ok = torch.cuda.is_available()
    show('CUDA Available', 'Yes' if cuda_ok else 'No')
    show('Number of GPUs', torch.cuda.device_count())

    # Name of device 0, only when CUDA is usable
    if cuda_ok:
        show('GPU Name', torch.cuda.get_device_name(0))

    print("-" * width)

    # Seed the global torch RNG
    torch.manual_seed(42)
    print(f"{'Random Seed':<25}: 42 (Seeding successful!)")

    print(heavy_rule)

if __name__ == "__main__":
    print_torch_config()

              PyTorch Configuration               
PyTorch Version          : 2.5.1
CUDA Compiled Version    : 12.1
CUDA Available           : Yes
Number of GPUs           : 3
GPU Name                 : NVIDIA RTX A6000
--------------------------------------------------
Random Seed              : 42 (Seeding successful!)


## __⚙️ GPU Selection — Auto-select the least loaded GPU__
This code automatically scans available GPUs and selects the one with the lowest current memory usage.


In [10]:
def auto_select_cuda_device(verbose=True):
    """
    Pick the CUDA device with the least memory currently in use.

    Memory usage is read from ``nvidia-smi`` (one row per physical GPU). Only GPUs
    that this process can actually address are considered: when
    ``CUDA_VISIBLE_DEVICES`` lists numeric ids, nvidia-smi rows are translated to
    the logical ``cuda:N`` indices PyTorch uses, and any row beyond
    ``torch.cuda.device_count()`` is ignored. Without this mapping the nvidia-smi
    row number could name a device that does not exist in this process.

    Args:
        verbose: When True, print the chosen device id, its used memory and name.

    Returns:
        torch.device: ``cpu`` when CUDA is unavailable; otherwise the selected
        ``cuda:N`` device, or ``cuda:0`` if the nvidia-smi query or the index
        mapping fails for any reason.
    """
    if not torch.cuda.is_available():
        print("🚫 No CUDA GPU available. Using CPU.")
        return torch.device("cpu")

    try:
        # Run nvidia-smi to get memory usage (MiB) of each physical GPU
        smi_output = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,nounits,noheader'],
            encoding='utf-8'
        )
        memory_used = [int(row) for row in smi_output.strip().splitlines()]

        # Translate logical torch indices to nvidia-smi row numbers.
        # NOTE(review): nvidia-smi enumerates in PCI bus order; this mapping assumes
        # CUDA uses the same order (CUDA_DEVICE_ORDER=PCI_BUS_ID) — confirm on
        # hosts with mixed GPU models.
        n_visible = torch.cuda.device_count()
        visible_env = os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
        if visible_env:
            tokens = [tok.strip() for tok in visible_env.split(",") if tok.strip()]
            if not all(tok.isdigit() for tok in tokens):
                # UUID-style entries cannot be matched to row numbers here.
                raise ValueError(
                    f"cannot map CUDA_VISIBLE_DEVICES={visible_env!r} to nvidia-smi rows"
                )
            physical_ids = [int(tok) for tok in tokens][:n_visible]
        else:
            physical_ids = list(range(n_visible))

        candidates = [
            (memory_used[physical], logical)
            for logical, physical in enumerate(physical_ids)
            if physical < len(memory_used)
        ]
        if not candidates:
            raise RuntimeError("nvidia-smi reported no GPU that PyTorch can address")

        # Tuples compare by memory first, then by index, so ties go to the lowest index.
        used_mib, best_gpu = min(candidates)

        if verbose:
            print("🎯 Automatically selected GPU:")
            print(f"    - CUDA Device ID : {best_gpu}")
            print(f"    - Memory Used    : {used_mib} MiB")
            print(f"    - Device Name    : {torch.cuda.get_device_name(best_gpu)}")
        return torch.device(f"cuda:{best_gpu}")
    except Exception as e:
        # Deliberate best-effort: any failure above degrades to the first CUDA device.
        print(f"⚠️ Failed to auto-detect GPU. Falling back to cuda:0. ({e})")
        return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Execute and assign
device = auto_select_cuda_device()

🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 18 MiB
    - Device Name    : NVIDIA RTX A6000


## __Model Selection__

### MLP

In [22]:
class MLP(nn.Module):
    """
    Two-hidden-layer perceptron operating on a flattened input.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout, followed by a
    final Linear layer that produces the logits.

    Args:
        input_dim: Number of features after flattening everything but the batch axis.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output logits.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int, dropout: float = 0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.relu1 = nn.ReLU()
        self.drop1 = nn.Dropout(dropout)

        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.relu2 = nn.ReLU()
        self.drop2 = nn.Dropout(dropout)

        self.out = nn.Linear(hidden_dim, output_dim)

        self.init_weights()

    def init_weights(self):
        """Xavier-uniform weights and zero biases for every Linear layer."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):  # x: (B, 5000, 12)
        """Flatten each sample and return logits of shape (B, output_dim)."""
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # permuted or sliced tensors, copying only when it has to.
        x = x.reshape(x.size(0), -1)  # → (B, 60000)
        x = self.drop1(self.relu1(self.bn1(self.fc1(x))))
        x = self.drop2(self.relu2(self.bn2(self.fc2(x))))
        return self.out(x)  # → (B, num_classes)


### ResNet 18 - 1D

In [68]:
class ResNet18_1D(nn.Module):
    """
    ResNet-18 adapted to 1D sequences.

    The four residual stages are taken from torchvision's ``resnet18`` and every
    convolution / batch-norm inside them is then replaced by a newly created 1D
    layer with the same channel counts and strides, so only the block layout of
    the 2D network is reused.

    Args:
        input_channels: Number of channels per time step (D of a (B, T, D) input).
        output_size: Number of output logits.
    """

    def __init__(self, input_channels: int, output_size: int):
        super().__init__()
        # Randomly initialised skeleton. Called without arguments because the
        # `pretrained=` keyword is deprecated in torchvision; the default is
        # "no pretrained weights" in both the old and the new API.
        base_model = resnet18()

        # 1D stem
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = base_model.layer1
        self.layer2 = base_model.layer2
        self.layer3 = base_model.layer3
        self.layer4 = base_model.layer4

        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.classifier = nn.Linear(512, output_size)

        # No custom weight initialisation is applied; all layers keep PyTorch defaults.
        self._convert_layers_to_1d()

    def _convert_layers_to_1d(self):
        """Replace the 2D conv / batch-norm layers of every residual block with 1D ones."""
        for name in ['layer1', 'layer2', 'layer3', 'layer4']:
            layer = getattr(self, name)
            for block in layer:
                # 2D strides are tuples; the first component is reused as the 1D stride.
                block.conv1 = nn.Conv1d(block.conv1.in_channels, block.conv1.out_channels,
                                        kernel_size=3, stride=block.conv1.stride[0],
                                        padding=1, bias=False)
                block.bn1 = nn.BatchNorm1d(block.bn1.num_features)
                block.conv2 = nn.Conv1d(block.conv2.in_channels, block.conv2.out_channels,
                                        kernel_size=3, stride=1, padding=1, bias=False)
                block.bn2 = nn.BatchNorm1d(block.bn2.num_features)
                if block.downsample is not None:
                    # Shortcut projection: 1x1 conv + batch-norm, also rebuilt in 1D.
                    conv = nn.Conv1d(block.downsample[0].in_channels,
                                     block.downsample[0].out_channels,
                                     kernel_size=1, stride=block.downsample[0].stride[0], bias=False)
                    bn = nn.BatchNorm1d(block.downsample[1].num_features)
                    block.downsample = nn.Sequential(conv, bn)

    def forward(self, x):  # x: (B, T, D)
        """Return logits of shape (B, output_size) for a (B, T, D) input."""
        x = x.permute(0, 2, 1)  # → (B, D, T)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.global_pool(x).squeeze(-1)  # → (B, 512)
        x = self.classifier(x)               # → (B, num_classes)
        return x


### ResNet 18 - 1D_v2

In [56]:
class ResNet18_1D_v2(nn.Module):
    """
    1D ResNet-18 variant with a wider stem kernel (15) and a two-layer classifier head.

    The residual stages come from torchvision's ``resnet18`` and are converted to
    1D layers in place. The head is Linear(512, 256) -> ReLU -> Dropout ->
    Linear(256, output_size) -> Dropout, replacing a single Linear(512, output_size).

    Args:
        input_channels: Number of channels per time step (D of a (B, T, D) input).
        output_size: Number of output logits.
        dropout_rate: Dropout probability after the first head layer; half of it
            is applied after the second.
    """

    def __init__(self, input_channels: int, output_size: int, dropout_rate=0.2):
        super().__init__()
        # Randomly initialised skeleton. Called without arguments because the
        # `pretrained=` keyword is deprecated in torchvision; the default is
        # "no pretrained weights" in both the old and the new API.
        base_model = resnet18()

        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=15, stride=2, padding=7, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = base_model.layer1
        self.layer2 = base_model.layer2
        self.layer3 = base_model.layer3
        self.layer4 = base_model.layer4

        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # Classifier: adds dropout and one extra layer
        self.fc1 = nn.Linear(512, 256)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(256, output_size)
        self.dropout2 = nn.Dropout(dropout_rate/2)  # smaller dropout rate

        self._convert_layers_to_1d()

    def _convert_layers_to_1d(self):
        """Replace the 2D conv / batch-norm layers of every residual block with 1D ones."""
        for name in ['layer1', 'layer2', 'layer3', 'layer4']:
            layer = getattr(self, name)
            for block in layer:
                # 2D strides are tuples; the first component is reused as the 1D stride.
                block.conv1 = nn.Conv1d(block.conv1.in_channels, block.conv1.out_channels,
                                        kernel_size=3, stride=block.conv1.stride[0],
                                        padding=1, bias=False)
                block.bn1 = nn.BatchNorm1d(block.bn1.num_features)
                block.conv2 = nn.Conv1d(block.conv2.in_channels, block.conv2.out_channels,
                                        kernel_size=3, stride=1, padding=1, bias=False)
                block.bn2 = nn.BatchNorm1d(block.bn2.num_features)
                if block.downsample is not None:
                    # Shortcut projection: 1x1 conv + batch-norm, also rebuilt in 1D.
                    conv = nn.Conv1d(block.downsample[0].in_channels,
                                     block.downsample[0].out_channels,
                                     kernel_size=1, stride=block.downsample[0].stride[0], bias=False)
                    bn = nn.BatchNorm1d(block.downsample[1].num_features)
                    block.downsample = nn.Sequential(conv, bn)

    def forward(self, x):  # x: (B, T, D)
        """Return logits of shape (B, output_size) for a (B, T, D) input."""
        x = x.permute(0, 2, 1)  # → (B, D, T)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.global_pool(x).squeeze(-1)  # → (B, 512)

        x = self.dropout1(F.relu(self.fc1(x)))
        # NOTE(review): dropout2 acts directly on the output logits, so in training
        # mode individual class scores are randomly zeroed before the loss. Kept
        # as-is to preserve existing training behaviour — confirm this is intended.
        x = self.dropout2(self.fc2(x))

        return x


### ResNet 18 - 1D Improved

In [41]:
class ResNet18_1D_Improved(nn.Module):
    """
    1D ResNet-18 variant with wider residual kernels, SE channel attention and a
    two-layer classifier head.

    The residual stages come from torchvision's ``resnet18`` and are converted to
    1D layers in place (kernel 7 for the first conv of each block, kernel 5 for
    the second). An ``SEBlock1D`` re-weights the 512 output channels before
    global average pooling.

    Args:
        input_channels: Number of channels per time step (D of a (B, T, D) input).
        output_size: Number of output logits.
        dropout_rate: Dropout probability after the first head layer; half of it
            is applied after the second.
    """

    def __init__(self, input_channels: int, output_size: int, dropout_rate=0.2):
        super().__init__()
        # Randomly initialised skeleton. Called without arguments because the
        # `pretrained=` keyword is deprecated in torchvision; the default is
        # "no pretrained weights" in both the old and the new API.
        base_model = resnet18()

        # Stem convolution: kernel 15 with stride 2 (a wider kernel than the
        # 7-tap stem of the standard ResNet-18).
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=15, stride=2, padding=7, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        # Standard ResNet stages
        self.layer1 = base_model.layer1
        self.layer2 = base_model.layer2
        self.layer3 = base_model.layer3
        self.layer4 = base_model.layer4

        # Channel attention
        self.attention = SEBlock1D(512)

        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # Classifier: adds dropout and one extra layer
        self.fc1 = nn.Linear(512, 256)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(256, output_size)
        self.dropout2 = nn.Dropout(dropout_rate/2)  # smaller dropout rate

        self._convert_layers_to_1d()

    def _convert_layers_to_1d(self):
        """Replace the 2D conv / batch-norm layers of every residual block with 1D ones."""
        for name in ['layer1', 'layer2', 'layer3', 'layer4']:
            layer = getattr(self, name)
            for block in layer:
                # Larger kernels than the usual 3: the author's premise is that
                # wider kernels may suit ECG signals better.
                # 2D strides are tuples; the first component is reused as the 1D stride.
                block.conv1 = nn.Conv1d(block.conv1.in_channels, block.conv1.out_channels,
                                        kernel_size=7, stride=block.conv1.stride[0],
                                        padding=3, bias=False)
                block.bn1 = nn.BatchNorm1d(block.bn1.num_features)
                block.conv2 = nn.Conv1d(block.conv2.in_channels, block.conv2.out_channels,
                                        kernel_size=5, stride=1, padding=2, bias=False)
                block.bn2 = nn.BatchNorm1d(block.bn2.num_features)
                if block.downsample is not None:
                    # Shortcut projection: 1x1 conv + batch-norm, also rebuilt in 1D.
                    conv = nn.Conv1d(block.downsample[0].in_channels,
                                     block.downsample[0].out_channels,
                                     kernel_size=1, stride=block.downsample[0].stride[0], bias=False)
                    bn = nn.BatchNorm1d(block.downsample[1].num_features)
                    block.downsample = nn.Sequential(conv, bn)

    def forward(self, x):  # x: (B, T, D)
        """Return logits of shape (B, output_size) for a (B, T, D) input."""
        x = x.permute(0, 2, 1)  # → (B, D, T)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Apply channel attention
        x = self.attention(x)

        x = self.global_pool(x).squeeze(-1)  # → (B, 512)

        # Two-layer classifier
        x = self.dropout1(F.relu(self.fc1(x)))
        # NOTE(review): dropout2 acts directly on the output logits, so in training
        # mode individual class scores are randomly zeroed before the loss. Kept
        # as-is to preserve existing training behaviour — confirm this is intended.
        x = self.dropout2(self.fc2(x))

        return x

# Squeeze-and-Excitation block for 1D feature maps (channel attention)
class SEBlock1D(nn.Module):
    """
    Re-weight the channels of a (B, C, T) tensor with learned gates in (0, 1).

    Each channel is averaged over time, the resulting (B, C) vector goes through
    a bottleneck MLP (C -> C // reduction -> C, no biases) ending in a sigmoid,
    and the input is multiplied channel-wise by that gate.

    Args:
        channel: Number of input channels C.
        reduction: Bottleneck ratio of the gating MLP.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        bottleneck = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled per channel; the output has the same shape as ``x``."""
        batch, channels, _ = x.size()
        # Squeeze: one average per channel → (B, C)
        gate = self.avg_pool(x).reshape(batch, channels)
        # Excite: per-channel weight, broadcast over the time axis
        gate = self.fc(gate).unsqueeze(-1)
        return x * gate

### Bi-GRU

In [30]:
class BiGRU(nn.Module):
    """
    Bidirectional GRU classifier with mean pooling over time.

    Args:
        input_size: Features per time step.
        hidden_size: Hidden units per direction.
        num_classes: Number of output logits.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability, used between GRU layers (only when
            ``num_layers > 1``) and on the GRU outputs before pooling.
    """

    def __init__(self, input_size=12, hidden_size=64, num_classes=2, num_layers=2, dropout=0.3):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Inter-layer dropout is switched off for a single-layer GRU.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
            bidirectional=True,
        )

        # Forward and backward states are concatenated, hence 2 * hidden_size.
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.drop = nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        """Xavier-uniform for every weight tensor, zeros for every bias."""
        for param_name, tensor in self.named_parameters():
            if 'weight' in param_name:
                nn.init.xavier_uniform_(tensor)
                continue
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0)

    def forward(self, x):  # x: (B, 5000, 12)
        """Return logits of shape (B, num_classes) for a (B, T, input_size) input."""
        batch_size = x.size(0)
        # Explicit all-zero initial state: (layers * directions, B, H)
        initial_state = torch.zeros(
            self.num_layers * 2, batch_size, self.hidden_size
        ).to(x.device)
        sequence, _ = self.gru(x, initial_state)   # (B, T, 2*H)
        pooled = self.drop(sequence).mean(dim=1)   # → (B, 2*H)
        return self.fc(pooled)                     # → (B, num_classes)


### Bi-GRU with attention

In [37]:
class BiGRUWithAttention(nn.Module):
    """
    Bidirectional GRU classifier with an element-wise gating ("attention") layer.

    The gate is ``tanh(Linear(h_t))`` multiplied into each GRU output ``h_t``;
    it is a feature-wise gate, not a softmax over time steps. Gated outputs are
    mean-pooled over time and projected to logits.

    Args:
        input_size: Features per time step.
        hidden_size: Hidden units per direction.
        output_size: Number of output logits.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability, used between GRU layers (only when
            ``num_layers > 1``) and on the gated outputs before pooling.
    """

    def __init__(self, input_size: int = 12, hidden_size: int = 64, output_size: int = 2,
                 num_layers: int = 2, dropout: float = 0.0):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Inter-layer dropout is switched off for a single-layer GRU.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout,
        )

        feature_dim = hidden_size * 2  # forward + backward states
        self.attention_fc = nn.Linear(feature_dim, feature_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(feature_dim, output_size)

        self.init_weights()

    def init_weights(self):
        """Xavier-uniform for every weight tensor, zeros for every bias."""
        for param_name, tensor in self.named_parameters():
            if 'weight' in param_name:
                nn.init.xavier_uniform_(tensor)
                continue
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (B, output_size) for a (B, T, input_size) input."""
        # Explicit all-zero initial state: (layers * directions, B, H)
        initial_state = torch.zeros(
            self.num_layers * 2, x.size(0), self.hidden_size
        ).to(x.device)
        states, _ = self.gru(x, initial_state)        # → (B, T, 2*H)

        gate = torch.tanh(self.attention_fc(states))  # → (B, T, 2*H)
        gated = self.dropout(gate * states)           # element-wise gating

        pooled = gated.mean(dim=1)                    # mean pooling → (B, 2*H)
        return self.fc(pooled)                        # → (B, output_size)


## __Training and validation function__

### Extra Function

In [24]:
def compute_classwise_accuracy(student_logits_flat, y_batch, class_correct, class_total):
    """
    Accumulate per-class hit and sample counts for one batch.

    Both dictionaries are updated in place and nothing is returned, so the
    function can be called once per batch and the totals read afterwards.

    Args:
        student_logits_flat (torch.Tensor): Logits; the predicted class is the
            argmax over the last dimension.
        y_batch (torch.Tensor): True labels, one per row of the logits.
        class_correct (dict): label -> number of correct predictions so far.
        class_total (dict): label -> number of samples seen so far.

    Raises:
        ValueError: If the logits and the labels live on different devices.
    """
    if student_logits_flat.device != y_batch.device:
        raise ValueError("student_logits_flat and y_batch must be on the same device")

    # True wherever the arg-max prediction equals the ground truth
    hits = torch.argmax(student_logits_flat, dim=-1) == y_batch

    # Only labels present in this batch need updating
    for label in torch.unique(y_batch).tolist():
        if label not in class_total:
            # First occurrence of this class: start both counters at zero
            class_total[label] = 0
            class_correct[label] = 0

        belongs = y_batch == label
        class_total[label] += int(belongs.sum())
        class_correct[label] += int((belongs & hits).sum())

In [None]:
def get_model_parameter_info(model):
    """
    Count a model's parameters and the memory they occupy.

    The byte size is computed from each parameter's real element size, so the
    figure is also correct for half- or double-precision models (for float32
    models it equals 4 bytes per parameter).

    Args:
        model: Any ``torch.nn.Module``.

    Returns:
        tuple: ``(total_params, param_size_MB)`` — the number of parameter
        elements and their size in MiB (bytes / 1024**2).
    """
    total_params = 0
    param_size_bytes = 0
    for p in model.parameters():
        n_elements = p.numel()
        total_params += n_elements
        param_size_bytes += n_elements * p.element_size()
    param_size_MB = param_size_bytes / (1024**2)
    return total_params, param_size_MB

In [58]:
# Data augmentation function (kept from the first attachment)
def augment_ecg(signal, sigma=0.05, shift_max=20):
    """
    Augment a batch of ECG signals with noise, a time shift and amplitude scaling.

    Steps, in order (each draws from NumPy's global RNG):
      1. Add Gaussian noise with standard deviation ``sigma``.
      2. Shift along the time axis by a random integer in
         ``[-shift_max, shift_max]`` (both ends included); the vacated samples
         are filled by repeating the edge value.
      3. Multiply by a random scale factor in [0.9, 1.1].

    Args:
        signal: Array of shape (B, T, C).
        sigma: Standard deviation of the additive noise.
        shift_max: Maximum absolute time shift in samples. Values larger than
            ``T - 1`` are clamped, and 0 (or a negative value) disables shifting.

    Returns:
        Augmented array with the same shape as ``signal``.
    """
    # Additive Gaussian noise
    noise = np.random.normal(0, sigma, signal.shape)
    signal_noisy = signal + noise

    # Random time shift. The upper bound of randint is exclusive, hence the +1 so
    # that +shift_max is reachable. Clamping to T - 1 keeps at least one sample
    # for edge padding to replicate.
    n_steps = signal_noisy.shape[1]
    max_shift = max(0, min(int(shift_max), n_steps - 1))
    shift = np.random.randint(-max_shift, max_shift + 1)
    if shift > 0:
        # Shift right: drop the tail, pad the front
        signal_shifted = np.pad(signal_noisy[:, :-shift, :], ((0, 0), (shift, 0), (0, 0)), mode='edge')
    elif shift < 0:
        # Shift left: drop the head, pad the end
        signal_shifted = np.pad(signal_noisy[:, -shift:, :], ((0, 0), (0, -shift), (0, 0)), mode='edge')
    else:
        signal_shifted = signal_noisy

    # Amplitude scaling (±10%)
    scale = np.random.uniform(0.9, 1.1)
    signal_scaled = signal_shifted * scale

    return signal_scaled

# ECG dataset class (kept from the first attachment)
class ECGDataset(Dataset):
    """
    Dataset of ECG samples with optional on-the-fly augmentation.

    Args:
        X: Input data of shape (N, T, C); each ``X[idx]`` must provide ``.copy()``.
        y: Labels, indexable by the same ``idx`` as ``X``.
        augment: When True, each fetched sample is augmented with 50% probability.
        device: Optional device (CPU/GPU); when set, returned tensors are moved to it.
    """

    def __init__(self, X, y, augment=False, device=None):
        self.X, self.y = X, y
        self.augment = augment
        self.device = device

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` as a float32 tensor and a 0-dim int64 tensor."""
        # Work on a copy so the stored data is never modified
        sample = self.X[idx].copy()

        # Augment with 50% probability; augment_ecg works on batches, so a batch
        # axis is added for the call and removed again afterwards
        if self.augment and np.random.rand() > 0.5:
            sample = augment_ecg(sample[np.newaxis, ...])[0]

        features = torch.FloatTensor(sample)
        label = torch.LongTensor([self.y[idx]])[0]

        if not self.device:
            return features, label
        return features.to(self.device), label.to(self.device)

### Training Function

#### No use_class_weights version (For unbalanced Dataset)

In [69]:
def train_model_general_classifier(model, output_size, criterion, optimizer,
                                   X_train, y_train, X_val, y_val, scheduler=None,
                                   num_epochs=10, batch_size=64, model_saving_folder=None,
                                   model_name=None, stop_signal_file=None, device=None):
    """
    Train a classifier with per-epoch validation and top-5 checkpointing.

    Training data is wrapped in ``ECGDataset`` with augmentation enabled, validation
    data without. After every epoch the model is validated; the five epochs with the
    highest validation accuracy keep a ``<model_name>_epoch_<n>.pth`` checkpoint on disk.
    At the end the best of these is copied to ``<model_name>_best.pth`` and the state
    after the last completed epoch is written to ``<model_name>_final.pth``.

    Args:
        model: network to train; called as ``model(X_batch)`` and expected to return logits (B, C).
        output_size: not used by this function; kept so existing callers keep working.
        criterion: loss callable invoked as ``criterion(outputs, y_batch)``.
        optimizer: optimizer over ``model``'s parameters.
        X_train, y_train: training samples and labels (objects exposing ``.shape``).
        X_val, y_val: validation samples and labels.
        scheduler: optional LR scheduler. ``ReduceLROnPlateau`` is stepped with the
            validation loss; any other scheduler is stepped without arguments.
        num_epochs: maximum number of epochs.
        batch_size: batch size for both loaders.
        model_saving_folder: checkpoint folder. If given, an existing folder at that path
            is DELETED and recreated. If omitted, ``./saved_models`` is used (created when
            missing, never wiped).
        model_name: checkpoint file prefix; defaults to ``'model'``.
        stop_signal_file: optional path; if the file exists at the start of an epoch,
            training stops before that epoch runs.
        device: forwarded to ``ECGDataset``, which moves every sample to it.

    Returns:
        dict with ``training_time_sec``, ``total_params``, ``model_size_MB``,
        ``best_val_accuracy``, ``val_classwise_accuracy``, ``best_model_path`` and
        ``final_model_path``. The last four are ``None`` when no epoch completed.
    """
    print("\n🚀 'train_model_general_classifier' started.")
    start_time = time.time()

    # === Folder Setup ===
    model_name = model_name or 'model'
    if model_saving_folder:
        # A caller-supplied folder is wiped so stale checkpoints never mix with new ones
        if os.path.exists(model_saving_folder):
            shutil.rmtree(model_saving_folder)
            print(f"✅ Removed existing folder: {model_saving_folder}")
    else:
        model_saving_folder = './saved_models'
    # Always make sure the target exists (the default folder used to be left uncreated)
    os.makedirs(model_saving_folder, exist_ok=True)

    # === Datasets and data loaders ===
    train_dataset = ECGDataset(X_train, y_train, augment=True, device=device)
    val_dataset = ECGDataset(X_val, y_val, augment=False, device=device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    print("\n✅ Data Overview:")
    print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")

    # `state_dict()` returns tensors that alias the live parameters, so keeping them in
    # `best_results` would silently track the latest weights. Only metadata is kept in
    # memory; the weights of each kept epoch live in its checkpoint file.
    state_keys = ('model_state_dict', 'optimizer_state_dict')
    best_results = []   # metadata of the kept checkpoints, best first
    current = None      # full checkpoint of the last completed epoch

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        class_correct, class_total = {}, {}

        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\n🛑 Stop signal detected. Exiting training loop.")
            break

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)  # (B, C)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch loss is a per-sample mean
            epoch_loss += loss.item() * X_batch.size(0)
            compute_classwise_accuracy(outputs, y_batch, class_correct, class_total)

        train_loss = epoch_loss / len(train_loader.dataset)
        train_acc = {int(c): f"{(class_correct[c] / class_total[c]) * 100:.2f}%" if class_total[c] > 0 else "0.00%"
                     for c in sorted(class_total.keys())}

        # === Validation ===
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        val_class_correct, val_class_total = {}, {}
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item() * X_batch.size(0)
                predictions = torch.argmax(outputs, dim=-1)
                val_correct += (predictions == y_batch).sum().item()
                val_total += y_batch.size(0)
                compute_classwise_accuracy(outputs, y_batch, val_class_correct, val_class_total)

        val_loss /= len(val_loader.dataset)
        val_acc = val_correct / val_total
        val_acc_cls = {int(c): f"{(val_class_correct[c] / val_class_total[c]) * 100:.2f}%" if val_class_total[c] > 0 else "0.00%"
                       for c in sorted(val_class_total.keys())}

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Train-Class-Acc: {train_acc}")
        print(f"Val Loss: {val_loss:.6f}, Val Acc: {val_acc * 100:.2f}%, Val-Class-Acc: {val_acc_cls}, LR: {optimizer.param_groups[0]['lr']:.6f}")

        model_path = os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        current = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'train_classwise_accuracy': train_acc,
            'val_classwise_accuracy': val_acc_cls,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'learning_rate': optimizer.param_groups[0]['lr'],
            'model_path': model_path
        }

        # Keep this epoch if fewer than five are kept or it beats the worst kept one
        if len(best_results) < 5 or val_acc > best_results[-1]['val_accuracy']:
            if len(best_results) == 5:
                to_remove = best_results.pop()
                if os.path.exists(to_remove['model_path']):
                    os.remove(to_remove['model_path'])
                    print(f"🗑 Removed: {to_remove['model_path']}")
            # Persist immediately: this file is the only faithful copy of this epoch's weights
            torch.save(current, model_path)
            best_results.append({k: v for k, v in current.items() if k not in state_keys})
            # Highest accuracy first; ties go to the later epoch
            best_results.sort(key=lambda x: (x['val_accuracy'], x['epoch']), reverse=True)
            print(f"✅ Saved model: {model_path}")

        if scheduler is not None:
            # Only ReduceLROnPlateau takes a metric; others would read it as an epoch index
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

    end_time = time.time()
    training_time = end_time - start_time
    total_params, param_size_MB = get_model_parameter_info(model)

    best_model_path = None
    if best_results:
        best = best_results[0]
        best_model_path = os.path.join(model_saving_folder, f"{model_name}_best.pth")
        # Copy the on-disk checkpoint: it holds the weights as they were at the best epoch
        shutil.copyfile(best['model_path'], best_model_path)
        print(f"\n🏆 Best model saved as: {best_model_path} (Val Accuracy: {best['val_accuracy'] * 100:.2f}%)")

    final_model_path = None
    if current is not None:
        final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
        torch.save(current, final_model_path)
        print(f"\n📌 Final model saved as: {final_model_path}")
    else:
        # No epoch ran (num_epochs == 0 or stop signal present from the start)
        print("\n⚠️ No epoch completed; final model was not saved.")

    print("\n🎯 Top 5 Best Models:")
    for res in best_results:
        print(f"Epoch {res['epoch']}, Train Loss: {res['train_loss']:.6f}, Train-Acc: {res['train_classwise_accuracy']},\n"
              f"Val Loss: {res['val_loss']:.6f}, Val Acc: {res['val_accuracy']*100:.2f}%, Val-Class-Acc: {res['val_classwise_accuracy']},"
              f" Model Path: {res['model_path']}")

    print(f"\n🧠 Model Summary:")
    print(f"Total Parameters: {total_params:,}")
    print(f"Model Size (float32): {param_size_MB:.2f} MB")
    print(f"Total Training Time: {training_time:.2f} seconds")

    # 🔥 Cleanup: drop local references and release cached GPU memory
    del X_train, y_train, X_val, y_val, train_loader, val_loader, train_dataset, val_dataset
    torch.cuda.empty_cache()
    gc.collect()

    return {
        'training_time_sec': training_time,
        'total_params': total_params,
        'model_size_MB': param_size_MB,
        'best_val_accuracy': best_results[0]['val_accuracy'] if best_results else None,
        'val_classwise_accuracy': best_results[0]['val_classwise_accuracy'] if best_results else None,
        'best_model_path': best_model_path,
        'final_model_path': final_model_path
    }


#### use_class_weights version (For unbalanced Dataset)

In [63]:
def train_model_general_classifier_use_class_weights(model, output_size, criterion, optimizer,
                                   X_train, y_train, X_val, y_val, scheduler=None,
                                   num_epochs=10, batch_size=64, model_saving_folder=None,
                                   model_name=None, stop_signal_file=None, device=None,
                                   use_class_weights=False):
    """
    Train a classifier with per-epoch validation, top-5 checkpointing and optional class weights.

    Training data is wrapped in ``ECGDataset`` with augmentation enabled, validation
    data without. After every epoch the model is validated; the five epochs with the
    highest validation accuracy keep a ``<model_name>_epoch_<n>.pth`` checkpoint on disk.
    At the end the best of these is copied to ``<model_name>_best.pth`` and the state
    after the last completed epoch is written to ``<model_name>_final.pth``.

    Args:
        model: network to train; called as ``model(X_batch)`` and expected to return logits (B, C).
        output_size: number of output classes; used to size the class-weight vector
            when ``use_class_weights`` is True.
        criterion: loss callable invoked as ``criterion(outputs, y_batch)``. It is
            REPLACED by a weighted ``nn.CrossEntropyLoss`` when ``use_class_weights`` is True.
        optimizer: optimizer over ``model``'s parameters.
        X_train, y_train: training samples and labels (objects exposing ``.shape``).
        X_val, y_val: validation samples and labels.
        scheduler: optional LR scheduler. ``ReduceLROnPlateau`` is stepped with the
            validation loss; any other scheduler is stepped without arguments.
        num_epochs: maximum number of epochs.
        batch_size: batch size for both loaders.
        model_saving_folder: checkpoint folder. If given, an existing folder at that path
            is DELETED and recreated. If omitted, ``./saved_models`` is used (created when
            missing, never wiped).
        model_name: checkpoint file prefix; defaults to ``'model'``.
        stop_signal_file: optional path; if the file exists at the start of an epoch,
            training stops before that epoch runs.
        device: forwarded to ``ECGDataset``, which moves every sample to it; the class
            weights are moved to it as well.
        use_class_weights: if True, train and validate with 'balanced' class weights
            computed from ``y_train``.

    Returns:
        dict with ``training_time_sec``, ``total_params``, ``model_size_MB``,
        ``best_val_accuracy``, ``val_classwise_accuracy``, ``best_model_path`` and
        ``final_model_path``. The last four are ``None`` when no epoch completed.
    """
    print("\n🚀 'train_model_general_classifier_use_class_weights' started.")
    start_time = time.time()

    # === Folder Setup ===
    model_name = model_name or 'model'
    if model_saving_folder:
        # A caller-supplied folder is wiped so stale checkpoints never mix with new ones
        if os.path.exists(model_saving_folder):
            shutil.rmtree(model_saving_folder)
            print(f"✅ Removed existing folder: {model_saving_folder}")
    else:
        model_saving_folder = './saved_models'
    # Always make sure the target exists (the default folder used to be left uncreated)
    os.makedirs(model_saving_folder, exist_ok=True)

    # === Datasets and data loaders ===
    train_dataset = ECGDataset(X_train, y_train, augment=True, device=device)
    val_dataset = ECGDataset(X_val, y_val, augment=False, device=device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    print("\n✅ Data Overview:")
    print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")

    # === Optional: class weights ===
    if use_class_weights:
        # compute_class_weight comes from the notebook's import cell
        labels = np.asarray(y_train)
        present = np.unique(labels)
        balanced = compute_class_weight(class_weight='balanced', classes=present, y=labels)
        labels_index_outputs = (
            bool(output_size)
            and np.issubdtype(present.dtype, np.integer)
            and present.min() >= 0
            and present.max() < output_size
        )
        if labels_index_outputs:
            # CrossEntropyLoss needs one weight per output class, indexed by class id.
            # Classes absent from y_train keep a neutral weight of 1.0.
            weight_vector = np.ones(output_size, dtype=np.float32)
            weight_vector[present] = balanced
        else:
            weight_vector = balanced
        class_weights = torch.FloatTensor(weight_vector)
        if device is not None:
            class_weights = class_weights.to(device)
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        print(f"✅ 使用類別權重: {class_weights.cpu().numpy()}")
    else:
        print("✅ 使用標準交叉熵損失函數(無類別權重)")

    # `state_dict()` returns tensors that alias the live parameters, so keeping them in
    # `best_results` would silently track the latest weights. Only metadata is kept in
    # memory; the weights of each kept epoch live in its checkpoint file.
    state_keys = ('model_state_dict', 'optimizer_state_dict')
    best_results = []   # metadata of the kept checkpoints, best first
    current = None      # full checkpoint of the last completed epoch

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        class_correct, class_total = {}, {}

        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\n🛑 Stop signal detected. Exiting training loop.")
            break

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)  # (B, C)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch loss is a per-sample mean
            epoch_loss += loss.item() * X_batch.size(0)
            compute_classwise_accuracy(outputs, y_batch, class_correct, class_total)

        train_loss = epoch_loss / len(train_loader.dataset)
        train_acc = {int(c): f"{(class_correct[c] / class_total[c]) * 100:.2f}%" if class_total[c] > 0 else "0.00%"
                     for c in sorted(class_total.keys())}

        # === Validation ===
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        val_class_correct, val_class_total = {}, {}
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item() * X_batch.size(0)
                predictions = torch.argmax(outputs, dim=-1)
                val_correct += (predictions == y_batch).sum().item()
                val_total += y_batch.size(0)
                compute_classwise_accuracy(outputs, y_batch, val_class_correct, val_class_total)

        val_loss /= len(val_loader.dataset)
        val_acc = val_correct / val_total
        val_acc_cls = {int(c): f"{(val_class_correct[c] / val_class_total[c]) * 100:.2f}%" if val_class_total[c] > 0 else "0.00%"
                       for c in sorted(val_class_total.keys())}

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Train-Class-Acc: {train_acc}")
        print(f"Val Loss: {val_loss:.6f}, Val Acc: {val_acc * 100:.2f}%, Val-Class-Acc: {val_acc_cls}, LR: {optimizer.param_groups[0]['lr']:.6f}")

        model_path = os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        current = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'train_classwise_accuracy': train_acc,
            'val_classwise_accuracy': val_acc_cls,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'learning_rate': optimizer.param_groups[0]['lr'],
            'model_path': model_path
        }

        # Keep this epoch if fewer than five are kept or it beats the worst kept one
        if len(best_results) < 5 or val_acc > best_results[-1]['val_accuracy']:
            if len(best_results) == 5:
                to_remove = best_results.pop()
                if os.path.exists(to_remove['model_path']):
                    os.remove(to_remove['model_path'])
                    print(f"🗑 Removed: {to_remove['model_path']}")
            # Persist immediately: this file is the only faithful copy of this epoch's weights
            torch.save(current, model_path)
            best_results.append({k: v for k, v in current.items() if k not in state_keys})
            # Highest accuracy first; ties go to the later epoch
            best_results.sort(key=lambda x: (x['val_accuracy'], x['epoch']), reverse=True)
            print(f"✅ Saved model: {model_path}")

        if scheduler is not None:
            # Only ReduceLROnPlateau takes a metric; others would read it as an epoch index
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

    end_time = time.time()
    training_time = end_time - start_time
    total_params, param_size_MB = get_model_parameter_info(model)

    best_model_path = None
    if best_results:
        best = best_results[0]
        best_model_path = os.path.join(model_saving_folder, f"{model_name}_best.pth")
        # Copy the on-disk checkpoint: it holds the weights as they were at the best epoch
        shutil.copyfile(best['model_path'], best_model_path)
        print(f"\n🏆 Best model saved as: {best_model_path} (Val Accuracy: {best['val_accuracy'] * 100:.2f}%)")

    final_model_path = None
    if current is not None:
        final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
        torch.save(current, final_model_path)
        print(f"\n📌 Final model saved as: {final_model_path}")
    else:
        # No epoch ran (num_epochs == 0 or stop signal present from the start)
        print("\n⚠️ No epoch completed; final model was not saved.")

    print("\n🎯 Top 5 Best Models:")
    for res in best_results:
        print(f"Epoch {res['epoch']}, Train Loss: {res['train_loss']:.6f}, Train-Acc: {res['train_classwise_accuracy']},\n"
              f"Val Loss: {res['val_loss']:.6f}, Val Acc: {res['val_accuracy']*100:.2f}%, Val-Class-Acc: {res['val_classwise_accuracy']},"
              f" Model Path: {res['model_path']}")

    print(f"\n🧠 Model Summary:")
    print(f"Total Parameters: {total_params:,}")
    print(f"Model Size (float32): {param_size_MB:.2f} MB")
    print(f"Total Training Time: {training_time:.2f} seconds")

    # 🔥 Cleanup: drop local references and release cached GPU memory
    del X_train, y_train, X_val, y_val, train_loader, val_loader, train_dataset, val_dataset
    torch.cuda.empty_cache()
    gc.collect()

    return {
        'training_time_sec': training_time,
        'total_params': total_params,
        'model_size_MB': param_size_MB,
        'best_val_accuracy': best_results[0]['val_accuracy'] if best_results else None,
        'val_classwise_accuracy': best_results[0]['val_classwise_accuracy'] if best_results else None,
        'best_model_path': best_model_path,
        'final_model_path': final_model_path
    }


## __Test Model__

### MLP

In [None]:
# Experiment cell: train the MLP baseline on Period 1 data and store its
# checkpoints under Class_Incremental_CL/CPSC_CIL/Model_Selection/MLP.

# ==== Load Period 1 Data ====
# The preprocessed Period 1 splits are read from `save_dir` (defined in an earlier cell).
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
# The MLP input size is the product of the two non-batch dimensions.
input_size = X_train.shape[1] * X_train.shape[2]  # 5000 × 12 = 60000
hidden_size = 1024
output_size = len(np.unique(y_train))  # Period 1 usually has 2 classes (NSR and OTHER)
num_epochs = 200
batch_size = 64
dropout = 0.0
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be smaller than the number of classes derived from y_train.
assert np.max(y_train) < output_size
assert np.max(y_test) < output_size

# ==== Paths ====
# Both paths are relative to the current working directory.
# The training loop stops at the start of an epoch if `stop_signal_file` exists.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/MLP"
))
# NOTE(review): train_model_general_classifier deletes and recreates this folder itself,
# so this call looks redundant — confirm what ensure_folder does before removing it.
ensure_folder(model_saving_folder)

# ==== Model ====
model = MLP(
    input_dim=input_size,
    hidden_dim=hidden_size,
    output_dim=output_size,
    dropout=dropout
).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# The training function steps this scheduler with the validation loss:
# the LR is multiplied by 0.9 after 10 epochs without improvement.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# NOTE(review): the test split is passed as the validation set, so the "best" checkpoint
# is selected on test data — confirm this is intended.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='MLP',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# Drop the large globals and release cached GPU memory before the next experiment cell.
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()


🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 595 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]

🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/MLP

✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 1.326211, Train-Class-Acc: {0: '57.36%', 1: '58.17%'}
Val Loss: 1.390308, Val Acc: 54.35%, Val-Class-Acc: {0: '86.96%', 1: '21.74%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/MLP/MLP_epoch_1.pth
Epoch 2/200, Train Loss: 0.332756, Train-Class-Acc: {0: '85.83%', 1: '87.19%'}
Val Loss: 1.040174, Val Acc: 62.23%, Val-Class-Acc: {0: '38.59%', 1: '85.87%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/MLP/MLP_epoch_2.pth
Epoch 3/200, Train L

### ResNet 18 - 1D

#### No Init Version

In [None]:
# Experiment cell: train ResNet18_1D on Period 1 data and store its
# checkpoints under Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18.

# ==== Load Period 1 Data ====
# The preprocessed Period 1 splits are read from `save_dir` (defined in an earlier cell).
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_channels = X_train.shape[2]                  # 12 leads
output_size = len(np.unique(y_train))              # Number of classes (e.g., 2 for Period 1)
num_epochs = 200
batch_size = 64
dropout = 0.0                                       # Not needed for ResNet18_1D (unused in this cell)
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be smaller than the number of classes derived from y_train.
assert np.max(y_train) < output_size
assert np.max(y_test) < output_size

# ==== Paths ====
# Both paths are relative to the current working directory.
# The training loop stops at the start of an epoch if `stop_signal_file` exists.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18"
))
# NOTE(review): train_model_general_classifier deletes and recreates this folder itself,
# so this call looks redundant — confirm what ensure_folder does before removing it.
ensure_folder(model_saving_folder)

# ==== Model ====
model = ResNet18_1D(input_channels=input_channels, output_size=output_size).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# The training function steps this scheduler with the validation loss:
# the LR is multiplied by 0.9 after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set, so the "best" checkpoint
# is selected on test data — confirm this is intended.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# Drop the large globals and release cached GPU memory before the next experiment cell.
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 1379 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]

🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18





✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 0.484366, Train-Class-Acc: {0: '78.61%', 1: '75.20%'}
Val Loss: 0.520067, Val Acc: 75.82%, Val-Class-Acc: {0: '95.11%', 1: '56.52%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18/ResNet18_1D_epoch_1.pth
Epoch 2/200, Train Loss: 0.388365, Train-Class-Acc: {0: '84.60%', 1: '78.20%'}
Val Loss: 0.402622, Val Acc: 83.42%, Val-Class-Acc: {0: '75.54%', 1: '91.30%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18/ResNet18_1D_epoch_2.pth
Epoch 3/200, Train Loss: 0.358777, Train-Class-Acc: {0: '86.38%', 1: '79.84%'}
Val Loss: 0.371163, Val Acc: 84.51%, Val-Class-Acc: {0: '88.59%', 1: '80.43%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18/ResNet18_1D_epoch_3.pth
Epoch 4/200, Train Loss: 0.348711, Train-Class-Acc: {0: 

0

#### Init Version

In [36]:
# Experiment cell: train ResNet18_1D on Period 1 data and store its
# checkpoints under Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_init.
# NOTE(review): apart from the output folder this cell is identical to the "No Init" cell;
# any weight-initialisation difference must come from the ResNet18_1D definition that was
# active when the cell ran — confirm.

# ==== Load Period 1 Data ====
# The preprocessed Period 1 splits are read from `save_dir` (defined in an earlier cell).
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_channels = X_train.shape[2]                  # 12 leads
output_size = len(np.unique(y_train))              # Number of classes (e.g., 2 for Period 1)
num_epochs = 200
batch_size = 64
dropout = 0.0                                       # Not needed for ResNet18_1D (unused in this cell)
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be smaller than the number of classes derived from y_train.
assert np.max(y_train) < output_size
assert np.max(y_test) < output_size

# ==== Paths ====
# Both paths are relative to the current working directory.
# The training loop stops at the start of an epoch if `stop_signal_file` exists.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_init"
))
# NOTE(review): train_model_general_classifier deletes and recreates this folder itself,
# so this call looks redundant — confirm what ensure_folder does before removing it.
ensure_folder(model_saving_folder)

# ==== Model ====
model = ResNet18_1D(input_channels=input_channels, output_size=output_size).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# The training function steps this scheduler with the validation loss:
# the LR is multiplied by 0.9 after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set, so the "best" checkpoint
# is selected on test data — confirm this is intended.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# Drop the large globals and release cached GPU memory before the next experiment cell.
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 1009 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]





🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_init

✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 0.533840, Train-Class-Acc: {0: '80.65%', 1: '72.48%'}
Val Loss: 0.531604, Val Acc: 81.79%, Val-Class-Acc: {0: '90.22%', 1: '73.37%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_init/ResNet18_1D_epoch_1.pth
Epoch 2/200, Train Loss: 0.373846, Train-Class-Acc: {0: '88.28%', 1: '78.47%'}
Val Loss: 0.413599, Val Acc: 85.33%, Val-Class-Acc: {0: '83.15%', 1: '87.50%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_init/ResNet18_1D_epoch_2.pth
Epoch 3/200, Train Loss: 0.326138, Train-Class-Acc: {0: '87.74%', 1: '81.88%'}
Val Loss: 0.451216, Val Acc: 77.72%, Val-Class-Acc: {0: '64.67%', 1: '90.76%'}, LR: 0.001000
✅ Saved 

0

#### No Init Version + Data Augmentation (Arg)

In [71]:
# Experiment cell: train ResNet18_1D on Period 1 data with data augmentation and store
# its checkpoints under Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_arg.
# Augmentation is not configured here: train_model_general_classifier builds its training
# ECGDataset with augment=True. Apart from the output folder this cell matches the
# "No Init" cell.

# ==== Load Period 1 Data ====
# The preprocessed Period 1 splits are read from `save_dir` (defined in an earlier cell).
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_channels = X_train.shape[2]                  # 12 leads
output_size = len(np.unique(y_train))              # Number of classes (e.g., 2 for Period 1)
num_epochs = 200
batch_size = 64
dropout = 0.0                                       # Not needed for ResNet18_1D (unused in this cell)
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be smaller than the number of classes derived from y_train.
assert np.max(y_train) < output_size
assert np.max(y_test) < output_size

# ==== Paths ====
# Both paths are relative to the current working directory.
# The training loop stops at the start of an epoch if `stop_signal_file` exists.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_arg"
))
# NOTE(review): train_model_general_classifier deletes and recreates this folder itself,
# so this call looks redundant — confirm what ensure_folder does before removing it.
ensure_folder(model_saving_folder)

# ==== Model ====
model = ResNet18_1D(input_channels=input_channels, output_size=output_size).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# The training function steps this scheduler with the validation loss:
# the LR is multiplied by 0.9 after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set, so the "best" checkpoint
# is selected on test data — confirm this is intended.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# Drop the large globals and release cached GPU memory before the next experiment cell.
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 743 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]

🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_arg

✅ Data Overview:
X_train: (1468, 5000, 12), y_train: (1468,)
X_val: (368, 5000, 12), y_val: (368,)




Epoch 1/200, Train Loss: 0.548911, Train-Class-Acc: {0: '80.11%', 1: '70.71%'}
Val Loss: 0.452734, Val Acc: 79.62%, Val-Class-Acc: {0: '93.48%', 1: '65.76%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_arg/ResNet18_1D_epoch_1.pth
Epoch 2/200, Train Loss: 0.390326, Train-Class-Acc: {0: '87.19%', 1: '77.66%'}
Val Loss: 0.455185, Val Acc: 80.43%, Val-Class-Acc: {0: '89.67%', 1: '71.20%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_arg/ResNet18_1D_epoch_2.pth
Epoch 3/200, Train Loss: 0.352090, Train-Class-Acc: {0: '88.42%', 1: '80.79%'}
Val Loss: 0.434934, Val Acc: 78.26%, Val-Class-Acc: {0: '66.30%', 1: '90.22%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_arg/ResNet18_1D_epoch_3.pth
Epoch 4/200, Train Loss: 0.357478, Train-Class-Acc: {0: '87.60%', 1: '79.84%'}
Val Loss: 0.584488, Val Acc: 79.62%, Val-Class-Acc: {0: '71.20%', 1: '88.04%'}, LR: 0.001000
✅ Saved model: Cla

0

### ResNet 18 - 1D_v2

#### No Init Version

In [57]:
# Experiment cell: train ResNet18_1D_v2 on Period 1 data and store its
# checkpoints under Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2.

# ==== Load Period 1 Data ====
# The preprocessed Period 1 splits are read from `save_dir` (defined in an earlier cell).
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_channels = X_train.shape[2]                  # 12 leads
output_size = len(np.unique(y_train))              # Number of classes (e.g., 2 for Period 1)
num_epochs = 200
batch_size = 64
dropout = 0.0                                       # Not passed to the model (unused in this cell)
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be smaller than the number of classes derived from y_train.
assert np.max(y_train) < output_size
assert np.max(y_test) < output_size

# ==== Paths ====
# Both paths are relative to the current working directory.
# The training loop stops at the start of an epoch if `stop_signal_file` exists.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_v2"
))
# NOTE(review): train_model_general_classifier deletes and recreates this folder itself,
# so this call looks redundant — confirm what ensure_folder does before removing it.
ensure_folder(model_saving_folder)

# ==== Model ====
model = ResNet18_1D_v2(input_channels=input_channels, output_size=output_size).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# The training function steps this scheduler with the validation loss:
# the LR is multiplied by 0.9 after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set, so the "best" checkpoint
# is selected on test data — confirm this is intended.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D_v2',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# Drop the large globals and release cached GPU memory before the next experiment cell.
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 0
    - Memory Used    : 4542 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]





🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2

✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 0.504826, Train-Class-Acc: {0: '80.52%', 1: '70.98%'}
Val Loss: 0.461798, Val Acc: 79.89%, Val-Class-Acc: {0: '69.02%', 1: '90.76%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2/ResNet18_1D_v2_epoch_1.pth
Epoch 2/200, Train Loss: 0.424017, Train-Class-Acc: {0: '82.43%', 1: '77.25%'}
Val Loss: 0.964001, Val Acc: 67.39%, Val-Class-Acc: {0: '39.13%', 1: '95.65%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2/ResNet18_1D_v2_epoch_2.pth
Epoch 3/200, Train Loss: 0.377516, Train-Class-Acc: {0: '86.78%', 1: '78.34%'}
Val Loss: 0.442596, Val Acc: 82.07%, Val-Class-Acc: {0: '78.80%', 1: '85.33%'}, LR: 0.001000
✅ Saved 

0

#### Data Augmentation (Arg) Version

In [60]:
# Experiment cell: train ResNet18_1D_v2 on Period 1 data with data augmentation and store
# its checkpoints under Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg.
# Augmentation is not configured here: train_model_general_classifier builds its training
# ECGDataset with augment=True. Apart from the output folder this cell matches the
# ResNet18_1D_v2 "No Init" cell.

# ==== Load Period 1 Data ====
# The preprocessed Period 1 splits are read from `save_dir` (defined in an earlier cell).
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_channels = X_train.shape[2]                  # 12 leads
output_size = len(np.unique(y_train))              # Number of classes (e.g., 2 for Period 1)
num_epochs = 200
batch_size = 64
dropout = 0.0                                       # Not passed to the model (unused in this cell)
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be smaller than the number of classes derived from y_train.
assert np.max(y_train) < output_size
assert np.max(y_test) < output_size

# ==== Paths ====
# Both paths are relative to the current working directory.
# The training loop stops at the start of an epoch if `stop_signal_file` exists.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_v2_arg"
))
# NOTE(review): train_model_general_classifier deletes and recreates this folder itself,
# so this call looks redundant — confirm what ensure_folder does before removing it.
ensure_folder(model_saving_folder)

# ==== Model ====
model = ResNet18_1D_v2(input_channels=input_channels, output_size=output_size).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# The training function steps this scheduler with the validation loss:
# the LR is multiplied by 0.9 after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set, so the "best" checkpoint
# is selected on test data — confirm this is intended.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D_v2',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# Drop the large globals and release cached GPU memory before the next experiment cell.
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 595 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]





🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg

✅ Data Overview:
X_train: (1468, 5000, 12), y_train: (1468,)
X_val: (368, 5000, 12), y_val: (368,)
Epoch 1/200, Train Loss: 0.529950, Train-Class-Acc: {0: '79.56%', 1: '67.71%'}
Val Loss: 0.719199, Val Acc: 70.38%, Val-Class-Acc: {0: '47.28%', 1: '93.48%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg/ResNet18_1D_v2_epoch_1.pth
Epoch 2/200, Train Loss: 0.405917, Train-Class-Acc: {0: '84.60%', 1: '79.16%'}
Val Loss: 1.668743, Val Acc: 53.80%, Val-Class-Acc: {0: '8.15%', 1: '99.46%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg/ResNet18_1D_v2_epoch_2.pth
Epoch 3/200, Train Loss: 0.408089, Train-Class-Acc: {0: '81.20%', 1: '79.02%'}
Val Loss: 0.478601, Val Acc: 77.45%, Val-Class-Acc: {0: '62.50%', 1: '92.39%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CI

0

#### Data Aug + Claude Version

In [61]:
# ==== Period 1 arrays ====
# Signals are (B, 5000, 12); the test split is reused as the validation set below.
X_train, y_train, X_test, y_test = (
    np.load(os.path.join(save_dir, fname))
    for fname in ("X_train_p1.npy", "y_train_p1.npy", "X_test_p1.npy", "y_test_p1.npy")
)

# ==== Hyperparameters ====
input_channels = X_train.shape[2]            # size of the last axis (12 leads)
output_size = len(np.unique(y_train))        # number of distinct training labels
num_epochs, batch_size = 200, 64
dropout = 0.0                                # kept for parity with other cells; not passed to the model here
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be a valid index for an `output_size`-way classifier head.
assert all(np.max(labels) < output_size for labels in (y_train, y_test))

# ==== Output locations ====
stop_signal_file = os.path.normpath(
    os.path.join('Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'))
model_saving_folder = os.path.normpath(
    os.path.join('Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude"))
ensure_folder(model_saving_folder)

# ==== Model, loss, optimizer, scheduler ====
model = ResNet18_1D_v2(input_channels=input_channels, output_size=output_size).to(device)
criterion = nn.CrossEntropyLoss()
# Baseline this replaces: Adam(lr=1e-3) with ReduceLROnPlateau('min', factor=0.9, patience=10).
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# NOTE(review): the run log for this cell prints LR 0.000400 on every epoch shown, so the
# one-cycle schedule does not appear to advance once per batch — confirm how
# train_model_general_classifier steps the scheduler.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3*10,                                        # peak LR: 10x the optimizer's nominal 1e-3
    epochs=num_epochs,
    steps_per_epoch=ceil(X_train.shape[0] / batch_size),   # batches per epoch, last partial batch included
    pct_start=0.3,                                         # fraction of the cycle spent ramping up to max_lr
    div_factor=25,                                         # starting LR = max_lr / 25
    final_div_factor=1000,                                 # ending LR = starting LR / 1000
)

# ==== Train ====
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    X_train=X_train, y_train=y_train,
    X_val=X_test, y_val=y_test,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D_v2',
    stop_signal_file=stop_signal_file,
    device=device,
)

# ==== Release memory held by this experiment ====
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 705 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]





🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude

✅ Data Overview:
X_train: (1468, 5000, 12), y_train: (1468,)
X_val: (368, 5000, 12), y_val: (368,)
Epoch 1/200, Train Loss: 0.500345, Train-Class-Acc: {0: '79.70%', 1: '70.57%'}
Val Loss: 0.468198, Val Acc: 77.17%, Val-Class-Acc: {0: '95.65%', 1: '58.70%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude/ResNet18_1D_v2_epoch_1.pth




Epoch 2/200, Train Loss: 0.397528, Train-Class-Acc: {0: '86.51%', 1: '77.11%'}
Val Loss: 0.451879, Val Acc: 82.88%, Val-Class-Acc: {0: '84.24%', 1: '81.52%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude/ResNet18_1D_v2_epoch_2.pth
Epoch 3/200, Train Loss: 0.358625, Train-Class-Acc: {0: '87.19%', 1: '80.38%'}
Val Loss: 0.432713, Val Acc: 82.88%, Val-Class-Acc: {0: '91.30%', 1: '74.46%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude/ResNet18_1D_v2_epoch_3.pth
Epoch 4/200, Train Loss: 0.330666, Train-Class-Acc: {0: '86.10%', 1: '83.92%'}
Val Loss: 0.388681, Val Acc: 83.15%, Val-Class-Acc: {0: '82.07%', 1: '84.24%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude/ResNet18_1D_v2_epoch_4.pth
Epoch 5/200, Train Loss: 0.309669, Train-Class-Acc: {0: '87.74%', 1: '83.79%'}
Val Loss: 0.645522, Val Acc: 75.00%, Val-Class-Acc: {0: '55.43%', 1: '94

0

#### Data Aug + Claude + use_class_weights (normal data)

In [66]:
# ==== Period 1 arrays (from the "processed_normal" export) ====
# Signals are (B, 5000, 12); the test split is reused as the validation set below.
save_dir = os.path.join(BASE_DIR, "processed_normal")
X_train, y_train, X_test, y_test = (
    np.load(os.path.join(save_dir, fname))
    for fname in ("X_train_p1.npy", "y_train_p1.npy", "X_test_p1.npy", "y_test_p1.npy")
)

# ==== Hyperparameters ====
input_channels = X_train.shape[2]            # size of the last axis (12 leads)
output_size = len(np.unique(y_train))        # number of distinct training labels
num_epochs, batch_size = 200, 64
dropout = 0.0                                # kept for parity with other cells; not passed to the model here
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be a valid index for an `output_size`-way classifier head.
assert all(np.max(labels) < output_size for labels in (y_train, y_test))

# ==== Output locations ====
stop_signal_file = os.path.normpath(
    os.path.join('Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'))
model_saving_folder = os.path.normpath(
    os.path.join('Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude_useclass_weights"))
ensure_folder(model_saving_folder)

# ==== Model, loss, optimizer, scheduler ====
model = ResNet18_1D_v2(input_channels=input_channels, output_size=output_size).to(device)
criterion = nn.CrossEntropyLoss()
# Baseline this replaces: Adam(lr=1e-3) with ReduceLROnPlateau('min', factor=0.9, patience=10).
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# NOTE(review): the run log for this cell prints LR 0.000400 on every epoch shown, so the
# one-cycle schedule does not appear to advance once per batch — confirm how the trainer
# steps the scheduler.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3*10,                                        # peak LR: 10x the optimizer's nominal 1e-3
    epochs=num_epochs,
    steps_per_epoch=ceil(X_train.shape[0] / batch_size),   # batches per epoch, last partial batch included
    pct_start=0.3,                                         # fraction of the cycle spent ramping up to max_lr
    div_factor=25,                                         # starting LR = max_lr / 25
    final_div_factor=1000,                                 # ending LR = starting LR / 1000
)

# ==== Train (class-weighted variant of the trainer) ====
result_summary = train_model_general_classifier_use_class_weights(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    X_train=X_train, y_train=y_train,
    X_val=X_test, y_val=y_test,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D_v2',
    stop_signal_file=stop_signal_file,
    device=device,
    use_class_weights=True,
)

# ==== Release memory held by this experiment ====
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 0
    - Memory Used    : 4544 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (5493, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]





🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude_useclass_weights

✅ Data Overview:
X_train: (5493, 5000, 12), y_train: (5493,)
X_val: (1374, 5000, 12), y_val: (1374,)
✅ 使用類別權重: [3.7418256 0.577117 ]
Epoch 1/200, Train Loss: 0.455932, Train-Class-Acc: {0: '83.38%', 1: '74.66%'}
Val Loss: 0.423869, Val Acc: 81.15%, Val-Class-Acc: {0: '78.80%', 1: '81.51%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude_useclass_weights/ResNet18_1D_v2_epoch_1.pth




Epoch 2/200, Train Loss: 0.405981, Train-Class-Acc: {0: '85.69%', 1: '78.99%'}
Val Loss: 0.364661, Val Acc: 83.19%, Val-Class-Acc: {0: '84.78%', 1: '82.94%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude_useclass_weights/ResNet18_1D_v2_epoch_2.pth
Epoch 3/200, Train Loss: 0.359642, Train-Class-Acc: {0: '88.42%', 1: '80.79%'}
Val Loss: 0.435123, Val Acc: 76.42%, Val-Class-Acc: {0: '91.30%', 1: '74.12%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude_useclass_weights/ResNet18_1D_v2_epoch_3.pth
Epoch 4/200, Train Loss: 0.339697, Train-Class-Acc: {0: '88.15%', 1: '82.54%'}
Val Loss: 0.375694, Val Acc: 84.64%, Val-Class-Acc: {0: '84.78%', 1: '84.62%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_v2_arg_claude_useclass_weights/ResNet18_1D_v2_epoch_4.pth
Epoch 5/200, Train Loss: 0.304485, Train-Class-Acc: {0: '90.33%', 1: '84.05%'}
Val Loss: 0.596593, V

0

### ResNet 18 - 1D improve

#### My Version

In [51]:
# ==== Period 1 arrays (from the "processed" export) ====
# Signals are (B, 5000, 12); the test split is reused as the validation set below.
save_dir = os.path.join(BASE_DIR, "processed")
X_train, y_train, X_test, y_test = (
    np.load(os.path.join(save_dir, fname))
    for fname in ("X_train_p1.npy", "y_train_p1.npy", "X_test_p1.npy", "y_test_p1.npy")
)

# ==== Hyperparameters ====
input_channels = X_train.shape[2]            # size of the last axis (12 leads)
output_size = len(np.unique(y_train))        # number of distinct training labels
num_epochs, batch_size = 200, 64
dropout = 0.0                                # not forwarded; the model gets dropout_rate=0.0 explicitly
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be a valid index for an `output_size`-way classifier head.
assert all(np.max(labels) < output_size for labels in (y_train, y_test))

# ==== Output locations ====
stop_signal_file = os.path.normpath(
    os.path.join('Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'))
model_saving_folder = os.path.normpath(
    os.path.join('Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_Improved"))
ensure_folder(model_saving_folder)

# ==== Model, loss, optimizer, scheduler ====
model = ResNet18_1D_Improved(
    input_channels=input_channels, output_size=output_size, dropout_rate=0.0
).to(device)
criterion = nn.CrossEntropyLoss()
# Baseline this replaces: Adam(lr=1e-3) with ReduceLROnPlateau('min', factor=0.9, patience=10).
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# NOTE(review): the run log for this cell prints LR 0.000400 on every epoch shown, so the
# one-cycle schedule does not appear to advance once per batch — confirm how
# train_model_general_classifier steps the scheduler.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3*10,                                        # peak LR: 10x the optimizer's nominal 1e-3
    epochs=num_epochs,
    steps_per_epoch=ceil(X_train.shape[0] / batch_size),   # batches per epoch, last partial batch included
    pct_start=0.3,                                         # fraction of the cycle spent ramping up to max_lr
    div_factor=25,                                         # starting LR = max_lr / 25
    final_div_factor=1000,                                 # ending LR = starting LR / 1000
)

# ==== Train ====
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    X_train=X_train, y_train=y_train,
    X_val=X_test, y_val=y_test,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D',
    stop_signal_file=stop_signal_file,
    device=device,
)

# ==== Release memory held by this experiment ====
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

🎯 Automatically selected GPU:
    - CUDA Device ID : 1
    - Memory Used    : 665 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]





🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved

✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 0.462742, Train-Class-Acc: {0: '83.51%', 1: '68.12%'}
Val Loss: 1.414899, Val Acc: 67.39%, Val-Class-Acc: {0: '98.37%', 1: '36.41%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved/ResNet18_1D_epoch_1.pth




Epoch 2/200, Train Loss: 0.363058, Train-Class-Acc: {0: '86.10%', 1: '81.06%'}
Val Loss: 0.988955, Val Acc: 75.00%, Val-Class-Acc: {0: '53.80%', 1: '96.20%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved/ResNet18_1D_epoch_2.pth
Epoch 3/200, Train Loss: 0.325394, Train-Class-Acc: {0: '85.69%', 1: '85.42%'}
Val Loss: 0.553363, Val Acc: 79.35%, Val-Class-Acc: {0: '96.74%', 1: '61.96%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved/ResNet18_1D_epoch_3.pth
Epoch 4/200, Train Loss: 0.271976, Train-Class-Acc: {0: '88.69%', 1: '87.74%'}
Val Loss: 0.424731, Val Acc: 83.42%, Val-Class-Acc: {0: '96.74%', 1: '70.11%'}, LR: 0.000400
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved/ResNet18_1D_epoch_4.pth
Epoch 5/200, Train Loss: 0.249831, Train-Class-Acc: {0: '92.78%', 1: '86.51%'}
Val Loss: 0.382168, Val Acc: 85.60%, Val-Class-Acc: {0: '88.04%', 1: '83.15%'}, LR: 0.000400
✅ S

0

In [None]:
# ==== Period 1 arrays (from the "processed" export) ====
# NOTE(review): this cell repeats the preceding "My Version" cell (same folder, same
# settings) and has not been executed — consider removing one of the two.
save_dir = os.path.join(BASE_DIR, "processed")
X_train, y_train, X_test, y_test = (
    np.load(os.path.join(save_dir, fname))
    for fname in ("X_train_p1.npy", "y_train_p1.npy", "X_test_p1.npy", "y_test_p1.npy")
)

# ==== Hyperparameters ====
input_channels = X_train.shape[2]            # size of the last axis (12 leads)
output_size = len(np.unique(y_train))        # number of distinct training labels
num_epochs, batch_size = 200, 64
dropout = 0.0                                # not forwarded; the model gets dropout_rate=0.0 explicitly
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Every label must be a valid index for an `output_size`-way classifier head.
assert all(np.max(labels) < output_size for labels in (y_train, y_test))

# ==== Output locations ====
stop_signal_file = os.path.normpath(
    os.path.join('Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'))
model_saving_folder = os.path.normpath(
    os.path.join('Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_Improved"))
ensure_folder(model_saving_folder)

# ==== Model, loss, optimizer, scheduler ====
model = ResNet18_1D_Improved(
    input_channels=input_channels, output_size=output_size, dropout_rate=0.0
).to(device)
criterion = nn.CrossEntropyLoss()
# Baseline this replaces: Adam(lr=1e-3) with ReduceLROnPlateau('min', factor=0.9, patience=10).
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3*10,                                        # peak LR: 10x the optimizer's nominal 1e-3
    epochs=num_epochs,
    steps_per_epoch=ceil(X_train.shape[0] / batch_size),   # batches per epoch, last partial batch included
    pct_start=0.3,                                         # fraction of the cycle spent ramping up to max_lr
    div_factor=25,                                         # starting LR = max_lr / 25
    final_div_factor=1000,                                 # ending LR = starting LR / 1000
)

# ==== Train ====
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    X_train=X_train, y_train=y_train,
    X_val=X_test, y_val=y_test,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='ResNet18_1D',
    stop_signal_file=stop_signal_file,
    device=device,
)

# ==== Release memory held by this experiment ====
del model, X_train, y_train, X_test, y_test
torch.cuda.empty_cache()
gc.collect()

#### Claude Version

In [48]:
# ==== Period 1 arrays (from the "processed" export) ====
# X_* are ECG signals shaped (B, 5000, 12); y_* are the matching integer labels.
save_dir = os.path.join(BASE_DIR, "processed")
X_train, y_train, X_test, y_test = (
    np.load(os.path.join(save_dir, fname))
    for fname in ("X_train_p1.npy", "y_train_p1.npy", "X_test_p1.npy", "y_test_p1.npy")
)

# 從第一個附件保留的數據增強函數
def augment_ecg(signal, sigma=0.05, shift_max=20):
    """
    Randomly perturb a batch of ECG signals.

    Three perturbations are applied in order: additive Gaussian noise, a time
    shift along axis 1 (edge-padded, so the length is unchanged) and a global
    amplitude scaling. One shift and one scale factor are drawn per call and
    shared by every sample in the batch.

    Args:
        signal: array of shape (B, T, C).
        sigma: standard deviation of the additive Gaussian noise.
        shift_max: largest absolute time shift in samples; the shift is drawn
            uniformly from the inclusive range [-shift_max, shift_max].
            0 disables shifting.

    Returns:
        The augmented array, same shape as `signal`.
    """
    # Additive Gaussian noise.
    noisy = signal + np.random.normal(0, sigma, signal.shape)

    # np.random.randint excludes its upper bound, so `+ 1` is required to make
    # the range symmetric and to keep shift_max=0 from raising (low >= high).
    shift = np.random.randint(-shift_max, shift_max + 1)
    if shift > 0:
        # Delay: drop the last `shift` samples, repeat the first sample in front.
        shifted = np.pad(noisy[:, :-shift, :], ((0, 0), (shift, 0), (0, 0)), mode='edge')
    elif shift < 0:
        # Advance: drop the first `-shift` samples, repeat the last sample at the end.
        shifted = np.pad(noisy[:, -shift:, :], ((0, 0), (0, -shift), (0, 0)), mode='edge')
    else:
        shifted = noisy

    # Amplitude scaling by a factor within +/-10%.
    scale = np.random.uniform(0.9, 1.1)
    return shifted * scale

# 從第一個附件保留的ECG數據集類
class ECGDataset(Dataset):
    """
    Map-style dataset over ECG arrays with optional on-the-fly augmentation.

    Each item is a `(signal, label)` pair: a float32 tensor built from `X[idx]`
    and a 0-dim int64 tensor built from `y[idx]`.
    """

    def __init__(self, X, y, augment=False, device=None):
        """
        Args:
            X: input signals, shape (N, T, C).
            y: labels, indexable by the same positions as X.
            augment: when True, each fetched sample is augmented with probability 0.5.
            device: optional device the returned tensors are moved to.
        """
        self.X, self.y = X, y
        self.augment, self.device = augment, device

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Work on a copy so the stored array is never modified.
        sample = self.X[idx].copy()

        # augment_ecg expects a batch axis: add it for the call, strip it afterwards.
        if self.augment and np.random.rand() > 0.5:
            sample = augment_ecg(sample[np.newaxis, ...])[0]

        features = torch.FloatTensor(sample)
        label = torch.LongTensor([self.y[idx]])[0]

        if not self.device:
            return features, label
        return features.to(self.device), label.to(self.device)

# 從第二個附件保留的計算類別精度函數
def compute_classwise_accuracy(student_logits_flat, y_batch, class_correct, class_total):
    """
    Accumulate per-class hit and sample counts for one batch.

    Both dictionaries are updated in place and keyed by the integer class label;
    nothing is returned.

    Args:
        student_logits_flat: model outputs; the arg-max over the last axis is the prediction.
        y_batch: ground-truth labels, on the same device as the logits.
        class_correct: dict mapping label -> running count of correct predictions.
        class_total: dict mapping label -> running count of samples seen.

    Raises:
        ValueError: if the logits and the labels live on different devices.
    """
    if student_logits_flat.device != y_batch.device:
        raise ValueError("student_logits_flat and y_batch must be on the same device")

    # Boolean mask of positions where the arg-max prediction matches the label.
    hits = torch.argmax(student_logits_flat, dim=-1) == y_batch

    # Only labels that actually occur in this batch are touched.
    for cls in torch.unique(y_batch).tolist():
        if cls not in class_total:
            class_total[cls] = 0
            class_correct[cls] = 0

        is_cls = y_batch == cls
        class_total[cls] += int(is_cls.sum())
        class_correct[cls] += int((is_cls & hits).sum())

# 從第二個附件保留的獲取模型參數信息函數
def get_model_parameter_info(model):
    """
    Count a model's parameters and estimate their storage size.

    The size estimate assumes 4 bytes (float32) per parameter, whatever the
    parameters' actual dtype is.

    Returns:
        (total_params, size_in_MB)
    """
    n_params = 0
    for tensor in model.parameters():
        n_params += tensor.numel()
    size_mb = (n_params * 4) / (1024**2)
    return n_params, size_mb

# 整合後的訓練函數
def train_model_with_early_stopping(
    model, output_size, 
    X_train, y_train, X_val, y_val,
    num_epochs=300, batch_size=32, 
    learning_rate=3e-4, weight_decay=1e-4,
    dropout_rate=0.3, patience=30,
    use_class_weights=True, use_data_augmentation=True,
    use_one_cycle_lr=True, model_saving_folder=None,
    model_name=None, stop_signal_file=None, device=None):
    """
    Train a classifier with optional augmentation, class weighting, a top-5
    checkpoint list and early stopping.

    Args:
        model: model to train; expected to already live on `device`.
        output_size: number of output classes (accepted for API parity; not used in the body).
        X_train, y_train: training signals and labels (NumPy arrays).
        X_val, y_val: validation signals and labels (NumPy arrays).
        num_epochs: maximum number of epochs.
        batch_size: mini-batch size for both loaders.
        learning_rate: AdamW learning rate; OneCycleLR peaks at 10x this value.
        weight_decay: AdamW weight decay.
        dropout_rate: accepted for API parity; not used in the body.
        patience: number of consecutive non-improving epochs tolerated before stopping.
        use_class_weights: weight the cross-entropy loss with 'balanced' class weights.
        use_data_augmentation: enable augmentation in the training dataset.
        use_one_cycle_lr: use OneCycleLR (stepped per batch); otherwise
            ReduceLROnPlateau (stepped per epoch on the validation loss).
        model_saving_folder: checkpoint directory; defaults to './saved_models'.
        model_name: checkpoint file prefix; defaults to 'model'.
        stop_signal_file: if this path exists at the start of an epoch, training stops.
        device: torch device; defaults to CUDA when available, else CPU.

    Returns:
        dict with keys 'training_time_sec', 'total_params', 'model_size_MB',
        'best_val_accuracy', 'val_classwise_accuracy', 'best_model_path',
        'final_model_path' and 'epoch_history'. The best/final entries are None
        when no epoch was completed.
    """
    print("\n🚀 '整合版訓練函數' 開始運行.")
    start_time = time.time()

    # === Folder setup ===
    # Resolve the defaults first so the fallback folder is created as well.
    model_name = model_name or 'model'
    model_saving_folder = model_saving_folder or './saved_models'
    os.makedirs(model_saving_folder, exist_ok=True)
    print(f"✅ 確保模型保存文件夾存在: {model_saving_folder}")

    # === Device setup ===
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # === Data overview ===
    print("\n✅ 數據概覽:")
    print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")
    
    # === Loss (optionally class-weighted) ===
    if use_class_weights:
        class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
        class_weights = torch.FloatTensor(class_weights).to(device)
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        print(f"✅ 使用類別權重: {class_weights.cpu().numpy()}")
    else:
        criterion = nn.CrossEntropyLoss()
        print("✅ 使用標準交叉熵損失函數(無類別權重)")
    
    # === Datasets and loaders (tensors stay on CPU; batches are moved in the loop) ===
    train_dataset = ECGDataset(X_train, y_train, augment=use_data_augmentation, device=None)
    val_dataset = ECGDataset(X_val, y_val, augment=False, device=None)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # === Optimizer ===
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    # === Learning-rate scheduler ===
    if use_one_cycle_lr:
        steps_per_epoch = len(train_loader)
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=learning_rate*10,             # peak LR: 10x `learning_rate`
            steps_per_epoch=steps_per_epoch,
            epochs=num_epochs,
            pct_start=0.3,                       # fraction of the cycle spent ramping up
            div_factor=25,                       # starting LR = max_lr / 25
            final_div_factor=1000                # ending LR = starting LR / 1000
        )
        print("✅ 使用OneCycleLR學習率調度器")
    else:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=10, verbose=True
        )
        print("✅ 使用ReduceLROnPlateau學習率調度器")
    
    # === Training state ===
    best_val_loss = float('inf')
    best_val_acc = 0
    best_model_state = None
    patience_counter = 0
    best_results = []      # top-5 epoch records, best first
    epoch_history = []
    
    # === Training loop ===
    for epoch in range(num_epochs):
        # Cooperative stop: an external process can create this file to end training.
        if stop_signal_file and os.path.exists(stop_signal_file):
            print("\n🛑 檢測到停止信號。退出訓練循環。")
            break
        
        # ---- Training phase ----
        model.train()
        train_loss = 0.0
        train_class_correct, train_class_total = {}, {}
        
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            
            # OneCycleLR is defined per optimizer step, so it advances every batch.
            if use_one_cycle_lr:
                scheduler.step()
                
            train_loss += loss.item() * X_batch.size(0)
            compute_classwise_accuracy(outputs, y_batch, train_class_correct, train_class_total)
        
        # Training metrics
        train_loss = train_loss / len(train_loader.dataset)
        train_acc = {int(c): f"{(train_class_correct[c] / train_class_total[c]) * 100:.2f}%" 
                    if train_class_total[c] > 0 else "0.00%" 
                    for c in sorted(train_class_total.keys())}
        
        # ---- Validation phase ----
        model.eval()
        val_loss = 0.0
        val_correct, val_total = 0, 0
        val_class_correct, val_class_total = {}, {}
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                
                val_loss += loss.item() * X_batch.size(0)
                predictions = torch.argmax(outputs, dim=-1)
                val_correct += (predictions == y_batch).sum().item()
                val_total += y_batch.size(0)
                compute_classwise_accuracy(outputs, y_batch, val_class_correct, val_class_total)
        
        # Validation metrics
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_correct / val_total
        val_acc_cls = {int(c): f"{(val_class_correct[c] / val_class_total[c]) * 100:.2f}%" 
                      if val_class_total[c] > 0 else "0.00%" 
                      for c in sorted(val_class_total.keys())}
        
        # ReduceLROnPlateau advances once per epoch on the validation loss.
        if not use_one_cycle_lr:
            scheduler.step(val_loss)
        
        # Per-epoch log
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Train-Class-Acc: {train_acc}")
        print(f"Val Loss: {val_loss:.6f}, Val Acc: {val_acc * 100:.2f}%, Val-Class-Acc: {val_acc_cls}, LR: {optimizer.param_groups[0]['lr']:.6f}")
        
        # Record for this epoch.
        # NOTE: model.state_dict() returns references to the live parameters, so the
        # in-memory record only reflects this epoch's weights until the next optimizer
        # step; the checkpoint written to disk below is the durable copy.
        model_path = os.path.join(model_saving_folder, f"{model_name}_epoch_{epoch+1}.pth")
        current = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'train_classwise_accuracy': train_acc,
            'val_classwise_accuracy': val_acc_cls,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'learning_rate': optimizer.param_groups[0]['lr'],
            'model_path': model_path
        }
        
        epoch_history.append(current)
        
        # ---- Top-5 checkpoint bookkeeping ----
        if len(best_results) < 5 or val_acc > best_results[-1]['val_accuracy']:
            if len(best_results) == 5:
                to_remove = best_results.pop()
                if os.path.exists(to_remove['model_path']):
                    os.remove(to_remove['model_path'])
                    print(f"🗑 移除: {to_remove['model_path']}")
            best_results.append(current)
            best_results.sort(key=lambda x: (x['val_accuracy'], x['epoch']), reverse=True)
            torch.save(current, model_path)
            print(f"✅ 保存模型: {model_path}")
            
        # ---- Early stopping ----
        # Evaluated on every epoch, independently of the top-5 list; otherwise epochs
        # that miss the top 5 would never advance the patience counter.
        if val_acc > best_val_acc or (val_acc == best_val_acc and val_loss < best_val_loss):
            print(f'驗證準確率從 {best_val_acc*100:.2f}% 提升到 {val_acc*100:.2f}%')
            best_val_acc = val_acc
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            print(f'驗證未改善。耐心值: {patience_counter}/{patience}')
            
            if patience_counter >= patience:
                print(f'早停在第 {epoch+1} 輪後觸發')
                break
    
    end_time = time.time()
    training_time = end_time - start_time
    total_params, param_size_MB = get_model_parameter_info(model)
    
    best_model_path = None
    final_model_path = None
    
    # Save the final checkpoint BEFORE restoring the best weights: the record's state
    # dict aliases the live parameters, which at this point still hold the last epoch.
    if epoch_history:
        final_model_path = os.path.join(model_saving_folder, f"{model_name}_final.pth")
        torch.save(epoch_history[-1], final_model_path)
        print(f"\n📌 最終模型保存為: {final_model_path}")
    
    # The best epoch's checkpoint was written to disk when that epoch finished, so
    # copying that file yields the weights that really belong to the reported metrics.
    if best_results:
        best = best_results[0]
        best_model_path = os.path.join(model_saving_folder, f"{model_name}_best.pth")
        shutil.copyfile(best['model_path'], best_model_path)
        print(f"\n🏆 最佳模型保存為: {best_model_path} (驗證準確率: {best['val_accuracy'] * 100:.2f}%)")
    
    # Leave the caller's model holding the best weights (absent if no epoch completed).
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    # Top-5 summary
    print("\n🎯 前5個最佳模型:")
    for res in best_results:
        print(f"Epoch {res['epoch']}, Train Loss: {res['train_loss']:.6f}, Train-Acc: {res['train_classwise_accuracy']},\n"
              f"Val Loss: {res['val_loss']:.6f}, Val Acc: {res['val_accuracy']*100:.2f}%, Val-Class-Acc: {res['val_classwise_accuracy']},"
              f" Model Path: {res['model_path']}")
    
    # Model summary
    print(f"\n🧠 模型摘要:")
    print(f"總參數量: {total_params:,}")
    print(f"模型大小 (float32): {param_size_MB:.2f} MB")
    print(f"總訓練時間: {training_time:.2f} 秒")
    
    # Cleanup
    del train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()
    
    return {
        'training_time_sec': training_time,
        'total_params': total_params,
        'model_size_MB': param_size_MB,
        'best_val_accuracy': best_results[0]['val_accuracy'] if best_results else None,
        'val_classwise_accuracy': best_results[0]['val_classwise_accuracy'] if best_results else None,
        'best_model_path': best_model_path,
        'final_model_path': final_model_path,
        'epoch_history': epoch_history
    }

# 使用示例

# ==== 改進的訓練代碼 ====
# ==== Train ResNet18_1D_Improved with the consolidated trainer ====
# Select the device in this cell, like the other training cells do, instead of
# relying on a `device` variable left in the kernel by a previously run cell.
device = auto_select_cuda_device()

model = ResNet18_1D_Improved(
    input_channels=X_train.shape[2], 
    output_size=len(np.unique(y_train)),
    dropout_rate=0.3
).to(device)

# Output locations
# NOTE(review): this is the same folder the "My Version" cell uses — confirm the two
# experiments are meant to share (and possibly overwrite) one checkpoint directory.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/ResNet18_Improved"
))
ensure_folder(model_saving_folder)

# Run training; the test split is used as the validation set.
training_result = train_model_with_early_stopping(
    model=model,
    output_size=len(np.unique(y_train)),
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    num_epochs=300,
    batch_size=32,
    learning_rate=3e-4,
    weight_decay=1e-4,
    dropout_rate=0.3,
    patience=30,
    use_class_weights=True,
    use_data_augmentation=True,
    use_one_cycle_lr=True,
    model_saving_folder=model_saving_folder,
    model_name="ResNet18_1D_Improved",
    stop_signal_file=stop_signal_file,
    device=device
)

print(f"最佳驗證準確率: {training_result['best_val_accuracy']*100:.2f}%")
print(f"最佳模型路徑: {training_result['best_model_path']}")

# Cleanup
del model
torch.cuda.empty_cache()
gc.collect()



🚀 '整合版訓練函數' 開始運行.
✅ 確保模型保存文件夾存在: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved

✅ 數據概覽:
X_train: (1468, 5000, 12), y_train: (1468,)
X_val: (368, 5000, 12), y_val: (368,)
✅ 使用類別權重: [1. 1.]
✅ 使用OneCycleLR學習率調度器
Epoch 1/300, Train Loss: 0.492625, Train-Class-Acc: {0: '82.70%', 1: '71.12%'}
Val Loss: 0.430352, Val Acc: 78.80%, Val-Class-Acc: {0: '92.93%', 1: '64.67%'}, LR: 0.000121
✅ 保存模型: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved/ResNet18_1D_Improved_epoch_1.pth
驗證準確率從 0.00% 提升到 78.80%
Epoch 2/300, Train Loss: 0.404841, Train-Class-Acc: {0: '86.24%', 1: '78.07%'}
Val Loss: 0.367357, Val Acc: 85.60%, Val-Class-Acc: {0: '92.93%', 1: '78.26%'}, LR: 0.000124
✅ 保存模型: Class_Incremental_CL/CPSC_CIL/Model_Selection/ResNet18_Improved/ResNet18_1D_Improved_epoch_2.pth
驗證準確率從 78.80% 提升到 85.60%
Epoch 3/300, Train Loss: 0.340290, Train-Class-Acc: {0: '89.24%', 1: '79.56%'}
Val Loss: 0.329147, Val Acc: 85.87%, Val-Class-Acc: {0: '85.87%', 1: '85.87%'}, LR: 0.0001

0

### Bi-GRU

In [32]:
# ==== Load Period 1 Data ====
# Pre-processed arrays for continual-learning period 1, read from `save_dir`.
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_size = X_train.shape[2]                      # F = 12 leads
hidden_size = 128                                  # 128 or 256 is usually more stable
output_size = len(np.unique(y_train))              # #Classes (P1 = 2)
dropout = 0.0
num_epochs = 200
batch_size = 64
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Labels are used as class indices, so every label must be < output_size.
assert np.max(y_train) < output_size, "y_train contains a label index >= output_size"
assert np.max(y_test) < output_size, "y_test contains a label index >= output_size"

# ==== Paths ====
# Relative paths: they resolve against the current working directory.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/BiGRU"
))
ensure_folder(model_saving_folder)

# ==== Model ====
model = BiGRU(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=output_size,
    dropout=dropout
).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Multiply the learning rate by 0.9 after 10 epochs without improvement of
# the monitored ('min' mode) quantity.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='BiGRU',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# The optimizer was built from model.parameters() and the scheduler wraps the
# optimizer, so both keep the parameters referenced; delete them together
# with the model, otherwise deleting `model` alone cannot release them.
del model, optimizer, scheduler, X_train, y_train, X_test, y_test
# Collect garbage before emptying the CUDA cache so memory released by the
# collector is included. Ending on empty_cache() (returns None) avoids
# echoing gc.collect()'s integer result as cell output.
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 0
    - Memory Used    : 58 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]

🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/BiGRU

✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 0.604521, Train-Class-Acc: {0: '83.51%', 1: '48.91%'}
Val Loss: 0.556891, Val Acc: 74.73%, Val-Class-Acc: {0: '63.59%', 1: '85.87%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/BiGRU/BiGRU_epoch_1.pth
Epoch 2/200, Train Loss: 0.472392, Train-Class-Acc: {0: '83.24%', 1: '72.21%'}
Val Loss: 0.471113, Val Acc: 80.16%, Val-Class-Acc: {0: '74.46%', 1: '85.87%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/BiGRU/BiGRU_epoch_2.pth
Epoch 3/200

0

### Bi-GRU with attention

In [None]:
# ==== Load Period 1 Data ====
# Pre-processed arrays for continual-learning period 1, read from `save_dir`.
X_train = np.load(os.path.join(save_dir, "X_train_p1.npy"))  # Shape: (B, 5000, 12)
y_train = np.load(os.path.join(save_dir, "y_train_p1.npy"))
X_test = np.load(os.path.join(save_dir, "X_test_p1.npy"))
y_test = np.load(os.path.join(save_dir, "y_test_p1.npy"))

# ==== Model Hyperparameters ====
input_size = X_train.shape[2]  # 12 leads
output_size = len(np.unique(y_train))
hidden_size = 128
num_layers = 2
dropout = 0.0
num_epochs = 200
batch_size = 64
device = auto_select_cuda_device()

print("✅ input shape:", X_train.shape)
print("✅ unique y_train:", np.unique(y_train))
print("✅ unique y_test :", np.unique(y_test))
# Labels are used as class indices, so every label must be < output_size.
assert np.max(y_train) < output_size, "y_train contains a label index >= output_size"
assert np.max(y_test) < output_size, "y_test contains a label index >= output_size"

# ==== Paths ====
# Relative paths: they resolve against the current working directory.
stop_signal_file = os.path.normpath(os.path.join(
    'Class_Incremental_CL', 'CPSC_CIL/stop_training.txt'
))
model_saving_folder = os.path.normpath(os.path.join(
    'Class_Incremental_CL', "CPSC_CIL/Model_Selection/BiGRU_Attn"
))
ensure_folder(model_saving_folder)

# ==== Model ====
model = BiGRUWithAttention(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout
).to(device)

# ==== Optimizer and Training ====
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Multiply the learning rate by 0.9 after 10 epochs without improvement of
# the monitored ('min' mode) quantity.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10)

# ==== Train ====
# NOTE(review): the test split is passed as the validation set.
result_summary = train_model_general_classifier(
    model=model,
    output_size=output_size,
    criterion=criterion,
    optimizer=optimizer,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    scheduler=scheduler,
    num_epochs=num_epochs,
    batch_size=batch_size,
    model_saving_folder=model_saving_folder,
    model_name='BiGRU_Attn',
    stop_signal_file=stop_signal_file,
    device=device
)

# ==== Cleanup ====
# The optimizer was built from model.parameters() and the scheduler wraps the
# optimizer, so both keep the parameters referenced; delete them together
# with the model, otherwise deleting `model` alone cannot release them.
del model, optimizer, scheduler, X_train, y_train, X_test, y_test
# Collect garbage before emptying the CUDA cache so memory released by the
# collector is included. Ending on empty_cache() (returns None) avoids
# echoing gc.collect()'s integer result as cell output.
gc.collect()
torch.cuda.empty_cache()


🎯 Automatically selected GPU:
    - CUDA Device ID : 2
    - Memory Used    : 14539 MiB
    - Device Name    : NVIDIA RTX A6000
✅ input shape: (1468, 5000, 12)
✅ unique y_train: [0 1]
✅ unique y_test : [0 1]

🚀 'train_model_general_classifier' started.
✅ Removed existing folder: Class_Incremental_CL/CPSC_CIL/Model_Selection/BiGRU_Attn

✅ Data Overview:
X_train: torch.Size([1468, 5000, 12]), y_train: torch.Size([1468])
X_val: torch.Size([368, 5000, 12]), y_val: torch.Size([368])
Epoch 1/200, Train Loss: 0.621901, Train-Class-Acc: {0: '69.07%', 1: '64.99%'}
Val Loss: 0.530069, Val Acc: 73.64%, Val-Class-Acc: {0: '78.80%', 1: '68.48%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/BiGRU_Attn/BiGRU_Attn_epoch_1.pth
Epoch 2/200, Train Loss: 0.483146, Train-Class-Acc: {0: '81.88%', 1: '72.48%'}
Val Loss: 0.503447, Val Acc: 74.73%, Val-Class-Acc: {0: '68.48%', 1: '80.98%'}, LR: 0.001000
✅ Saved model: Class_Incremental_CL/CPSC_CIL/Model_Selection/BiGRU_Attn/BiGRU_

0

## 🧪 CPSC - Model Selection Summary

| Model             | Total Params  | Model Size | Training Time (s) | Val Acc | Class-wise Accuracy                 |
|------------------|---------------|------------|--------------------|---------|-------------------------------------|
| **MLP**           | 62,496,770    | 238.41 MB  | 53.49              | 64.40%  | {0: 61.96%, 1: 66.85%}              |
| **ResNet18_1D**   | 3,857,026     | 14.71 MB   | 134.66             | 88.86%  | {0: 91.85%, 1: 85.87%}              |
| **Bi-GRU**        | 406,018       | 1.55 MB    | 379.19             | 85.33%  | {0: 81.52%, 1: 89.13%}              |
| **Bi-GRU+Attn**   | 121,218       | 0.46 MB    | 482.90             | 84.24%  | {0: 79.89%, 1: 88.59%}              |

**Note:** For ResNet18_1D, refer to `ResNet_Baseline_Selection_CPSC.ipynb`.