# 安裝套件

In [None]:
pip install torch torchvision scikit-learn matplotlib tqdm

# 上一版

In [None]:
import os

import torch
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR

from torchvision import transforms
import torchvision.transforms.functional as TF

from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.model_selection import train_test_split

torch.manual_seed(42)
np.random.seed(42)

TRAIN_DATA_PATHS = [f"Datasets/trainingdata{i}" for i in range(11)]  
TEST_DATA_PATH = "Datasets/testingdata"

for path in TRAIN_DATA_PATHS + [TEST_DATA_PATH]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"資料路徑 {path} 不存在！")

IMG_SIZE = 224  
BATCH_SIZE = 32

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.2),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

val_test_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

class CustomImageDataset(Dataset):
    def __init__(self, data_paths, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        for class_id, data_path in enumerate(data_paths):
            for filename in os.listdir(data_path):
                if filename.endswith('.jpg'):
                    img_path = os.path.join(data_path, filename)
                    # assume "0_xxxx.jpg" => label=0
                    label = int(filename.split('_')[0])
                    self.image_paths.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

train_dataset = CustomImageDataset(
    data_paths=TRAIN_DATA_PATHS,
    transform=train_transforms
)

indices = list(range(len(train_dataset)))
train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)


class TestImageDataset(Dataset):
    def __init__(self, data_path, transform=None):
        self.image_paths = []
        self.transform = transform
        for filename in os.listdir(data_path):
            if filename.endswith('.jpg'):
                img_path = os.path.join(data_path, filename)
                self.image_paths.append(img_path)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_path

test_dataset = TestImageDataset(TEST_DATA_PATH, transform=val_test_transforms)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"總訓練資料數量: {len(train_dataset)}")
print(f"訓練集數量: {len(train_indices)}")
print(f"驗證集數量: {len(val_indices)}")
print(f"測試集數量: {len(test_dataset)}")

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels,
                               kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

    def __init__(self, num_classes=11):
        super(ResidualBlock, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64,  3, stride=1)
        self.layer2 = self._make_layer(128, 4, stride=2)
        self.layer3 = self._make_layer(256, 6, stride=2)
        self.layer4 = self._make_layer(512, 3, stride=2)

        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride):
        layers = []
        layers.append(ResidualBlock(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(ResidualBlock(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.global_pool(x)
        x = x.view(x.size(0), -1)

        x = self.dropout(x)
        x = self.fc(x)
        return x

if __name__ == "__main__":
    import torch
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = ResidualBlock(num_classes=11).to(device)
    print(model)

    dummy_input = torch.randn(2, 3, 224, 224).to(device)
    output = model(dummy_input)
    print(f"輸出維度: {output.shape}")

    def count_parameters(m):
        return sum(p.numel() for p in m.parameters() if p.requires_grad)

    print(f"模型參數量: {count_parameters(model):,}")

NUM_EPOCHS = 300       
LEARNING_RATE = 0.0005
PATIENCE = 20          
MODEL_SAVE_PATH = "best_model.pth"

def train_one_epoch(model, train_loader, criterion, optimizer, device, scheduler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    train_loader_tqdm = tqdm(train_loader, desc="Training", leave=False)
    for images, labels in train_loader_tqdm:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        train_loader_tqdm.set_postfix(loss=loss.item())

    epoch_loss = running_loss / total
    epoch_acc = 100.0 * correct / total

    return epoch_loss, epoch_acc

def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    val_loader_tqdm = tqdm(val_loader, desc="Validating", leave=False)
    with torch.no_grad():
        for images, labels in val_loader_tqdm:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            val_loader_tqdm.set_postfix(loss=loss.item())

    val_loss = running_loss / total
    val_acc = 100.0 * correct / total
    return val_loss, val_acc

def train_model(model, train_loader, val_loader, device):
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    scheduler = CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS, eta_min=1e-5)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    best_val_acc = 0.0
    best_epoch = 0
    patience_counter = 0

    for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch [{epoch+1}/{NUM_EPOCHS}]")

        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        scheduler.step()

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss:   {val_loss:.4f}, Val Acc:   {val_acc:.2f}%")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch + 1
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            print(f"--> 最佳模型已保存 (Val Acc: {best_val_acc:.2f}%)")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"--> 早停於 Epoch {epoch+1}, 最佳 Val Acc: {best_val_acc:.2f}% (Epoch {best_epoch})")
                break

    return train_losses, val_losses, train_accuracies, val_accuracies, best_val_acc, best_epoch

def plot_accuracies(train_accuracies, val_accuracies):
    plt.figure(figsize=(8, 5))
    plt.plot(train_accuracies, label='Train Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train and Validation Accuracy over Epochs')
    plt.legend()
    plt.grid()
    plt.savefig('accuracy_plot.png')
    plt.show()

# 主程式
if __name__ == "__main__":
    train_losses, val_losses, train_accuracies, val_accuracies, best_val_acc, best_epoch = train_model(
        model, train_loader, val_loader, device
    )
    plot_accuracies(train_accuracies, val_accuracies)
    print(f"訓練完成！最佳 Val Acc: {best_val_acc:.2f}% (Epoch {best_epoch})")

def get_tta_variants(tensor_image):
    tta_images = []
    tta_images.append(tensor_image)
    flipped = TF.hflip(tensor_image)
    tta_images.append(flipped)

    return tta_images

def inference_tta(model, test_loader, device):
    model.eval()
    predictions = []
    indices = []

    test_loader_tqdm = tqdm(test_loader, desc="Inferencing (TTA)", leave=False)
    with torch.no_grad():
        for batch_images, img_paths in test_loader_tqdm:
            batch_size = batch_images.size(0)

            batch_preds = []

            for i in range(batch_size):
                single_img = batch_images[i].to(device)
                tta_imgs = get_tta_variants(single_img)

                all_logits = []
                for tta_img in tta_imgs:
                    tta_img_batch = tta_img.unsqueeze(0)
                    outputs = model(tta_img_batch)
                    all_logits.append(outputs)

                avg_logits = torch.mean(torch.stack(all_logits, dim=0), dim=0)
                _, pred = torch.max(avg_logits, dim=1)
                batch_preds.append(pred.item())

            for img_path, pred in zip(img_paths, batch_preds):
                index = int(os.path.basename(img_path).split('.')[0])
                indices.append(index)
                predictions.append(pred)

            test_loader_tqdm.set_postfix(num_processed=len(indices))

    return indices, predictions

def save_results(indices, predictions, output_file="SampleSubmission.csv"):
    results = list(zip(indices, predictions))
    results.sort(key=lambda x: x[0])  
    df = pd.DataFrame(results, columns=["Index", "Label"])
    df.to_csv(output_file, index=False)
    print(f"推理結果已保存到 {output_file}")

# 新版(ResNet50)

載入所需的Lib

In [2]:
import os
import torch
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms
import torchvision.transforms.functional as TF
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


資料前處理

In [3]:
# 隨機種子
torch.manual_seed(42)
np.random.seed(42)

# 定義資料路徑
TRAIN_DATA_PATHS = [f"Datasets/trainingdata{i}" for i in range(11)]
TEST_DATA_PATH = "Datasets/testingdata"

for path in TRAIN_DATA_PATHS + [TEST_DATA_PATH]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"資料路徑 {path} 不存在！")


IMG_SIZE = 224
BATCH_SIZE = 128


# 定義資料增強
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.2),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_test_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# 自定義 Dataset 
class CustomImageDataset(Dataset):
    def __init__(self, data_paths, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        for class_id, data_path in enumerate(data_paths):
            for filename in os.listdir(data_path):
                if filename.endswith('.jpg'):
                    img_path = os.path.join(data_path, filename)
                    label = int(filename.split('_')[0])
                    self.image_paths.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# 訓練資料
train_dataset = CustomImageDataset(data_paths=TRAIN_DATA_PATHS, transform=train_transforms)

# 分割訓練集和驗證集(80% 訓練, 20% 驗證)
indices = list(range(len(train_dataset)))
train_indices, val_indices = train_test_split(indices, test_size=0.2, random_state=42)

# 創建驗證集的 Sampler
val_sampler = SubsetRandomSampler(val_indices)

# 創建訓練和驗證的 DataLoader
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)


# 檢查類別分佈
class_counts = [0] * 11
for _, label in train_dataset:
    class_counts[label] += 1
print("類別分佈:", class_counts)


# 載入測試資料
class TestImageDataset(Dataset):
    def __init__(self, data_path, transform=None):
        self.image_paths = []
        self.transform = transform
        for filename in os.listdir(data_path):
            if filename.endswith('.jpg'):
                img_path = os.path.join(data_path, filename)
                self.image_paths.append(img_path)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_path

test_dataset = TestImageDataset(TEST_DATA_PATH, transform=val_test_transforms)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"總訓練資料數量: {len(train_dataset)}")
print(f"訓練集數量: {len(train_indices)}")
print(f"驗證集數量: {len(val_indices)}")
print(f"測試集數量: {len(test_dataset)}")

類別分佈: [994, 429, 1500, 986, 848, 1325, 440, 280, 855, 1500, 709]
總訓練資料數量: 9866
訓練集數量: 7892
驗證集數量: 1974
測試集數量: 6777


檢查數據格式

In [4]:
data_iter = iter(train_loader)
images, labels = next(data_iter)
print(images.shape)

torch.Size([128, 3, 224, 224])


ResNet50 架構

In [None]:
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False)
        self.fc2 = nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)

    def forward(self, x):
        avg_out = self.fc2(F.relu(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(F.relu(self.fc1(self.max_pool(x))))
        return torch.sigmoid(avg_out + max_out)

class SpatialAttention(nn.Module):
    def __init__(self):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv(x)
        return torch.sigmoid(x)

class CBAM(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(CBAM, self).__init__()
        self.channel_attention = ChannelAttention(in_channels, reduction)
        self.spatial_attention = SpatialAttention()

    def forward(self, x):
        x = x * self.channel_attention(x)
        x = x * self.spatial_attention(x)
        return x

# Bottleneck 殘差塊
class Bottleneck(nn.Module):
    expansion = 4  

    def __init__(self, in_channels, out_channels, stride=1, dropblock_prob=0.0):
        super(Bottleneck, self).__init__()
        mid_channels = out_channels // self.expansion

        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.cbam = CBAM(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        self.dropblock_prob = dropblock_prob

    def forward(self, x):
        identity = self.shortcut(x)

        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = self.cbam(out)

        if self.training and self.dropblock_prob > 0:
            keep_prob = 1 - self.dropblock_prob
            if torch.rand(1).item() > keep_prob:
                return identity

        out += identity
        out = F.relu(out)
        return out

class ResNet50(nn.Module):
    def __init__(self, num_classes=11, dropblock_prob=0.1):
        super(ResNet50, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # ResNet-50 結構：3-4-6-3 個 Bottleneck 塊
        self.layer1 = self._make_layer(256, 3, stride=1, dropblock_prob=dropblock_prob)
        self.layer2 = self._make_layer(512, 4, stride=2, dropblock_prob=dropblock_prob)
        self.layer3 = self._make_layer(1024, 6, stride=2, dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(2048, 3, stride=2, dropblock_prob=dropblock_prob)

        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(2048, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride, dropblock_prob):
        layers = []
        layers.append(Bottleneck(self.in_channels, out_channels, stride, dropblock_prob))
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(Bottleneck(self.in_channels, out_channels, dropblock_prob=dropblock_prob))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet50(num_classes=11, dropblock_prob=0.1).to(device)
    print(model)

    dummy_input = torch.randn(2, 3, 224, 224).to(device)
    output = model(dummy_input)
    print(f"輸出維度: {output.shape}")

    def count_parameters(m):
        return sum(p.numel() for p in m.parameters() if p.requires_grad)
    print(f"模型參數量: {count_parameters(model):,}")

訓練階段

In [None]:
# 訓練設置
NUM_EPOCHS = 300
LEARNING_RATE = 0.0007
PATIENCE = 100
MODEL_SAVE_PATH = "best_model.pth"
CHECKPOINT_DIR = "ResNet50_checkpoints"

if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

# Focal Loss 
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, alpha=None, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.alpha is not None:
            alpha_t = self.alpha[targets]
            focal_loss = alpha_t * focal_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

def train_one_epoch(model, train_loader, criterion, optimizer, device, scheduler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    train_loader_tqdm = tqdm(train_loader, desc="Training", leave=False)
    for images, labels in train_loader_tqdm:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        train_loader_tqdm.set_postfix(loss=loss.item())

    epoch_loss = running_loss / total
    epoch_acc = 100.0 * correct / total
    return epoch_loss, epoch_acc

def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    val_loader_tqdm = tqdm(val_loader, desc="Validating", leave=False)
    with torch.no_grad():
        for images, labels in val_loader_tqdm:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            val_loader_tqdm.set_postfix(loss=loss.item())

    val_loss = running_loss / total
    val_acc = 100.0 * correct / total
    return val_loss, val_acc


def train_model(model, train_loader, val_loader, device):
    class_counts = [994, 429, 1500, 986, 848, 1325, 440, 280, 855, 1500, 709]
    total_samples = sum(class_counts)
    num_classes = len(class_counts)
    class_weights = torch.tensor([total_samples / (num_classes * count) for count in class_counts], dtype=torch.float).to(device)
    print("類別權重:", class_weights)

    criterion = FocalLoss(gamma=2.0, alpha=class_weights)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-4)

    warmup_epochs = 10
    scheduler = CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS - warmup_epochs, eta_min=5e-6)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    best_val_acc = 0.0
    best_epoch = 0
    patience_counter = 0
    val_acc_window = []

    for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch [{epoch+1}/{NUM_EPOCHS}]")
        if epoch < warmup_epochs:
            lr = LEARNING_RATE * (epoch + 1) / warmup_epochs
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print(f"Warmup LR: {lr:.6f}")
        else:
            print(f"Current LR: {optimizer.param_groups[0]['lr']:.6f}")

        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        val_acc_window.append(val_acc)
        if len(val_acc_window) > 3:
            val_acc_window.pop(0)
        smoothed_val_acc = sum(val_acc_window) / len(val_acc_window)

        if epoch >= warmup_epochs:
            scheduler.step()

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss:   {val_loss:.4f}, Val Acc:   {val_acc:.2f}% (Smoothed: {smoothed_val_acc:.2f}%)")

        if (epoch + 1) % 50 == 0:
            checkpoint_path = os.path.join(CHECKPOINT_DIR, f"model_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), checkpoint_path)
            print(f"--> 模型保存至 {checkpoint_path}")

        if smoothed_val_acc > best_val_acc:
            best_val_acc = smoothed_val_acc
            best_epoch = epoch + 1
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            print(f"--> 最佳模型保存 (Smoothed Val Acc: {best_val_acc:.2f}%)")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"--> 早停於 Epoch {epoch+1}, 最佳 Smoothed Val Acc: {best_val_acc:.2f}% (Epoch {best_epoch})")
                break

    return train_losses, val_losses, train_accuracies, val_accuracies, best_val_acc, best_epoch

def plot_accuracies(train_accuracies, val_accuracies):
    plt.figure(figsize=(8, 5))
    plt.plot(train_accuracies, label='Train Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train and Validation Accuracy over Epochs')
    plt.legend()
    plt.grid()
    plt.savefig('accuracy_plot.png')
    plt.show()

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet50(num_classes=11).to(device)
    train_losses, val_losses, train_accuracies, val_accuracies, best_val_acc, best_epoch = train_model(
        model, train_loader, val_loader, device
    )
    plot_accuracies(train_accuracies, val_accuracies)
    print(f"訓練完成！最佳 Val Acc: {best_val_acc:.2f}% (Epoch {best_epoch})")

推理階段

In [None]:
import os
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF
import pandas as pd
from tqdm import tqdm
import random
from collections import Counter

IMG_SIZE = 224

# 增強 TTA 變換函數
def get_tta_variants(tensor_image, variant_set="full"):
    tta_images = []
    weights = []

    # 基本變換（高權重）
    tta_images.append(tensor_image)  # 原始圖片
    weights.append(1.0)
    tta_images.append(TF.hflip(tensor_image))  # 水平翻轉
    weights.append(1.0)

    # 旋轉變換
    rotations = [10, -10, 20, -20, 30, -30]
    if variant_set != "full":
        rotations = random.sample(rotations, 4)  # 隨機選擇 4 個旋轉角度
    for angle in rotations:
        tta_images.append(TF.rotate(tensor_image, angle))
        weights.append(0.8)  # 旋轉變換權重稍低

    # 縮放變換
    scales = [0.8, 0.9, 1.1, 1.2]
    if variant_set != "full":
        scales = random.sample(scales, 2)  # 隨機選擇 2 個縮放比例
    for scale in scales:
        scaled = TF.resize(tensor_image, int(IMG_SIZE * scale))
        scaled = TF.resize(scaled, IMG_SIZE)
        tta_images.append(scaled)
        weights.append(0.7)  # 縮放變換權重更低

    # 亮度、對比度和飽和度變化
    brightness_factors = [0.8, 1.2]
    contrast_factors = [0.8, 1.2]
    if variant_set != "full":
        brightness_factors = random.sample(brightness_factors, 1)
        contrast_factors = random.sample(contrast_factors, 1)
    for bf in brightness_factors:
        tta_images.append(TF.adjust_brightness(tensor_image, bf))
        weights.append(0.6)
    for cf in contrast_factors:
        tta_images.append(TF.adjust_contrast(tensor_image, cf))
        weights.append(0.6)

    # 隨機裁剪
    if variant_set == "full" or random.random() > 0.5:
        tta_images.append(TF.center_crop(TF.pad(tensor_image, padding=10), IMG_SIZE))
        weights.append(0.5)

    return tta_images, weights

# 增強 TTA 推理函數（包含權重平均、後處理和投票）
def inference_tta(model, test_loader, device, num_votes=3):
    model.eval()
    predictions = []
    indices = []

    test_loader_tqdm = tqdm(test_loader, desc="Inferencing (TTA)", leave=False)
    with torch.no_grad():
        for batch_images, img_paths in test_loader_tqdm:
            batch_size = batch_images.size(0)
            batch_preds = []

            for i in range(batch_size):
                single_img = batch_images[i].to(device)

                # 多次投票
                vote_preds = []
                for vote in range(num_votes):
                    # 每次投票使用隨機子集變換
                    tta_imgs, weights = get_tta_variants(single_img, variant_set="subset")

                    # 蒐集各 TTA 影像的 logits
                    all_logits = []
                    for tta_img in tta_imgs:
                        tta_img_batch = tta_img.unsqueeze(0)
                        outputs = model(tta_img_batch)
                        all_logits.append(outputs)

                    # 權重平均 logits
                    all_logits = torch.stack(all_logits, dim=0)  # [num_variants, 1, num_classes]
                    weights_tensor = torch.tensor(weights, device=device).view(-1, 1, 1)
                    avg_logits = torch.sum(all_logits * weights_tensor, dim=0) / sum(weights)
                    _, pred = torch.max(avg_logits, dim=1)
                    vote_preds.append(pred.item())

                # 多數投票
                final_pred = Counter(vote_preds).most_common(1)[0][0]

                # logits 後處理（針對類別 4 和 5、8 和 9 的混淆）
                tta_imgs, weights = get_tta_variants(single_img, variant_set="full")
                all_logits = []
                for tta_img in tta_imgs:
                    tta_img_batch = tta_img.unsqueeze(0)
                    outputs = model(tta_img_batch)
                    all_logits.append(outputs)
                all_logits = torch.stack(all_logits, dim=0)
                weights_tensor = torch.tensor(weights, device=device).view(-1, 1, 1)
                avg_logits = torch.sum(all_logits * weights_tensor, dim=0) / sum(weights)
                probs = F.softmax(avg_logits, dim=1).squeeze(0)

                # 後處理：如果類別 4 和 5 的機率接近，根據先驗調整
                if final_pred in [4, 5]:
                    prob_4, prob_5 = probs[4].item(), probs[5].item()
                    if abs(prob_4 - prob_5) < 0.1:
                        if prob_4 > prob_5 and 848 / (848 + 1325) > 0.5:
                            final_pred = 4
                        else:
                            final_pred = 5

                # 後處理：如果類別 8 和 9 的機率接近，根據先驗調整
                if final_pred in [8, 9]:
                    prob_8, prob_9 = probs[8].item(), probs[9].item()
                    if abs(prob_8 - prob_9) < 0.1:
                        if prob_8 > prob_9 and 855 / (855 + 1500) > 0.5:
                            final_pred = 8
                        else:
                            final_pred = 9

                batch_preds.append(final_pred)

            for img_path, pred in zip(img_paths, batch_preds):
                index = int(os.path.basename(img_path).split('.')[0])
                indices.append(index)
                predictions.append(pred)

            test_loader_tqdm.set_postfix(num_processed=len(indices))

    return indices, predictions

# 保存推理結果
def save_results(indices, predictions, output_file="SampleSubmission.csv"):
    results = list(zip(indices, predictions))
    results.sort(key=lambda x: x[0])  
    df = pd.DataFrame(results, columns=["Index", "Label"])
    df.to_csv(output_file, index=False)
    print(f"推理結果保存到 {output_file}")


def run_inference(model, test_loader, device):
    model = model.to(device)
    indices, predictions = inference_tta(model, test_loader, device, num_votes=3)
    save_results(indices, predictions, output_file="SampleSubmission.csv")
    

def load_model(model, model_path="EfficientNetB4_checkpoints/best_model.pth", device="cuda"):
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()  # 設置為評估模式
    return model


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CustomEfficientNetB4(num_classes=11, dropblock_prob=0.3)
run_inference(model, test_loader, device)

Inferencing (TTA):   4%|▍         | 2/53 [02:38<1:07:14, 79.10s/it, num_processed=256]

#  Xception、InceptionV3、EfficientNet、ResNet152V2 都是目前不錯的CNN架構

我選擇 EfficientNet 來實作

In [6]:
# CBAM 模塊
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False)
        self.fc2 = nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)

    def forward(self, x):
        avg_out = self.fc2(F.relu(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(F.relu(self.fc1(self.max_pool(x))))
        return torch.sigmoid(avg_out + max_out)

class SpatialAttention(nn.Module):
    def __init__(self):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv(x)
        return torch.sigmoid(x)

class CBAM(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(CBAM, self).__init__()
        self.channel_attention = ChannelAttention(in_channels, reduction)
        self.spatial_attention = SpatialAttention()

    def forward(self, x):
        x = x * self.channel_attention(x)
        x = x * self.spatial_attention(x)
        return x

# MBConv 模塊
class MBConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, expansion=6, dropblock_prob=0.0):
        super(MBConv, self).__init__()
        mid_channels = in_channels * expansion

        self.expand = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, 1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.SiLU()
        ) if expansion > 1 else nn.Identity()

        self.depthwise = nn.Sequential(
            nn.Conv2d(mid_channels, mid_channels, kernel_size, stride=stride, padding=kernel_size//2, groups=mid_channels, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.SiLU()
        )

        self.cbam = CBAM(mid_channels)

        self.project = nn.Sequential(
            nn.Conv2d(mid_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

        self.dropblock_prob = dropblock_prob
        self.shortcut = stride == 1 and in_channels == out_channels

    def forward(self, x):
        identity = x
        out = self.expand(x)
        out = self.depthwise(out)
        out = self.cbam(out)
        out = self.project(out)

        if self.shortcut and self.training and self.dropblock_prob > 0:
            keep_prob = 1 - self.dropblock_prob
            if torch.rand(1).item() < keep_prob:
                out += identity
            return out
        elif self.shortcut:
            return out + identity
        return out

class CustomEfficientNetB4(nn.Module):
    def __init__(self, num_classes=11, dropblock_prob=0.1):
        super(CustomEfficientNetB4, self).__init__()

        self.stem = nn.Sequential(
            nn.Conv2d(3, 48, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(48),
            nn.SiLU()
        )

        # 主結構
        stages = [
            (48, 24, 1, 3, 1, 2),   # (in_ch, out_ch, expansion, kernel_size, stride, blocks)
            (24, 32, 4, 5, 2, 2),
            (32, 56, 4, 5, 2, 4),
            (56, 112, 4, 3, 2, 4),
            (112, 160, 6, 5, 1, 6),
            (160, 272, 6, 5, 2, 8),
            (272, 448, 6, 3, 1, 2),
        ]
        self.blocks = nn.ModuleList()
        for in_ch, out_ch, exp, k, s, num_blocks in stages:
            for i in range(num_blocks):
                stride = s if i == 0 else 1
                self.blocks.append(MBConv(in_ch if i == 0 else out_ch, out_ch, k, stride, exp, dropblock_prob))

        # 頭部
        self.head = nn.Sequential(
            nn.Conv2d(448, 1792, 1, bias=False),
            nn.BatchNorm2d(1792),
            nn.SiLU(),
            nn.AdaptiveAvgPool2d(1),
            nn.Dropout(0.5),
            nn.Flatten(),
            nn.Linear(1792, num_classes)
        )

    def forward(self, x):
        x = self.stem(x)
        for block in self.blocks:
            x = block(x)
        x = self.head(x)
        return x

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CustomEfficientNetB4(num_classes=11, dropblock_prob=0.1).to(device)
    print(model)

    dummy_input = torch.randn(2, 3, 224, 224).to(device)
    output = model(dummy_input)
    print(f"輸出維度: {output.shape}")

    def count_parameters(m):
        return sum(p.numel() for p in m.parameters() if p.requires_grad)
    print(f"模型參數量: {count_parameters(model):,}")

CustomEfficientNetB4(
  (stem): Sequential(
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): SiLU()
  )
  (blocks): ModuleList(
    (0): MBConv(
      (expand): Identity()
      (depthwise): Sequential(
        (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU()
      )
      (cbam): CBAM(
        (channel_attention): ChannelAttention(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (max_pool): AdaptiveMaxPool2d(output_size=1)
          (fc1): Conv2d(48, 3, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (fc2): Conv2d(3, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (spatial_attention): SpatialAttention(
          (conv): Conv2d(2, 1, kernel_size=(7, 7)

訓練階段

In [None]:
# 訓練設置
NUM_EPOCHS = 200
LEARNING_RATE = 0.0005
PATIENCE = 100
MODEL_SAVE_PATH = "best_model.pth"
CHECKPOINT_DIR = "EfficientNetB4_checkpoints"

if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

# Focal Loss 
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, alpha=None, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.alpha is not None:
            alpha_t = self.alpha[targets]
            focal_loss = alpha_t * focal_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss


def train_one_epoch(model, train_loader, criterion, optimizer, device, scheduler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    train_loader_tqdm = tqdm(train_loader, desc="Training", leave=False)
    for images, labels in train_loader_tqdm:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        train_loader_tqdm.set_postfix(loss=loss.item())

    epoch_loss = running_loss / total
    epoch_acc = 100.0 * correct / total
    return epoch_loss, epoch_acc

def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    val_loader_tqdm = tqdm(val_loader, desc="Validating", leave=False)
    with torch.no_grad():
        for images, labels in val_loader_tqdm:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            val_loader_tqdm.set_postfix(loss=loss.item())

    val_loss = running_loss / total
    val_acc = 100.0 * correct / total
    return val_loss, val_acc

def train_model(model, train_loader, val_loader, device):
    class_counts = [994, 429, 1500, 986, 848, 1325, 440, 280, 855, 1500, 709]
    total_samples = sum(class_counts)
    num_classes = len(class_counts)
    class_weights = torch.tensor([total_samples / (num_classes * count) for count in class_counts], dtype=torch.float).to(device)
    print("類別權重:", class_weights)

    criterion = FocalLoss(gamma=2.0, alpha=class_weights)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-4)

    warmup_epochs = 10
    scheduler = CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS - warmup_epochs, eta_min=5e-6)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    best_val_acc = 0.0
    best_epoch = 0
    patience_counter = 0
    val_acc_window = []

    for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch [{epoch+1}/{NUM_EPOCHS}]")
        if epoch < warmup_epochs:
            lr = LEARNING_RATE * (epoch + 1) / warmup_epochs
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print(f"Warmup LR: {lr:.6f}")
        else:
            print(f"Current LR: {optimizer.param_groups[0]['lr']:.6f}")

        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        val_acc_window.append(val_acc)
        if len(val_acc_window) > 3:
            val_acc_window.pop(0)
        smoothed_val_acc = sum(val_acc_window) / len(val_acc_window)

        if epoch >= warmup_epochs:
            scheduler.step()

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss:   {val_loss:.4f}, Val Acc:   {val_acc:.2f}% (Smoothed: {smoothed_val_acc:.2f}%)")

        if (epoch + 1) % 50 == 0:
            checkpoint_path = os.path.join(CHECKPOINT_DIR, f"model_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), checkpoint_path)
            print(f"--> 模型保存至 {checkpoint_path}")

        if smoothed_val_acc > best_val_acc:
            best_val_acc = smoothed_val_acc
            best_epoch = epoch + 1
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            print(f"--> 最佳模型保存 (Smoothed Val Acc: {best_val_acc:.2f}%)")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"早停於 Epoch {epoch+1}, 最佳 Smoothed Val Acc: {best_val_acc:.2f}% (Epoch {best_epoch})")
                break

    return train_losses, val_losses, train_accuracies, val_accuracies, best_val_acc, best_epoch

def plot_accuracies(train_accuracies, val_accuracies):
    plt.figure(figsize=(8, 5))
    plt.plot(train_accuracies, label='Train Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Train and Validation Accuracy over Epochs')
    plt.legend()
    plt.grid()
    plt.savefig('accuracy_plot.png')
    plt.show()

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CustomEfficientNetB4(num_classes=11).to(device)
    train_losses, val_losses, train_accuracies, val_accuracies, best_val_acc, best_epoch = train_model(
        model, train_loader, val_loader, device
    )
    plot_accuracies(train_accuracies, val_accuracies)
    print(f"訓練完成！最佳 Val Acc: {best_val_acc:.2f}% (Epoch {best_epoch})")