In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import CosineAnnealingLR

## 確認裝置

In [2]:
# 1. 裝置設定
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用裝置：{device}")

使用裝置：cpu


  return torch._C._cuda_getDeviceCount() > 0


## 下載資料集

In [3]:
# 2. 資料前處理與載入
cifar100_mean = (0.5071, 0.4867, 0.4408)   # 可調：使用 CIFAR-100 通用統計值
cifar100_std  = (0.2675, 0.2565, 0.2761)   # 可調

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # 新增：基本增強（可調 padding 大小）
    transforms.RandomHorizontalFlip(),      # 新增：左右翻轉（可開關）
    transforms.ToTensor(),
    transforms.Normalize(cifar100_mean, cifar100_std),  # 新增：標準化（強烈建議保留）
])

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar100_mean, cifar100_std),  # 新增：測試同樣標準化
])

trainset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=6, pin_memory=True)

testset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=256, shuffle=False, num_workers=6, pin_memory=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data\cifar-100-python.tar.gz


100.0%


Extracting ./data\cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
class CNN2(nn.Module):
    def __init__(self, num_classes=100):
        super().__init__()
        act = nn.ReLU()  # 或改成 nn.GELU()
        self.dropout = nn.Dropout(p=0.2)

        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),  # 32x32 -> 32x32
            nn.BatchNorm2d(64),
            act,
            nn.MaxPool2d(2, 2)                           # 32x32 -> 16x16
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),# 16x16 -> 16x16
            nn.BatchNorm2d(128),
            act,
            nn.MaxPool2d(2, 2)                           # 16x16 -> 8x8
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),# 8x8 -> 8x8
            nn.BatchNorm2d(256),
            act,
            nn.MaxPool2d(2, 2)                           # 8x8 -> 4x4
        )
        self.block4 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1),# 4x4 -> 4x4
            nn.BatchNorm2d(256),
            act,
            nn.MaxPool2d(2, 2)                           # 4x4 -> 2x2
        )

        self.gap = nn.AdaptiveAvgPool2d(1)  # 2x2 -> 1x1
        #self.fc1 = nn.Linear(64, num_classes)
        self.fc1 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.block1(x)   # (B,64,16,16)
        x = self.block2(x)   # (B,128,8,8)
        x = self.block3(x)   # (B,256,4,4)
        x = self.block4(x)   # (B,256,2,2)
        x = self.gap(x)      # (B,256,1,1)
        x = x.view(x.size(0), -1)  # (B,256)
        x = self.dropout(x)
        assert x.size(1) == self.fc1.in_features, f"in={self.fc1.in_features}, got={x.size(1)}"
        x = self.fc1(x)      # (B,100)
        return x

model2 = CNN2().to(device)
print(model2)

# 4. 損失與優化器
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # 直接接收整數標籤
optimizer = optim.Adam(model2.parameters(), lr=0.001, weight_decay=1e-4)

CNN2(
  (dropout): Dropout(p=0.2, inplace=False)
  (block1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block4): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), s

In [5]:
import matplotlib.pyplot as plt

num_epochs = 100
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

for epoch in range(1, num_epochs + 1):
    # ====== 訓練 ======
    model2.train()
    running_loss = 0.0
    correct, total = 0, 0

    for imgs, labels in trainloader:
        imgs, labels = imgs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        outputs = model2(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * imgs.size(0)
        _, preds = outputs.max(1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

    epoch_loss = running_loss / len(trainloader.dataset)
    epoch_acc = correct / total
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc)

    # ====== 驗證 ======
    model2.eval()
    val_running_loss = 0.0
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for imgs, labels in testloader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model2(imgs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * imgs.size(0)
            _, preds = outputs.max(1)
            val_total += labels.size(0)
            val_correct += (preds == labels).sum().item()

    val_loss = val_running_loss / len(testloader.dataset)
    val_acc = val_correct / val_total
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch}/{num_epochs} — "
          f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f} — "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    scheduler.step()

# ====== 畫 Loss 曲線 ======
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss')
plt.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()

# ====== 畫 Accuracy 曲線 ======
plt.subplot(1, 2, 2)
plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy')
plt.plot(range(1, num_epochs + 1), val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.legend()

plt.show()

KeyboardInterrupt: 