先安裝需要的套件

In [None]:
!pip install torch 
!pip install torchvision   
!pip install timm
!pip install pandas
!pip install matplotlib

先定義資料增強的方法

In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split, DataLoader
from torchvision import transforms

# Per-channel normalization statistics (the widely used ImageNet RGB mean/std).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentations first, then tensor conversion and
# per-channel normalization.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size=224),    # random crop, resized to 224x224
    transforms.RandomHorizontalFlip(),         # random horizontal flip
    transforms.RandomRotation(degrees=15),     # random rotation within +/-15 degrees
    transforms.ColorJitter(                    # small random color perturbation
        brightness=0.1,
        contrast=0.1,
        saturation=0.1,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation pipeline: no random augmentation, only resize + center crop
# before tensor conversion and normalization.
valid_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),           # center crop to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

載入資料集，並切分出訓練集和驗證集

In [None]:
# Local imports keep this cell self-contained: the split below needs a seeded
# generator and an explicit Subset.
import torch
from torch.utils.data import Subset

SPLIT_SEED = 42  # fixed seed so the train/validation split is identical on every run

# Two ImageFolder views over the same directory, differing only in transform.
# A single shared dataset cannot serve both splits: the subsets returned by
# random_split reference the same underlying dataset object, so changing its
# `transform` for validation would also change it for training and silently
# disable the training augmentations.
full_dataset = ImageFolder('train/', transform=train_transforms)
valid_source = ImageFolder('train/', transform=valid_transforms)

# 80% of the samples go to training, the remainder to validation.
train_size = int(0.8 * len(full_dataset))
valid_size = len(full_dataset) - train_size
train_dataset, valid_split = random_split(
    full_dataset,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Re-point the validation indices at the view that applies valid_transforms.
# Both views enumerate the same directory, so an index refers to the same
# image in each; train_dataset keeps reading through train_transforms.
valid_dataset = Subset(valid_source, valid_split.indices)

# Batch loaders: training batches are reshuffled every epoch, validation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=4)

設定測試集的資料轉換，並載入測試集

In [None]:
# Preprocessing for inference, with no random augmentation: resize, center
# crop to 224x224, convert to tensor, then normalize per channel.
test_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Test images are read from 'test-final'. Shuffling is disabled so the batch
# order follows the dataset's sample order.
test_dataset = ImageFolder(root='test-final', transform=test_transforms)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)


載入預訓練模型

In [7]:
import timm
import torch.nn as nn
import torch

# Select the compute device first: CUDA when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pretrained 'vit_base_patch16_224' Vision Transformer from timm.
model = timm.create_model(model_name='vit_base_patch16_224', pretrained=True)

# Replace the classification head with a fresh linear layer that has 50
# outputs (one per character class), keeping the head's input width.
head_in_features = model.head.in_features
model.head = nn.Linear(in_features=head_in_features, out_features=50)

# Move the model's parameters to the selected device. As the last expression
# of the cell, the returned module's repr is displayed.
model.to(device)


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

設定優化器和損失函數（loss function）

In [8]:
import torch.optim as optim

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Cross-entropy loss between the model's logits and the integer class labels.
criterion = nn.CrossEntropyLoss()


訓練模型

In [11]:
import torch
# tqdm drives the progress bars below. It is imported here so this cell runs
# on a fresh kernel; otherwise the name only becomes available after a later
# cell has been executed.
from tqdm import tqdm

num_epochs = 10
best_val_loss = float('inf')  # lowest validation loss seen so far
save_path = "best_model.pth"  # file that receives the best checkpoint

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0

    print(f"\nEpoch {epoch+1}/{num_epochs}")
    print("Training:")
    train_bar = tqdm(train_loader, desc="Training", leave=False)

    for inputs, labels in train_bar:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by the batch size so the epoch average
        # below is a per-sample mean even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
        train_bar.set_postfix(loss=loss.item())

    train_loss = running_loss / len(train_dataset)

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0
    correct = 0

    print("Validation:")
    valid_bar = tqdm(valid_loader, desc="Validating", leave=False)

    # No gradients are needed while evaluating.
    with torch.no_grad():
        for inputs, labels in valid_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            # Predicted class = index of the largest logit along dim 1.
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            valid_bar.set_postfix(loss=loss.item())

    val_loss /= len(valid_dataset)
    val_acc = correct / len(valid_dataset)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # Keep the checkpoint with the lowest validation loss.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), save_path)
        print(f"Best model saved at epoch {epoch+1} with Val Loss: {val_loss:.4f}")

    # Optional: also keep a checkpoint for every epoch.
    torch.save(model.state_dict(), f"model_epoch_{epoch+1}.pth")
    print(f"Model at epoch {epoch+1} saved.")



Epoch 1/10
Training:


                                                                           

Validation:


                                                                           

Epoch 1/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Best model saved at epoch 1 with Val Loss: 0.0000
Model at epoch 1 saved.

Epoch 2/10
Training:


                                                                           

Validation:


                                                                           

Epoch 2/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Model at epoch 2 saved.

Epoch 3/10
Training:


                                                                           

Validation:


                                                                           

Epoch 3/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Best model saved at epoch 3 with Val Loss: 0.0000
Model at epoch 3 saved.

Epoch 4/10
Training:


                                                                           

Validation:


                                                                           

Epoch 4/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Best model saved at epoch 4 with Val Loss: 0.0000
Model at epoch 4 saved.

Epoch 5/10
Training:


                                                                          

Validation:


                                                                     

Epoch 5/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Best model saved at epoch 5 with Val Loss: 0.0000
Model at epoch 5 saved.

Epoch 6/10
Training:


                                                                     

Validation:


                                                                     

Epoch 6/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Model at epoch 6 saved.

Epoch 7/10
Training:


                                                                     

Validation:


                                                                     

Epoch 7/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Model at epoch 7 saved.

Epoch 8/10
Training:


                                                                     

Validation:


                                                                     

Epoch 8/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Model at epoch 8 saved.

Epoch 9/10
Training:


                                                                     

Validation:


                                                                     

Epoch 9/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Model at epoch 9 saved.

Epoch 10/10
Training:


                                                                     

Validation:


                                                                     

Epoch 10/10, Train Loss: 0.0000, Val Loss: 0.0000, Val Acc: 1.0000
Model at epoch 10 saved.


使用模型對測試集進行預測，並輸出 submission.csv

In [None]:
import pandas as pd
import os
from tqdm import tqdm

# Predict with the best checkpoint written by the training loop rather than
# with whatever weights the final epoch left in memory. If no checkpoint file
# exists, the in-memory weights are used unchanged.
if os.path.exists(save_path):
    model.load_state_dict(torch.load(save_path, map_location=device))
    print(f"Loaded best checkpoint from {save_path}")

model.eval()
predictions = []

# Run the model over the test set in dataset order.
print("\nPredicting test data:")
test_bar = tqdm(test_loader, desc="Predicting")

with torch.no_grad():
    for inputs, _ in test_bar:
        inputs = inputs.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit along dim 1.
        _, preds = torch.max(outputs, 1)
        # .tolist() yields plain Python ints, used as dictionary keys below.
        predictions.extend(preds.cpu().tolist())

# File names of the test images, in the same order as the predictions
# (test_loader does not shuffle).
test_image_paths = [path for path, _ in test_dataset.samples]
test_image_names = [os.path.basename(path) for path in test_image_paths]

# Invert class_to_idx so a predicted index maps back to its class name.
idx_to_class = {v: k for k, v in full_dataset.class_to_idx.items()}

# Translate predicted indices to class names.
predicted_classes = [idx_to_class[idx] for idx in predictions]

# One row per test image. The id is the file name without its extension;
# splitext strips only the final extension, so names that contain extra dots
# are not truncated at the first dot.
submission = pd.DataFrame({
    'id': [os.path.splitext(name)[0] for name in test_image_names],
    'character': predicted_classes
})

# Write the submission file.
submission.to_csv('submission.csv', index=False)
