# 載入資料，訓練ResNet

In [1]:
import json
import os
from PIL import Image
from torch.utils.data import Dataset

class LabelStudioDataset(Dataset):
    """Image-classification dataset backed by a Label Studio JSON export.

    The JSON file is expected to hold a list of records, each providing an
    ``'image'`` path string and a ``'choice'`` label string.

    Attributes:
        data: The parsed list of records.
        img_root_dir: Directory that the relative image paths are joined to.
        transform: Optional callable applied to each PIL image.
        upload_prefix: Substring removed from each record's ``'image'`` path.
        classes: Alphabetically sorted list of distinct label names.
        class_to_idx: Mapping from label name to its index in ``classes``.
    """

    def __init__(self, json_file, img_root_dir, transform=None,
                 upload_prefix='/data/upload/7/'):
        """Load the annotation file and build the label mapping.

        Args:
            json_file: Path to the JSON export.
            img_root_dir: Directory containing the image files.
            transform: Optional callable applied to every loaded image.
            upload_prefix: Text stripped from each ``'image'`` value before
                it is joined to ``img_root_dir``. The default matches the
                prefix that was previously hard-coded.
        """
        # Explicit encoding so non-ASCII labels/paths parse identically on
        # every platform instead of depending on the locale default.
        with open(json_file, 'r', encoding='utf-8') as f:
            self.data = json.load(f)

        self.img_root_dir = img_root_dir
        self.transform = transform
        self.upload_prefix = upload_prefix

        # Build the label mapping (e.g. {'Change': 0, 'Nochange': 1}) from
        # every choice that occurs in the data. The names are sorted because
        # plain set iteration order varies between interpreter runs, which
        # would silently change which index each class gets.
        self.classes = sorted({item['choice'] for item in self.data})
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

    def __len__(self):
        """Return the number of annotated records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for record ``idx``.

        The image is converted to RGB and passed through ``transform`` when
        one was supplied.
        """
        item = self.data[idx]

        # Turn the stored upload path into a path under img_root_dir.
        rel_path = item['image']
        if self.upload_prefix:
            rel_path = rel_path.replace(self.upload_prefix, '')
        img_path = os.path.join(self.img_root_dir, rel_path)

        # convert() returns an independent image, so the file handle can be
        # released as soon as the block exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Map the label name to its integer index.
        label = self.class_to_idx[item['choice']]

        if self.transform:
            image = self.transform(image)

        return image, label

In [2]:
def train_model(model, dataloader, criterion, optimizer, EPOCHS):
    """Train ``model`` in place and print loss/accuracy after every epoch.

    Args:
        model: Network to optimise; its parameters are updated in place.
        dataloader: Sized iterable yielding ``(images, labels)`` tensor
            batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The reported epoch loss assumes it returns the batch mean.
        optimizer: Optimizer already bound to the model's parameters.
        EPOCHS: Number of full passes over ``dataloader``.

    Raises:
        ValueError: If an epoch sees no samples, since no metrics can be
            computed.
    """
    # Send batches to wherever the model's weights live so inputs and model
    # can never end up on different devices. Fall back to GPU-if-available
    # only when the model has no parameters to inspect.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for epoch in range(EPOCHS):
        # Training mode
        model.train()

        # Per-epoch accumulators
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Reset gradients left over from the previous step
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            # Statistics. The batch loss is weighted by the batch size so a
            # smaller final batch does not distort the epoch average.
            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            predicted = outputs.detach().argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

        epoch_loss = running_loss / total
        epoch_acc = 100 * correct / total
        print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.2f}%")

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms

# Configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
JSON_FILE = './0106_label.json'
IMG_ROOT = './images'
BATCH_SIZE = 5
EPOCHS = 10
LEARNING_RATE = 1e-4  # every layer is fine-tuned, so the LR must stay small (1e-4 ~ 1e-5)


# Data preparation
# Training-time preprocessing: resize, random augmentation, then
# normalisation with the ImageNet channel mean/std.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = LabelStudioDataset(json_file=JSON_FILE, img_root_dir=IMG_ROOT, transform=transform)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f"數據集載入完成，共有 {len(dataset)} 張圖片，類別: {dataset.classes}")

# Model construction
# Load ResNet-18 with its ImageNet-pretrained weights
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer so it outputs one logit per
# dataset class
num_classes = len(dataset.classes)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

# Move the model to the target device once, before the optimizer is created
model = model.to(DEVICE)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()

# The optimizer receives model.parameters() (all layers), not just the new
# fully connected layer
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training
train_model(model, dataloader, criterion, optimizer, EPOCHS)


數據集載入完成，共有 61 張圖片，類別: ['Change', 'Nochange']
Epoch 1/10 | Loss: 0.8209 | Acc: 57.38%
Epoch 2/10 | Loss: 0.5051 | Acc: 75.41%
Epoch 3/10 | Loss: 0.4794 | Acc: 78.69%
Epoch 4/10 | Loss: 0.2927 | Acc: 90.16%
Epoch 5/10 | Loss: 0.2041 | Acc: 93.44%
Epoch 6/10 | Loss: 0.3080 | Acc: 88.52%
Epoch 7/10 | Loss: 0.2345 | Acc: 91.80%
Epoch 8/10 | Loss: 0.4733 | Acc: 83.61%
Epoch 9/10 | Loss: 0.2139 | Acc: 90.16%
Epoch 10/10 | Loss: 0.1546 | Acc: 98.36%


# 模型推論

In [4]:
model.eval()  # switch the model to evaluation mode

# Image to classify
sample_image_path = "./7'25_NC_testing.jpg"

# Deterministic preprocessing for inference: same resize and normalisation
# as training, but without the random flip / rotation / colour jitter.
# Reusing the augmenting training transform would feed the model a randomly
# perturbed image, so repeated predictions on one file could disagree.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the image and preprocess it
image = Image.open(sample_image_path).convert('RGB')
input_tensor = eval_transform(image)

# Add a batch dimension for the single image
input_batch = input_tensor.unsqueeze(0).to(DEVICE)

with torch.no_grad():  # no gradients are needed for inference
    output = model(input_batch)

# Prediction: the class with the highest probability
probabilities = torch.nn.functional.softmax(output[0], dim=0)
predicted_class_idx = torch.argmax(probabilities).item()

# Map the class index back to its name
predicted_class_name = dataset.classes[predicted_class_idx]

# Confidence of the predicted class
confidence = probabilities[predicted_class_idx].item()

print(f"預測的類別索引: {predicted_class_idx}")
print(f"預測的類別名稱: {predicted_class_name}")
print(f"預測結果的信心程度: {confidence:.4f}")

預測的類別索引: 1
預測的類別名稱: Nochange
預測結果的信心程度: 0.9994


# 儲存模型

In [5]:
import torch

# Destination file for the checkpoint
model_save_path = '0107_training.pth'

# Persist the weights together with the label mapping, so a loader can
# rebuild both the network and the index-to-name lookup from one file.
torch.save(
    dict(
        model_state_dict=model.state_dict(),
        class_to_idx=dataset.class_to_idx,
        classes=dataset.classes,
    ),
    model_save_path,
)

print(f"模型已成功儲存至: {model_save_path}")

模型已成功儲存至: 0107_training.pth
