先安裝需要的套件

In [None]:
!pip install torch 
!pip install torchvision   
!pip install timm
!pip install pandas
!pip install matplotlib
!pip install scikit-learn

先定義資料增強的方法

In [None]:
import torch
from torchvision import datasets, transforms
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader

import os
import torchvision.transforms.v2 as T
from PIL import Image

# Custom transform to add Gaussian noise
class AddGaussianNoise(object):
    """Add i.i.d. Gaussian noise to a tensor.

    Each element receives ``N(0, 1) * std + mean``. The result is not
    clamped, so values may leave the input's original range.

    :param mean: Offset added to every noise sample.
    :param std: Scale applied to the standard-normal noise.
    """

    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        """
        :param tensor: PyTorch tensor to perturb (any shape).
        :return: New tensor of the same shape with noise added.
        """
        # Allocate the noise on the input's device; a CPU-only noise tensor
        # cannot be added to a tensor that lives on another device.
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})"

# Custom transform to add Speckle noise
class AddSpeckleNoise(object):
    """
    Multiplicative (speckle) noise transform.

    Every element is scaled by ``1 + n`` where ``n`` is zero-mean Gaussian
    noise with standard deviation ``noise_level``; the result is clamped to
    the range [0, 1].
    """
    def __init__(self, noise_level=0.1):
        """
        :param noise_level: Standard deviation of the multiplicative noise
        """
        self.noise_level = noise_level

    def __call__(self, tensor):
        """
        :param tensor: PyTorch tensor holding the image to perturb
        :return: PyTorch tensor of the same shape, clamped to [0, 1]
        """
        # Per-element gain centred on 1; randn_like matches the input's
        # shape, dtype and device.
        gain = 1 + torch.randn_like(tensor) * self.noise_level
        return torch.clamp(tensor * gain, 0, 1)

class AddPoissonNoise(object):
    """
    Add Poisson-distributed noise to the image.

    A Poisson sample with rate ``lam`` is drawn for every element, divided by
    255 and added to the input; the result is clamped to [0, 1].
    """
    def __init__(self, lam=1.0):
        """
        :param lam: Lambda (rate) parameter for the Poisson distribution
        """
        self.lam = lam

    def __call__(self, tensor):
        """
        :param tensor: PyTorch tensor, the image to which noise is added
        :return: PyTorch tensor, image with Poisson noise, clamped to [0, 1]
        """
        # Build the rate tensor on the input's device so the addition below
        # does not mix devices when the image is not on the CPU.
        rates = torch.full(tensor.shape, float(self.lam), device=tensor.device)
        noise = torch.poisson(rates)

        # Counts are divided by 255 on the assumption that the image is
        # scaled to [0, 1] (i.e. one count equals one 8-bit intensity step).
        noisy_tensor = tensor + noise / 255.0

        # Clip the values to be between 0 and 1
        return torch.clamp(noisy_tensor, 0, 1)

# Custom transform to add Salt and Pepper noise
class AddSaltPepperNoise(object):
    """Salt-and-pepper noise transform.

    One uniform sample in [0, 1) is drawn per element. Elements whose sample
    falls below ``salt_prob`` become 1; elements whose sample exceeds
    ``1 - pepper_prob`` become 0. Pepper is applied last, so it wins if the
    two ranges overlap.
    """

    def __init__(self, salt_prob=0.05, pepper_prob=0.05):
        self.salt_prob = salt_prob
        self.pepper_prob = pepper_prob

    def __call__(self, tensor):
        draws = torch.rand(tensor.size())
        salt_mask = draws < self.salt_prob
        pepper_mask = draws > 1 - self.pepper_prob

        # Work on a copy so the caller's tensor is left untouched.
        noisy = tensor.clone()
        noisy.masked_fill_(salt_mask, 1)
        noisy.masked_fill_(pepper_mask, 0)
        return noisy


# Training-time preprocessing and augmentation pipeline.
# Geometry is fixed first (resize + centre crop), the image is converted to a
# tensor, and the random augmentations are then applied to that tensor.
transform = T.Compose([
    T.Resize(size=256),                       # resize (an int size scales the shorter edge)
    T.CenterCrop(size=224),                   # centre crop to 224x224
    T.ToTensor(),                             # convert to a tensor
    T.RandomHorizontalFlip(p=0.1),            # horizontal flip, 10% of samples
    T.RandomVerticalFlip(p=0.1),              # vertical flip, 10% of samples
    T.RandomRotation(degrees=10),             # random rotation within +/-10 degrees
    T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),  # colour jitter
    T.RandomGrayscale(p=0.1),                 # grayscale, 10% of samples
    # Normalise with the ImageNet mean/std (the statistics the ViT uses)
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

載入數據集，以及區隔出訓練集和驗證集

In [None]:
from torch.utils.data import Subset

batch_size = 32
data_root = './train/train'

# Training view of the data: images go through the random augmentation
# pipeline stored in `transform`.
full_dataset = datasets.ImageFolder(root=data_root, transform=transform)

# Evaluation view of the SAME files with deterministic preprocessing only.
# Validating through the augmenting pipeline would randomly flip/rotate/
# recolour the validation images and make the reported accuracy noisy.
eval_transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
eval_dataset = datasets.ImageFolder(root=data_root, transform=eval_transform)

# Split ratio: 80% for training, 20% for validation
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# A seeded generator keeps the split identical across re-runs, so the
# validation images never leak into training after a kernel restart.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_split = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Both ImageFolder instances scan the same root, so their sample indices refer
# to the same files; reuse the held-out indices on the un-augmented view.
val_dataset = Subset(eval_dataset, val_split.indices)

# Build the DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

# Class names, in the label-index order assigned by ImageFolder
class_names = full_dataset.classes

載入 Pre-training 模型

In [None]:
import torch.nn as nn  # 新增這一行
from torchvision.models import vit_b_16

# Load torchvision's ViT-B/16 with ImageNet-1k weights.
# `weights=` replaces the deprecated `pretrained=True` flag;
# "IMAGENET1K_V1" is the checkpoint that flag used to select.
model = vit_b_16(weights="IMAGENET1K_V1")

# Replace the classification head with a small MLP sized for this dataset
in_features = model.heads.head.in_features  # width of the ViT output features
num_classes = len(class_names)  # number of target classes
model.heads.head = nn.Sequential(
    nn.Linear(in_features, 256),  # hidden layer
    nn.ReLU(inplace=True),
    nn.Linear(256, num_classes)   # output layer (logits)
)

# Move the backbone and the new head to the target device in one step
model = model.to(device)


設定優化器和loss function

In [None]:
import torch.optim as optim

# Loss function and optimizer.
# NOTE(review): the training cell below creates its own `optimizer`
# (Adam, lr=1e-4, no weight decay) and `criterion`, which replace these two
# objects before any training step runs.
criterion = nn.CrossEntropyLoss()  # standard loss for multi-class classification
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=5e-4)

訓練模型

In [None]:
from tqdm import tqdm  # 匯入 tqdm 模組
import os  # 用於檔案操作

num_epochs = 30        # number of passes over the training set
checkpoint_every = 10  # write a checkpoint every N epochs

# These are the optimizer/loss actually used for training; they replace any
# objects of the same name created in an earlier cell.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # Adam optimizer
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy loss

# Directory that receives the model checkpoints
save_dir = "./saved_models"
os.makedirs(save_dir, exist_ok=True)  # create it if it does not exist

for epoch in range(num_epochs):
    model.train()  # training mode
    running_loss = 0.0

    # Wrap train_loader in tqdm to show a per-batch progress bar
    with tqdm(train_loader, unit="batch") as train_progress:
        train_progress.set_description(f"Epoch {epoch+1}/{num_epochs}")
        for batch_idx, (inputs, labels) in enumerate(train_progress, start=1):
            # Move images and labels to the model's device
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass, backward pass and parameter update
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Show the mean loss over the batches processed SO FAR; dividing
            # by the total batch count would under-report it mid-epoch.
            train_progress.set_postfix(loss=(running_loss / batch_idx))

    # Mean training loss for the completed epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {running_loss/len(train_loader):.4f}")

    # Every `checkpoint_every` epochs, persist the weights into save_dir
    if (epoch + 1) % checkpoint_every == 0:
        checkpoint_path = os.path.join(save_dir, f"model_epoch_{epoch+1}.pth")
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Saved checkpoint to {checkpoint_path}")


In [None]:
import os
from PIL import Image

def find_corrupted_images(root_dir):
    """Return the paths under ``root_dir`` that Pillow cannot verify as images.

    Every file in the tree is opened and checked with ``Image.verify()``.
    Files that are not images at all are reported too, because opening them
    fails in the same way.

    :param root_dir: Directory to walk recursively.
    :return: List of file paths that failed to open or verify.
    """
    corrupted_images = []
    for root, _, files in os.walk(root_dir):
        for file in files:
            filepath = os.path.join(root, file)
            try:
                # The context manager closes the file handle even when
                # verification fails, so a large scan does not leak handles.
                with Image.open(filepath) as img:
                    img.verify()  # integrity check only; no pixel decode
            except (OSError, SyntaxError):
                # OSError covers unreadable/unidentified files; some Pillow
                # format plugins signal a broken file with SyntaxError.
                print(f"Corrupted image found: {filepath}")
                corrupted_images.append(filepath)
    return corrupted_images

# Scan the training image folder. Only this one path is checked; it is the
# same root the ImageFolder dataset is built from.
corrupted_images = find_corrupted_images('./train/train')
print(f"Found {len(corrupted_images)} corrupted images.")


In [None]:
# Delete every file reported by the scan. The cell is safe to re-run: a path
# that has already been removed is reported instead of raising an error.
for image_path in corrupted_images:
    try:
        os.remove(image_path)
    except FileNotFoundError:
        print(f"Already removed, skipping: {image_path}")
    else:
        print(f"Deleted corrupted image: {image_path}")


驗證模型

In [None]:
# Top-1 accuracy on the validation loader.
model.eval()  # evaluation mode
correct, total = 0, 0
with torch.no_grad():  # no gradients needed for evaluation; saves memory
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy: {accuracy:.2f}%')


預測模型

In [None]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from PIL import Image
import os
import pandas as pd
from torchvision import datasets

# Recover the class names from the training folder layout.
# NOTE(review): this rebinds `train_dataset` to a plain ImageFolder with no
# transform, replacing whatever that name referred to before this cell ran.
train_dir = './train/train'
train_dataset = datasets.ImageFolder(train_dir)
class_names = train_dataset.classes  # class names in label-index order
print(f"Class names: {class_names}")

# Custom Dataset for a flat folder of unlabeled test images
class TestDataset(Dataset):
    """Unlabeled images from a single directory, in natural filename order.

    Files are ordered so that digit runs compare numerically ("2.jpg" comes
    before "10.jpg"). A plain string sort would put "10.jpg" ahead of "2.jpg",
    and the 1-based position returned as the image ID would then not follow
    the numeric file order.

    :param root_dir: Directory containing the image files (not recursive).
    :param transform: Optional callable applied to each PIL image.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        # Keep regular files with an image extension only
        image_files = [
            f for f in os.listdir(root_dir)
            if os.path.isfile(os.path.join(root_dir, f))
            and f.lower().endswith(self.IMAGE_EXTENSIONS)
        ]
        self.image_paths = sorted(image_files, key=self._natural_key)
        self.transform = transform

    @staticmethod
    def _natural_key(filename):
        """Sort key that compares digit runs as integers."""
        import re

        # re.split with a capturing group alternates text / digits / text ...,
        # so odd positions are always digit runs and the element types line
        # up position by position when two keys are compared.
        chunks = re.split(r'(\d+)', filename.lower())
        key = [int(chunk) if i % 2 else chunk for i, chunk in enumerate(chunks)]
        # The raw name breaks ties between names that differ only by case
        return key, filename

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, image_id)`` where ``image_id`` is ``idx + 1``."""
        img_path = os.path.join(self.root_dir, self.image_paths[idx])
        # convert() returns an independent RGB copy, so the file handle can be
        # closed immediately afterwards.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, idx + 1  # image plus its 1-based ID

# Deterministic preprocessing for inference (no random augmentation)
transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Build the test dataset and its loader
test_dir = './test-final/test-final'
test_dataset = TestDataset(root_dir=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Loaded {len(test_dataset)} test images.")

# Run inference and collect one {'id', 'character'} record per image
model.eval()
predictions = []

with torch.no_grad():
    for inputs, ids in test_loader:  # `ids` are the IDs produced by TestDataset
        outputs = model(inputs.to(device))
        predicted = outputs.argmax(dim=1)  # predicted class index per image
        predictions.extend(
            {'id': img_id, 'character': class_names[pred]}
            for img_id, pred in zip(ids.tolist(), predicted.tolist())
        )

# Order the records by ID so the output file is sorted
predictions.sort(key=lambda record: record['id'])

# Write the submission CSV
submission_df = pd.DataFrame(predictions)
submission_df.to_csv('submission.csv', index=False)
print("Submission file 'submission.csv' generated successfully!")


Task2、Compute the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Collect ground-truth and predicted labels over the validation set.
# Nothing earlier in the notebook defines `all_labels` / `all_preds`, so they
# are gathered here before the matrix is computed.
all_labels, all_preds = [], []
model.eval()
with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs.to(device))
        all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
        all_labels.extend(labels.tolist())

# Compute the confusion matrix; `labels=` fixes the row/column order to the
# class-index order even if some class is missing from the validation split.
num_classes = len(class_names)
conf_matrix = confusion_matrix(all_labels, all_preds, labels=np.arange(num_classes))

# Draw the heatmap
plt.figure(figsize=(15, 15))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=class_names, yticklabels=class_names)
# Title reflects the actual class count instead of a hard-coded 50x50
plt.title(f"Confusion Matrix ({num_classes}x{num_classes})")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.xticks(rotation=90)
plt.yticks(rotation=0)

# Save the heatmap to disk, then display it
plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()



Task3、Visualization and Understanding Convolutional Neural Networks

In [None]:
import torch
from torchvision.models import resnet18
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import os

# Output folder for the figures written by the visualization helpers below
output_dir = "./Task3"
os.makedirs(output_dir, exist_ok=True)  # create the folder if it does not exist

# Visualize a layer's filter weights and save the figure
def visualize_filters(layer, layer_name):
    """Plot every filter of ``layer`` on a grid and save it as a PNG.

    Each filter is reduced to a 2-D image by averaging over its input
    channels. The figure is written to
    ``<output_dir>/<layer_name>_filters.png``.

    :param layer: Module exposing a ``weight`` tensor whose first dimension
        indexes filters and second dimension indexes input channels.
    :param layer_name: Label used in the figure title and the file name.
    """
    weights = layer.weight.data.cpu()  # filter weights
    num_filters = weights.shape[0]  # number of filters
    cols = 8  # filters per row
    # Ceiling division: exactly enough rows, and never fewer than one. The
    # previous `n // cols + 1` added a blank row whenever n was a multiple of 8.
    rows = max(1, -(-num_filters // cols))

    # Average over input channels -> shape (num_filters, height, width)
    weights_aggregated = weights.mean(dim=1)

    # squeeze=False keeps `axes` 2-D even when the grid has a single row
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    for i, ax in enumerate(axes.flat):
        if i < num_filters:
            ax.imshow(weights_aggregated[i].numpy(), cmap="viridis")
        ax.axis("off")  # unused cells are left blank
    fig.suptitle(f"Filters in {layer_name}", fontsize=16)

    # Save the figure
    output_path = os.path.join(output_dir, f"{layer_name}_filters.png")
    fig.savefig(output_path, bbox_inches="tight")
    print(f"Saved filters for layer {layer_name} to {output_path}")
    plt.close(fig)

# Visualize a layer's feature maps and save the figure
def visualize_feature_maps(feature_maps, layer_name):
    """Plot every channel of the first sample's feature map and save a PNG.

    The figure is written to ``<output_dir>/<layer_name>_feature_maps.png``.

    :param feature_maps: Batched layer output; index 0 selects the first
        sample, which is expected to be (channels, height, width).
    :param layer_name: Label used in the figure title and the file name.
    """
    # Indexing drops the batch dimension. No further squeeze is applied: it
    # would also collapse the channel axis of a single-channel map.
    feature_map = feature_maps[0].detach().cpu()
    num_channels = feature_map.shape[0]

    cols = 8  # channels per row
    # Ceiling division: exactly enough rows, and never fewer than one
    rows = max(1, -(-num_channels // cols))

    # squeeze=False keeps `axes` 2-D even when the grid has a single row
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    for i, ax in enumerate(axes.flat):
        if i < num_channels:
            ax.imshow(feature_map[i].numpy(), cmap="viridis")
        ax.axis("off")  # unused cells are left blank
    fig.suptitle(f"Feature Maps in {layer_name}", fontsize=16)

    # Save the figure
    output_path = os.path.join(output_dir, f"{layer_name}_feature_maps.png")
    fig.savefig(output_path, bbox_inches="tight")
    print(f"Saved feature maps for layer {layer_name} to {output_path}")
    plt.close(fig)

# Load ResNet18 with ImageNet-1k weights. `weights=` replaces the deprecated
# `pretrained=True` flag; "IMAGENET1K_V1" is the checkpoint that flag selected.
# NOTE(review): this rebinds the global `model`, replacing the model that the
# earlier cells trained and evaluated.
model = resnet18(weights="IMAGENET1K_V1")

# Captured layer outputs, keyed by module object
feature_maps = {}

def hook_fn(module, input, output):
    """Forward hook: store the output of ``module`` for later plotting."""
    feature_maps[module] = output.detach()

# Every convolution layer, keyed by its unique dotted name inside the model
conv_layers = {
    name: module
    for name, module in model.named_modules()
    if isinstance(module, torch.nn.Conv2d)
}

# Register the hook on all convolution layers
hooks = [module.register_forward_hook(hook_fn) for module in conv_layers.values()]

# Image preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the test image
image_path = './test-final/test-final/2.jpg'   # replace with your own test image path
image = Image.open(image_path).convert("RGB")
input_tensor = transform(image).unsqueeze(0)  # add the batch dimension

# Run one forward pass to populate `feature_maps`; the hooks are removed
# afterwards even if the forward pass raises.
model.eval()
try:
    with torch.no_grad():
        model(input_tensor)
finally:
    for hook in hooks:
        hook.remove()

# Plot and save the filter weights of every convolution layer
print("Visualizing and saving Filters...")
for name, module in conv_layers.items():
    visualize_filters(module, name)

# Plot and save the feature maps of every convolution layer. Files are named
# after the layer's dotted name: str(module) is identical for layers that share
# a configuration, so using it made those layers overwrite each other's image.
print("Visualizing and saving Feature Maps...")
for name, module in conv_layers.items():
    if module in feature_maps:
        visualize_feature_maps(feature_maps[module], name)
