### **第一題**
1000維的影像分析

In [2]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

imagenet_stats = [(0.485, 0.456, 0.406), (0.229, 0.224, 0.225)]

valid_tfms = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_stats[0], imagenet_stats[1])
])

batch_size = 196

# 加載數據
trainset = datasets.Food101(root='./food101', split="train", download=False, transform=valid_tfms)
validset = datasets.Food101(root='./food101', split="test", download=False, transform=valid_tfms)

trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=6)
validloader = DataLoader(validset, batch_size=batch_size, shuffle=False, num_workers=6)

# 確認分類數量
assert trainset.classes == validset.classes
classes = trainset.classes
print("Number of classes =", len(classes))


Number of classes = 101


In [6]:
from torchvision.models import resnet50
import torch

# 載入 ResNet50 並保留完整結構
model = resnet50(weights="IMAGENET1K_V2")
model.eval()  # 設置為評估模式

# 特徵提取函數
def extract_features(loader, model, device):
    features = []
    labels = []
    with torch.no_grad():  # 不計算梯度以加速推理
        for inputs, targets in loader:
            inputs = inputs.to(device)
            outputs = model(inputs)  # 輸出 1000 維 logits
            features.append(outputs.cpu())
            labels.append(targets)
    return torch.cat(features), torch.cat(labels)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# 提取特徵
train_features, train_labels = extract_features(trainloader, model, device)
valid_features, valid_labels = extract_features(validloader, model, device)

# 隨機選取 20,000 張訓練樣本
import random

def sample_subset(features, labels, sample_size=20000):
    indices = random.sample(range(features.shape[0]), sample_size)
    subset_features = features[indices]
    subset_labels = labels[indices]
    return subset_features, subset_labels

train_features_sampled, train_labels_sampled = sample_subset(train_features.numpy(), train_labels.numpy(), 20000)

print("Sampled training features shape:", train_features_sampled.shape)
print("Sampled training labels shape:", train_labels_sampled.shape)



Sampled training features shape: (20000, 1000)
Sampled training labels shape: (20000,)


In [9]:
# Logistic Regression 訓練
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

clf = LogisticRegression(C=0.1, max_iter=1000)
clf.fit(train_features_sampled, train_labels_sampled)

# 預測測試集
preds = clf.predict(valid_features.numpy())

# 計算準確率
accuracy = accuracy_score(valid_labels.numpy(), preds)
print("Validation Accuracy: {:.4f}".format(accuracy))

# 計算 Macro-average F1
macro_f1 = f1_score(valid_labels.numpy(), preds, average='macro')
print("Macro-average F1 Score: {:.4f}".format(macro_f1))


Validation Accuracy: 0.5598
Macro-average F1 Score: 0.5567


### **第二題**
2048維的影像辨識

In [12]:
from torchvision.models import resnet50
import torch

# 移動模型到設備
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet50(weights="IMAGENET1K_V2")
model = torch.nn.Sequential(*(list(model.children())[:-1]))  # 移除 'fc' 層
model = model.to(device)  # 確保模型在 GPU 上
model.eval()

# 特徵提取函數
def extract_features(loader, model, device):
    features = []
    labels = []
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)  # 確保輸入在 GPU 上
            outputs = model(inputs)  # 提取特徵
            features.append(outputs.view(outputs.size(0), -1).cpu())  # 展平成 (batch_size, 2048)
            labels.append(targets)
    return torch.cat(features), torch.cat(labels)



In [13]:
train_features, train_labels = extract_features(trainloader, model, device)
print("Train features shape:", train_features.shape)  # (20000, 2048)

Train features shape: torch.Size([75750, 2048])


In [15]:
# 隨機選取 20,000 張訓練樣本
train_features_sampled, train_labels_sampled = sample_subset(train_features.numpy(), train_labels.numpy(), 20000)

# Logistic Regression 訓練
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

clf = LogisticRegression(C=0.1, max_iter=1000)
clf.fit(train_features_sampled, train_labels_sampled)

# 測試集預測與指標計算
valid_features, valid_labels = extract_features(validloader, model, device)
preds = clf.predict(valid_features.numpy())
accuracy = accuracy_score(valid_labels.numpy(), preds)
macro_f1 = f1_score(valid_labels.numpy(), preds, average='macro')

# 輸出結果
print("Validation Accuracy: {:.4f}".format(accuracy))
print("Macro-average F1 Score: {:.4f}".format(macro_f1))


Validation Accuracy: 0.5829
Macro-average F1 Score: 0.5797


### **比較報告與分析：1000 維特徵 vs. 2048 維特徵**
**實驗結果**

| 維度      |  Validation Accuracy | Macro-average F1 Score         |
|:-----------:|:-------:|:--------------:|
| 1000      | 0.5598    | 0.5567     |
| 2048      | 0.5829    | 0.5797     |


從結果可以觀察到：
使用 2048 維特徵 的 Validation Accuracy 和 Macro-average F1 Score 都高於使用 1000 維特徵的情況。
差異並不算非常大，但仍然具有一定的提升。


### **可能的原因分析**

**特徵維度的影響**

1. 2048 維特徵 是 ResNet50 模型中更靠近輸入的層輸出，保留了更多底層和中層的特徵，可能包含更豐富的結構化細節，例如邊緣、紋理等。
1000 維特徵 是 ResNet50 的最後輸出，主要是針對 ImageNet 類別的分類調優過的特徵，可能對 Food101 的泛化能力稍弱。
分類器的適應能力

2. Logistic Regression 能夠處理高維數據，但在 1000 維特徵 的情況下，模型可能略微欠擬合（underfitting）。
2048 維特徵 提供了更多的數據變化空間，分類器能更好地學習到不同類別的分佈。
數據適配性

3. ResNet50 的 1000 維特徵是針對 ImageNet 訓練的，可能與 Food101 的特徵分佈存在一定偏差。
2048 維特徵 則更通用，未經過針對性調整，因此在 Food101 上可能更適合。
過擬合的可能性

4. 使用 2048 維特徵可能會有更多的特徵冗餘，但目前的樣本數量（20,000 張圖片）對 Logistic Regression 來說，仍然足夠避免過擬合的問題。

### **結論與建議**

**2048 維特徵效果更好:**

1. 在本次實驗中，2048 維特徵的分類性能略高於 1000 維特徵。
高維特徵保留了更多的結構化信息，對 Food101 這樣的多類別數據集可能更為適合。
應用場景的選擇:

2. 如果計算資源有限，使用 1000 維特徵可以減少存儲和計算成本，同時保持接近的分類性能。
如果對分類精度要求較高，尤其是對 Macro-average F1 的要求更高，可以選擇 2048 維特徵。
未來優化方向:

3. 嘗試其他分類器（例如 SVM 或神經網路）進一步提升分類性能。
探討對 ResNet50 特徵進行微調（Fine-tuning），可能進一步提升 1000 維特徵的性能。

### **第三題**

In [16]:
# 1.環境準備

from torchvision import transforms

# 訓練數據的增強
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),  # 隨機裁剪
    transforms.RandomHorizontalFlip(),  # 隨機水平翻轉
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 測試數據的處理
test_transforms = transforms.Compose([
    transforms.CenterCrop(224),  # 中心裁剪
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [19]:
#數據加載

from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# 訓練集和測試集加載
train_dataset = ImageFolder(root='./food101/food-101/images', transform=train_transforms)
test_dataset = ImageFolder(root='./food101/food-101/images', transform=test_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)


In [20]:
# 2.模型定義

import torch
from torchvision.models import resnet50
import torch.nn as nn

# 載入預訓練模型
model = resnet50(weights="IMAGENET1K_V2")
num_ftrs = model.fc.in_features  # 原始 fc 層的輸入維度
model.fc = nn.Linear(num_ftrs, 101)  # 替換為 101 類別的全連接層


In [21]:
# 全模型調整
for param in model.parameters():
    param.requires_grad = True


In [22]:
# 只調整 fc 層
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True


In [23]:
# 3.訓練與驗證過程
#損失函數與優化器
#使用交叉熵損失和 Adam 優化器進行訓練：

import torch.optim as optim

# 定義損失函數與優化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)


In [24]:
# Early Stopping
# 根據題目要求，設置 Early Stopping（若驗證集表現 5 個 epoch 無提升則停止訓練）：

class EarlyStopping:
    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss):
        score = -val_loss
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True


In [25]:
# 訓練函數

def train_and_validate(model, train_loader, test_loader, criterion, optimizer, device, max_epochs=100):
    model.to(device)
    early_stopping = EarlyStopping(patience=5)
    train_loss_history, val_loss_history = [], []
    train_acc_history, val_acc_history = [], []
    val_f1_history = []

    for epoch in range(max_epochs):
        model.train()
        train_loss, correct, total = 0, 0, 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        train_loss /= len(train_loader)
        train_acc = correct / total
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)

        # Validation
        model.eval()
        val_loss, correct, total = 0, 0, 0
        all_preds, all_targets = [], []
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()

                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

                all_preds.extend(predicted.cpu().numpy())
                all_targets.extend(targets.cpu().numpy())

        val_loss /= len(test_loader)
        val_acc = correct / total
        val_f1 = f1_score(all_targets, all_preds, average='macro')

        val_loss_history.append(val_loss)
        val_acc_history.append(val_acc)
        val_f1_history.append(val_f1)

        print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}")

        if early_stopping(val_loss):
            print("Early stopping")
            break

    return train_loss_history, val_loss_history, train_acc_history, val_acc_history, val_f1_history


In [26]:
# 4.執行兩種 Fine-Tuning 策略

# 訓練全模型
for param in model.parameters():
    param.requires_grad = True
train_and_validate(model, train_loader, test_loader, criterion, optimizer, device)

# 訓練只調整 fc 層
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True
train_and_validate(model, train_loader, test_loader, criterion, optimizer, device)


Epoch 1, Train Loss: 2.4730, Val Loss: 2.0877, Val Acc: 0.4938, Val F1: 0.4861


KeyboardInterrupt: 

In [None]:
# 5.可視化訓練曲線
import matplotlib.pyplot as plt

# 畫圖函數
def plot_metrics(train_history, val_history, metric_name):
    plt.plot(train_history, label=f'Train {metric_name}')
    plt.plot(val_history, label=f'Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    plt.title(f'{metric_name} over Epochs')
    plt.show()

# 繪製曲線
plot_metrics(train_loss_history, val_loss_history, 'Loss')
plot_metrics(train_acc_history, val_acc_history, 'Accuracy')
plot_metrics(val_f1_history, val_f1_history, 'Macro F1')
