本檔案會產生最後的預測檔案：`test_predictions_ensemble_mixed2.csv`。流程如下：

1. 首先，我會利用已經訓練好的 ResNet50 模型，對 `test` 資料夾中的所有圖片進行預測，並將預測結果存成 `inference_results_resnet50_dropout.csv`。

2. 接著，透過已經訓練好的 Swin Transformer 模型，對 `test` 資料夾中的所有圖片進行預測，並將預測結果存成 `inference_results_swin.csv`。

3. 最終，我會將這兩個模型的預測結果進行加權平均，並將結果存成 `test_predictions_ensemble_mixed2.csv`。

Inference 所需要的模型可以分別透過 `training_resnet50_dropout.ipynb` 和 `training_swin.ipynb` 產生。

In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import models
import torch.nn as nn
from tqdm import tqdm

from datasets import TestDataset
import json
import os

class MultiLabelResNet50(nn.Module):
    def __init__(self, num_classes, pretrained=False):
        super(MultiLabelResNet50, self).__init__()
        self.model = models.resnet50(pretrained=pretrained)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(p=0.5),  # 添加 Dropout 层
            nn.Linear(in_features, num_classes)
        )

    def forward(self, x):
        x = self.model(x)
        return x

def main():
    if not os.path.exists('class_mapping.json'):
        print("错误: 'class_mapping.json' 文件不存在。")
        return

    with open('class_mapping.json', 'r') as f:
        class_mapping = json.load(f)

    num_classes = len(class_mapping)
    class_names = [f'class_{i}' for i in range(num_classes)]

    test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    test_data_dir = 'test_data'
    if not os.path.exists(test_data_dir):
        print(f"'{test_data_dir}' 不存在。")
        return

    test_dataset = TestDataset(img_dir=test_data_dir, transform=test_transforms)
    test_loader = DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MultiLabelResNet50(num_classes=num_classes, pretrained=False).to(device)

    model_path = 'model_epoch_39.pth'
    if not os.path.exists(model_path):
        print(f"'{model_path}' 不存在。")
        return

    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    predictions = []
    filenames = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Predicting"):
            batch_filenames, inputs = batch
            inputs = inputs.to(device)

            outputs = model(inputs)
            probs = torch.sigmoid(outputs).cpu().numpy()

            predictions.append(probs)
            filenames.extend(batch_filenames)

    if predictions:
        predictions = np.vstack(predictions)
        results_df = pd.DataFrame(predictions, columns=class_names)
        results_df.insert(0, 'filename', filenames)
    else:
        print("錯誤：無法生成預測結果")
        return

    # 保存预测结果为 CSV 文件
    output_csv = 'inference_results_resnet50_dropout.csv'
    results_df.to_csv(output_csv, index=False)
    print(f"結果保存至 '{output_csv}'。")

if __name__ == '__main__':
    main()




  model.load_state_dict(torch.load(model_path, map_location=device))
Predicting: 100%|██████████| 275/275 [07:17<00:00,  1.59s/it]


結果保存至 'inference_results_resnet50_dropout.csv'。


In [3]:
import os
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from timm import create_model
import torch.nn.functional as F

class MultiLabelSwinTransformer(nn.Module):
    def __init__(self, num_classes, pretrained=True, freeze_layers=False):
        super(MultiLabelSwinTransformer, self).__init__()
        # 使用 Swin Base (224×224)
        self.model = create_model(
            'swin_base_patch4_window7_224',
            pretrained=pretrained
        )
        in_features = self.model.head.in_features
        self.model.head = nn.Identity()

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features, num_classes)
        )

        if freeze_layers:
            for name, param in self.model.named_parameters():
                if "layers.0" in name or "layers.1" in name:
                    param.requires_grad = False

    def forward(self, x):
        # x shape: [batch_size, 3, 224, 224]

        x = self.model.forward_features(x)
        # 此時 x shape: [batch_size, 7, 7, 1024]

        x = x.permute(0, 3, 1, 2)
        # 現在 x shape: [batch_size, 1024, 7, 7]

        x = F.adaptive_avg_pool2d(x, (1, 1))
        # shape: [batch_size, 1024, 1, 1]

        x = x.flatten(1)
        # shape: [batch_size, 1024]

        x = self.classifier(x)
        return x

class InferenceDataset(Dataset):
    def __init__(self, img_dir, filenames, transform=None):

        self.img_dir = img_dir
        self.filenames = filenames
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_name = self.filenames[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_name


def main():
    with open('class_mapping.json', 'r', encoding='utf-8') as f:
        class_mapping = json.load(f)
    num_classes = len(class_mapping)
    class_names = list(class_mapping.keys())

    test_dir = 'test_data'
    test_filenames = [f for f in os.listdir(test_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    test_filenames.sort()

    print(f"Number of test samples: {len(test_filenames)}")

    # 定義圖像轉換 (與訓練時相同，但通常不需要數據增強)
    test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),
    ])

    test_dataset = InferenceDataset(
        img_dir=test_dir,
        filenames=test_filenames,
        transform=test_transforms
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=64,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = MultiLabelSwinTransformer(
        num_classes=num_classes,
        pretrained=False,
        freeze_layers=False
    ).to(device)

    model_path = 'model_epoch_56_swin_no_freeze.pth'
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"模型檔案 {model_path} 不存在。請確認路徑是否正確。")
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print(f"Loaded model weights from {model_path}")

    all_filenames = []
    all_probs = []

    with torch.no_grad():
        for inputs, filenames in tqdm(test_loader, desc="推論中"):
            inputs = inputs.to(device)
            outputs = model(inputs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            all_filenames.extend(filenames)
            all_probs.append(probs)

    all_probs = np.vstack(all_probs)

    results_df = pd.DataFrame(all_probs, columns=class_names)
    results_df.insert(0, 'filename', all_filenames)

    output_csv = 'inference_results_swin.csv'
    results_df.to_csv(output_csv, index=False)
    print(f"推論結果已保存至 {output_csv}")

if __name__ == '__main__':
    main()


Number of test samples: 8784
Using device: cuda


  model.load_state_dict(torch.load(model_path, map_location=device))


Loaded model weights from model_epoch_56_swin_no_freeze.pth


推論中: 100%|██████████| 138/138 [03:08<00:00,  1.37s/it]


推論結果已保存至 inference_results_swin.csv


In [4]:
import pandas as pd

df_resnet50 = pd.read_csv('inference_results_resnet50_dropout.csv')
df_do = pd.read_csv('inference_results_swin.csv')

assert df_do.shape == df_resnet50.shape, "DataFrames have different shapes!"

df_numeric_avg = (df_do.select_dtypes(include='number') + df_resnet50.select_dtypes(include='number')) / 2
df_non_numeric = df_do.select_dtypes(exclude='number')

df = pd.concat([df_non_numeric, df_numeric_avg], axis=1)

df.to_csv('test_predictions_ensemble_mixed2.csv', index=False)
print("Averaged ensemble predictions saved to 'test_predictions_ensemble_mixed2.csv'.")

Averaged ensemble predictions saved to 'test_predictions_ensemble_mixed2.csv'.
