In [None]:
# Clone the project repository from GitHub into the current working directory
!git clone https://github.com/brianshih95/Introduction-to-Machine-Learning.git

In [None]:
# Switch the working directory to 'Introduction-to-Machine-Learning/final_project'
import os

project_dir = os.path.join("Introduction-to-Machine-Learning", "final_project")

# os.chdir with a relative path is not idempotent: re-running this cell after
# it has already succeeded would look for the path inside final_project and
# raise FileNotFoundError. Only change directory when we are not there yet.
already_inside = os.path.normpath(os.getcwd()).endswith(
    os.sep + os.path.normpath(project_dir)
)
if not already_inside:
    os.chdir(project_dir)

In [None]:
# Locations of the trained model weights and of the test images.
# Edit these two values to point at a different model or test set.
model_path = "training/model.pt"
test_folder = "training/data/test"

In [None]:
# To make ImageFolder work properly, add a 'dummy' folder inside the test data
# folder and move all of the test images into it.
import shutil

dummy_folder = test_folder + '/dummy'
if not os.path.isdir(dummy_folder):
    os.mkdir(dummy_folder)

# Sweep for images on every run instead of only when the folder was just
# created: otherwise a run interrupted after mkdir, or images added to the
# test folder later, would leave files at the top level of test_folder.
jpg_files = [f for f in os.listdir(test_folder) if f.lower().endswith('.jpg')]

for jpg_file in jpg_files:
    source_path = os.path.join(test_folder, jpg_file)
    dest_path = os.path.join(dummy_folder, jpg_file)
    shutil.move(source_path, dest_path)

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from PIL import Image

# Evaluation-time preprocessing: resize, center-crop to a square, convert to a
# tensor, then normalize each channel with the mean/std listed below
# (presumably the ImageNet channel statistics — TODO confirm against training).
image_size = 384
test_transform = transforms.Compose([
    # Use torchvision's InterpolationMode enum; passing the PIL integer
    # constant (Image.BICUBIC) is the legacy form of this argument.
    transforms.Resize(image_size, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# The validation split is opened so that its class names can be looked up when
# predictions are turned into labels; the test images themselves come from
# test_folder.
valid_folder = 'training/data/splited/valid'
val_dataset = datasets.ImageFolder(valid_folder, transform=test_transform)
test_dataset = datasets.ImageFolder(test_folder, transform=test_transform)

batch_size = 16
num_workers = 2
# shuffle=False keeps the batches in the same order as test_dataset.imgs.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [None]:
# Choose the device first so the checkpoint can be mapped onto it while loading.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the architecture and size the final layer to the number of classes
# found in the validation dataset.
model = models.regnet_y_32gf()
num_classes = len(val_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# map_location is needed for the CPU fallback above to actually work: without
# it torch.load restores tensors onto the device they were saved from and
# fails on a machine without CUDA when the checkpoint was written on a GPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
model.to(device)

model.eval()
predictions = []
with torch.no_grad():
    for inputs, _ in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        # Index of the highest-scoring class for every image in the batch.
        predicted = outputs.argmax(dim=1)
        predictions.extend(predicted.cpu().tolist())

# Image ids are the file names without directory or extension, taken in
# dataset order so that they pair up with the predictions collected above.
file_names = [os.path.splitext(os.path.basename(path))[0]
              for path, _ in test_dataset.imgs]
# val_dataset.classes lists the class names in class-index order, i.e. the
# same order as the keys of val_dataset.class_to_idx.
class_labels = val_dataset.classes
predicted_labels = [class_labels[prediction] for prediction in predictions]

# generate prediction file
df = pd.DataFrame({'id': file_names, 'label': predicted_labels})
df.to_csv('predictions.csv', index=False)