In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from PIL import Image
import json
import matplotlib.pyplot as plt
import os
import pandas as pd
import time
import torchvision.models as models
from IPython.display import display
import numpy as np
import csv

In [2]:
# Select the compute device: the first CUDA GPU if one is visible, else the CPU.
if torch.cuda.is_available():
    device_name = "cuda:0"
else:
    device_name = "cpu"
device = torch.device(device_name)
print("GPU state:", device)

GPU state: cuda:0


In [3]:
# All dataset locations are derived from the project root.
root_path = "/content/drive/MyDrive/VRDL_HW/HW1"
train_path = f"{root_path}/2021VRDL_HW1_datasets/training_images/"
test_path = f"{root_path}/2021VRDL_HW1_datasets/testing_images/"
class_path = f"{root_path}/2021VRDL_HW1_datasets/classes.txt"
train_label_path = f"{root_path}/2021VRDL_HW1_datasets/training_labels.txt"

In [4]:
# class_name maps a class name to its zero-based index (file number - 1);
# class_list keeps the names in the order they appear in the file.
class_name = {}
class_list = []
# `with` guarantees the handle is released even if a line fails to parse.
with open(class_path) as file:
    for line in file:
        # Strip all surrounding whitespace (including "\r" from CRLF files) so
        # the keys match the whitespace-split names read from the label file.
        line = line.strip()
        if not line:
            continue  # tolerate blank / trailing empty lines
        parts = line.split(".")
        class_name[parts[1]] = int(parts[0]) - 1
        class_list.append(parts[1])

In [5]:
# Parallel lists: train_id[i] is the numeric image id and train_label[i] the
# class name of the i-th labelled image.
train_label = []
train_id = []
# Each non-empty line has two whitespace-separated fields; the text before the
# first "." of field 0 is the integer id, and the second "."-separated piece of
# field 1 is the class name.
with open(train_label_path) as file:  # closed automatically (was leaked before)
    for line in file:
        fields = line.split()
        if not fields:
            continue  # skip blank lines instead of raising IndexError
        train_id.append(int(fields[0].split(".")[0]))
        train_label.append(fields[1].split(".")[1])

In [6]:
class TrainDataset(Dataset):
    """Labelled image dataset addressed by numeric image id.

    Item ``idx`` is the RGB image stored at ``path + <id zero-padded to 4> + ".jpg"``
    paired with the integer class index looked up in the module-level
    ``class_name`` table.
    """

    def __init__(self, id, label, path, transform=None):
        self.id = id
        self.labels = label
        self.path = path
        self.transform = transform

    def __len__(self):
        """Number of samples (one per image id)."""
        return len(self.id)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``."""
        padded_id = str(self.id[idx]).zfill(4)
        image = Image.open(self.path + padded_id + ".jpg").convert("RGB")
        target = class_name[self.labels[idx]]
        # The transform is optional; without one the PIL image is returned as is.
        return (self.transform(image) if self.transform else image), target


class TestDataset(Dataset):
    """Unlabelled image dataset built from every entry of a directory.

    Item ``idx`` is ``(image, id)`` where ``id`` is the file name without its
    extension.

    Args:
        path: Directory containing the images (with or without a trailing
            separator).
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, path, transform=None):
        self.path = path
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # sample order reproducible across runs and file systems.
        self.info = sorted(os.listdir(path))
        self.transform = transform

    def __len__(self):
        """Number of directory entries found at construction time."""
        return len(self.info)

    def __getitem__(self, idx):
        """Return ``(image, id)`` for the ``idx``-th file."""
        file_name = self.info[idx]
        # os.path.join works whether or not `path` ends with a separator.
        img = Image.open(os.path.join(self.path, file_name)).convert("RGB")
        # splitext drops only the final extension, so names that contain
        # additional dots are not truncated.
        id = os.path.splitext(file_name)[0]

        if self.transform:
            img = self.transform(img)

        return img, id

In [9]:
# Preprocessing constants: images are resized to a square, then centre-cropped.
img_size = 256
img_crop_size = 224
# Per-channel normalisation statistics.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: identical to the test pipeline plus a random horizontal flip.
train_steps = [
    transforms.Resize(size=(img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.CenterCrop(size=img_crop_size),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
]
train_transform = transforms.Compose(train_steps)

# Evaluation pipeline: deterministic resize, crop, tensor conversion, normalise.
test_steps = [
    transforms.Resize(size=(img_size, img_size)),
    transforms.CenterCrop(size=img_crop_size),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
]
test_transform = transforms.Compose(test_steps)

In [10]:
from torch.utils.data import Subset, random_split

# Two views over the same labelled samples: one with training augmentation and
# one with the deterministic evaluation preprocessing.
full_train_data = TrainDataset(
    train_id, train_label, train_path, transform=train_transform
)
full_eval_data = TrainDataset(
    train_id, train_label, train_path, transform=test_transform
)
test_data = TestDataset(test_path, transform=test_transform)

# split validation
# Derive the training size from the data instead of hard-coding it, so the
# split does not raise when the label file does not hold exactly 3000 rows.
valid_size = 200
train_size = len(full_train_data) - valid_size
train_data, valid_split = random_split(
    full_train_data, [train_size, valid_size]
)
# Re-use the held-out indices on the evaluation view so validation accuracy is
# not perturbed by the random horizontal flip of the training transform.
valid_data = Subset(full_eval_data, valid_split.indices)

batch_size = 32
trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
validloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
testloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
# Number of passes over the training set.
num_epochs = 50

# Pretrained ResNet-50 whose final fully connected layer is replaced by a
# fresh 200-way classifier.
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=200)

# Cross-entropy loss, SGD with momentum, and a step schedule that multiplies
# the learning rate by 0.1 every 6 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer=optimizer, step_size=6, gamma=0.1
)

In [12]:
def cal_accuracy(net, loader):
    """Return the fraction of samples in ``loader`` that ``net`` classifies correctly.

    The network is evaluated in eval mode with gradients disabled, and its
    previous train/eval mode is restored before returning, so calling this in
    the middle of training does not leave the network stuck in eval mode.

    Args:
        net: Module mapping a batch of images to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``loader`` yields no
        samples.
    """
    # Run on whatever device the network lives on; fall back to the notebook's
    # global `device` only for a network without parameters.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    was_training = net.training
    correct = 0
    total = 0
    net.eval()
    try:
        with torch.no_grad():
            for img, labels in loader:
                img = img.to(target_device)
                labels = labels.to(target_device)
                predicted = net(img).argmax(dim=1)
                correct += (labels == predicted).sum().item()
                total += len(labels)
    finally:
        # Restore the caller's mode (train or eval) even if evaluation fails.
        net.train(was_training)
    return correct / total if total else 0.0

In [None]:
# A checkpoint is written only when validation accuracy reaches the best value
# seen so far; the initial 0.5 acts as a floor below which nothing is saved.
now_max_acc = 0.5
model_save_path = root_path + "/model.pth"
model.to(device)
begin = time.time()
print("----------Training Begin----------")
for epoch in range(num_epochs):
    # Validation at the end of each epoch runs the network in eval mode, so
    # train mode must be (re-)enabled at the start of every epoch; otherwise
    # only the first epoch would train with BatchNorm in training mode.
    model.train()
    since = time.time()
    running_loss = 0.0
    running_correct = 0.0
    running_total = 0.0
    for inputs, labels in trainloader:

        # get the inputs and assign them to cuda
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # forward + backward + optimize
        # Call the module itself rather than .forward() so registered hooks run.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # calculate the loss/acc later
        _, predicted = torch.max(outputs.detach(), 1)
        running_loss += loss.item()
        running_correct += (labels == predicted).sum().item()
        running_total += len(labels)

    # Loss is averaged over batches, accuracy over samples.
    epoch_loss = running_loss / len(trainloader)
    epoch_acc = running_correct / running_total
    valid_acc = cal_accuracy(model, validloader)
    epoch_duration = time.time() - since
    print(
        "Epoch %s/%s, loss: %.4f, acc: %.4f valid acc: %.4f"
        % (epoch + 1, num_epochs, epoch_loss, epoch_acc, valid_acc)
    )

    exp_lr_scheduler.step()
    if valid_acc >= now_max_acc:
        # Saves the whole pickled module (not just a state_dict); the
        # prediction cells load it back with torch.load().
        torch.save(model, model_save_path)
        now_max_acc = valid_acc

print(
    "Finished Training, total time: {}".format(time.time() - begin)
    )

In [None]:
# Load model
# map_location remaps the stored tensors onto the active device, so checkpoints
# saved on a GPU can also be loaded on a CPU-only runtime.
# NOTE(review): torch.load unpickles whole module objects here; only load
# checkpoint files from a trusted source.
model_1, model_2, model_3, model_4 = (
    torch.load(root_path + "/model_%d.pth" % index, map_location=device)
    for index in range(1, 5)
)

In [None]:
def get_predict(net):
    """Predict a class label for every image served by the global ``testloader``.

    Args:
        net: Trained classifier whose output index ``k`` corresponds to the
            class stored with value ``k`` in the global ``class_name`` table.

    Returns:
        dict mapping ``"<image id>.jpg"`` to ``"<class number, 3 digits>.<class name>"``,
        where the class number is the predicted index plus one.
    """
    print("----------Start Predicting----------")
    begin = time.time()

    # Invert name -> index once, instead of rebuilding list(class_name) for
    # every prediction. Looking names up by stored index (rather than by dict
    # position) stays correct even if the class file is not sorted by number.
    index_to_name = {index: name for name, index in class_name.items()}

    ans = {}
    # Inputs are moved to `device`, so make sure the network lives there too.
    net.to(device)
    net.eval()
    with torch.no_grad():
        for img, id in testloader:
            img = img.to(device)
            predicted = net(img).argmax(dim=1).tolist()
            for image_id, class_index in zip(id, predicted):
                name = index_to_name[class_index]
                ans[image_id + ".jpg"] = \
                    str(class_index + 1).zfill(3) + "." + name
    print(
        "Predicting finished, time needed: {}sec"
        .format(time.time() - begin)
        )
    return ans


# Run inference with each ensemble member, in order, and keep one answer table
# per model.
ans_1, ans_2, ans_3, ans_4 = (
    get_predict(member) for member in (model_1, model_2, model_3, model_4)
)

----------Start Predicting----------
Predicting finished, time needed: 47.430903911590576sec
----------Start Predicting----------
Predicting finished, time needed: 46.83012628555298sec
----------Start Predicting----------
Predicting finished, time needed: 46.89465355873108sec
----------Start Predicting----------
Predicting finished, time needed: 47.23994326591492sec


In [None]:
order_path = root_path + "/answer_order.txt"
result_path = root_path + "/answer.txt"

# Answer tables of the ensemble members, in tie-break priority order.
all_answers = [ans_1, ans_2, ans_3, ans_4]

txt_result = []
# Context managers close both files even if a lookup below raises.
with open(order_path, "r") as order_file:
    for line in order_file:
        fields = line.split()
        if not fields:
            continue  # ignore blank lines in the order file
        image_name = fields[0]

        # 4 models vote
        # Plurality vote: max() returns the first label with the highest count,
        # so ties (2-2, or all four different) fall back to the earliest model,
        # i.e. ans_1 -- the same outcome as the original explicit comparisons.
        votes = [answers[image_name] for answers in all_answers]
        now_predict = max(votes, key=votes.count)

        txt_result.append(image_name + " " + now_predict)

# One "<image file name> <label>" line per image, in the requested order.
with open(result_path, "w") as result_file:
    for item in txt_result:
        result_file.write(str(item) + "\n")