In [1]:
import os
import glob
import json 
import time
import math
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tensorboardX import SummaryWriter


from byol_pytorch import BYOL
from torchvision import models

In [2]:
train_path = "/data/dlcv/hw4/office/train"
valid_path = "/data/dlcv/hw4/office/val"
train_csv = "/data/dlcv/hw4/office/train.csv"
valid_csv = "/data/dlcv/hw4/office/val.csv"
label2id_path = "./label2id.json"
ckpt_path = "./ckpt/finetune"
pretrained_path = "/data/dlcv/hw4/pretrain_model_SL.pt"
os.makedirs(ckpt_path, exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.set_device(3)
print('Device used:', device)

img_size = 128
train_bz = 64
valid_bz = 200

Device used: cuda


In [3]:
class dataset(Dataset):
    def __init__(self, inputPath, csvPath, label2idPath, transform=None):
        self.inputPath = inputPath
        self.transform = transform
        with open(label2idPath, 'r') as j:
            self.label2id = json.loads(j.read())
        self.inputName = []
        df = pd.read_csv(csvPath)
        for i in range(len(df)):
            self.inputName.append((df.loc[i].filename, self.label2id[df.loc[i].label]))
        print(self.inputName[0])
        
    def __getitem__(self, index):
        img = Image.open(os.path.join(self.inputPath, self.inputName[index][0]))
        if self.transform:
            img = self.transform(img)
        id = self.inputName[index][1]
        return img, id

    def __len__(self):
        return len(self.inputName)

img_transform = transforms.Compose([
    transforms.Resize(size=128),
    transforms.CenterCrop(128),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

def imshow(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def save_checkpoint(ckpt_path, model, optimizer):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict': optimizer.state_dict(),}
    torch.save(state, ckpt_path)

def load_checkpoint(ckpt_path, device):
    ckpt = torch.load(ckpt_path, map_location=device)
    return ckpt

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [4]:
trainDS = dataset(inputPath=train_path, csvPath=train_csv, label2idPath=label2id_path, transform=img_transform)
trainLoader = DataLoader(dataset=trainDS, batch_size=train_bz, shuffle=True, num_workers=4)
validDS = dataset(inputPath=valid_path, csvPath=valid_csv, label2idPath=label2id_path, transform=img_transform)
validLoader = DataLoader(dataset=validDS, batch_size=valid_bz, shuffle=False, num_workers=1)
print('# images in trainset:', len(trainDS))
print('# images in validset:', len(validDS))

# dataiter = iter(validLoader)
# images, labels = dataiter.next()
# # print(labels)
# print('Image tensor in each batch:', images.shape, images.dtype)
# print('Label tensor in each batch:', labels.shape, labels.dtype)

('Couch00015.jpg', 13)
('Fork00005.jpg', 24)
# images in trainset: 3951
# images in validset: 406


In [5]:
class settingB(nn.Module):
    def __init__(self, ckpt_path=None) -> None:
        super().__init__()
        self.resnet = models.resnet50(weights=None)
        pretrained = torch.load(ckpt_path, map_location=device)
        self.resnet.load_state_dict(pretrained)
        # self.classifier = nn.Sequential(
        #     nn.Linear(self.resnet.fc.out_features, 65)
        # )
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])
        self.classifier = nn.Linear(2048, 65)
    def forward(self, x):
        x = self.resnet(x).flatten(1)
        return self.classifier(x)

In [6]:
# trainning
def train(model, epochs):
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,100], gamma=0.9)
    criterion = nn.CrossEntropyLoss()
    best_acc = 0.

    for epoch in range(epochs):
        model.train()        
        train_loss, valid_loss = 0, 0
        train_acc, valid_acc = 0, 0
        for i, (img, label) in enumerate(trainLoader):
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            output = model(img)
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            train_acc += pred.eq(label.view_as(pred)).sum().item()
            loss = criterion(output, label)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()                
        scheduler.step()
        train_loss /= (i+1)
        train_acc /= len(trainLoader.dataset)
        print("Epoch: {:02}".format(epoch))
        print(" | train_loss: {:6f}, train_acc: {:.2%}".format(train_loss, train_acc))
        model.eval()
        with torch.no_grad(): # This will free the GPU memory used for back-prop
            for i, (img, label) in enumerate(validLoader):
                img, label = img.to(device), label.to(device)
                output = model(img)
                valid_loss += criterion(output, label).item() # sum up batch loss
                pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
                valid_acc += pred.eq(label.view_as(pred)).sum().item()
                # resultClass = torch.argmax(output, dim=1)
                # acc += (resultClass == target).sum()
            valid_loss /= (i+1)
            valid_acc /= len(validLoader.dataset)
        
        save_checkpoint(os.path.join(ckpt_path, "settingB_last.pth"), model, optimizer)
        print(" | valid_loss: {:6f}, train_acc: {:.2%}".format(valid_loss, valid_acc))
        
        if valid_acc > best_acc:
            save_checkpoint(os.path.join(ckpt_path, "settingB_best.pth"), model, optimizer)
            print("  -> Save checkpoint for epoch {}".format(epoch+1))
            best_acc = valid_acc
        
        

In [7]:
model = settingB(ckpt_path=pretrained_path).to(device)
# print(model)
train(model, 50)

Epoch: 00
 | train_loss: 3.879607, train_acc: 8.63%
 | valid_loss: 4.378823, train_acc: 9.61%
  -> Save checkpoint for epoch 1
Epoch: 01
 | train_loss: 3.396489, train_acc: 15.92%
 | valid_loss: 3.832235, train_acc: 16.50%
  -> Save checkpoint for epoch 2
Epoch: 02
 | train_loss: 3.047138, train_acc: 23.03%
 | valid_loss: 3.175761, train_acc: 21.67%
  -> Save checkpoint for epoch 3
Epoch: 03
 | train_loss: 2.675387, train_acc: 30.35%
 | valid_loss: 3.280406, train_acc: 28.57%
  -> Save checkpoint for epoch 4
Epoch: 04
 | train_loss: 2.364651, train_acc: 35.69%
 | valid_loss: 3.161000, train_acc: 28.82%
  -> Save checkpoint for epoch 5
Epoch: 05
 | train_loss: 2.032221, train_acc: 44.82%
 | valid_loss: 2.727740, train_acc: 31.77%
  -> Save checkpoint for epoch 6
Epoch: 06
 | train_loss: 1.595208, train_acc: 54.90%
 | valid_loss: 2.805324, train_acc: 33.25%
  -> Save checkpoint for epoch 7
Epoch: 07
 | train_loss: 1.132917, train_acc: 67.22%
 | valid_loss: 3.138352, train_acc: 30.79%
Epo