In [1]:
import os
import glob
import json 
import time
import math
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tensorboardX import SummaryWriter


from byol_pytorch import BYOL
from torchvision import models

In [50]:
# --- Data / checkpoint locations -------------------------------------------
# NOTE(review): these are absolute paths on the original training host;
# adjust them when running the notebook elsewhere.
train_path = "/data/dlcv/hw4/office/train"
valid_path = "/data/dlcv/hw4/office/val"
train_csv = "/data/dlcv/hw4/office/train.csv"
valid_csv = "/data/dlcv/hw4/office/val.csv"
label2id_path = "./label2id.json"
ckpt_path = "./ckpt/finetune"
pretrained_path = "/data/jhccc/dlcv/hw4-jhc659/ckpt/resnetLast.pth"
retrain_path = "/data/jhccc/dlcv/hw4-jhc659/ckpt/finetune/settingC2_best.pth"
os.makedirs(ckpt_path, exist_ok=True)

# --- Device selection --------------------------------------------------------
# Preferred GPU index. Only touch the CUDA runtime when it is actually
# available, and fall back to GPU 0 if the preferred index does not exist, so
# the cell no longer crashes on CPU-only or smaller machines.
gpu_index = 4
if torch.cuda.is_available():
    if gpu_index >= torch.cuda.device_count():
        gpu_index = 0
    torch.cuda.set_device(gpu_index)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device used:', device)

# --- Hyper-parameters --------------------------------------------------------
img_size = 128
train_bz = 64
valid_bz = 200

Device used: cuda


In [7]:
class dataset(Dataset):
    """Labelled image dataset described by a CSV file.

    Every CSV row provides a ``filename`` (relative to ``inputPath``) and a
    ``label`` string. Labels are converted to ids through the JSON mapping
    stored at ``label2idPath``.

    Args:
        inputPath: directory that contains the image files.
        csvPath: CSV file with at least ``filename`` and ``label`` columns.
        label2idPath: JSON file mapping label strings to ids.
        transform: optional callable applied to each loaded PIL image.

    Raises:
        KeyError: if a CSV label is missing from the label-to-id mapping.
    """

    def __init__(self, inputPath, csvPath, label2idPath, transform=None):
        self.inputPath = inputPath
        self.transform = transform
        with open(label2idPath, 'r') as j:
            self.label2id = json.load(j)
        df = pd.read_csv(csvPath)
        # Walk the two columns directly instead of materialising a row Series
        # twice per sample with df.loc[i].
        self.inputName = [
            (filename, self.label2id[label])
            for filename, label in zip(df["filename"].tolist(), df["label"].tolist())
        ]
        # Show the first sample as a sanity check; an empty CSV must not crash.
        if self.inputName:
            print(self.inputName[0])

    def __getitem__(self, index):
        """Return ``(image, label_id)`` for the sample at ``index``."""
        filename, label_id = self.inputName[index]
        # Force three channels so grayscale / RGBA / palette files are
        # compatible with a 3-channel Normalize in the transform.
        with Image.open(os.path.join(self.inputPath, filename)) as raw:
            img = raw.convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label_id

    def __len__(self):
        """Number of samples listed in the CSV."""
        return len(self.inputName)

# Deterministic preprocessing shared by training and evaluation:
# resize the shorter side to `img_size`, take a centred square crop,
# convert to a float tensor and normalise each channel.
img_transform = transforms.Compose([
    # Use the configured `img_size` instead of repeating the literal 128.
    transforms.Resize(size=img_size),
    transforms.CenterCrop(img_size),
    transforms.ToTensor(),
    # Standard ImageNet per-channel mean / std.
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

def imshow(img):
    """Display a channel-first ``(C, H, W)`` image tensor with matplotlib.

    Args:
        img: torch tensor laid out as (channels, height, width). It may live
            on the GPU and/or be part of an autograd graph.
    """
    # detach() + cpu() make the conversion safe for GPU tensors and tensors
    # that require grad; a bare .numpy() raises for both.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channel-last (H, W, C).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def save_checkpoint(ckpt_path, model, optimizer):
    """Write the model and optimizer state dicts to ``ckpt_path``.

    The file holds a dict with the keys ``model_state_dict`` and
    ``optimizer_state_dict``.
    """
    torch.save(
        dict(
            model_state_dict=model.state_dict(),
            optimizer_state_dict=optimizer.state_dict(),
        ),
        ckpt_path,
    )

def load_checkpoint(ckpt_path, device=device):
    """Read a checkpoint file, mapping its tensors onto ``device``.

    Returns whatever object was stored in the file by ``torch.save``.
    """
    return torch.load(ckpt_path, map_location=device)

def epoch_time(start_time, end_time):
    """Split the span between two timestamps into whole minutes and seconds.

    Args:
        start_time: start timestamp in seconds.
        end_time: end timestamp in seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [4]:
# Labelled training split, shuffled every epoch.
trainDS = dataset(inputPath=train_path, csvPath=train_csv, label2idPath=label2id_path, transform=img_transform)
trainLoader = DataLoader(dataset=trainDS, batch_size=train_bz, shuffle=True, num_workers=4)

# Labelled validation split. train() reads `validLoader`, so it has to exist
# before the training cells run on a fresh kernel (Restart & Run All).
validDS = dataset(inputPath=valid_path, csvPath=valid_csv, label2idPath=label2id_path, transform=img_transform)
validLoader = DataLoader(dataset=validDS, batch_size=valid_bz, shuffle=False, num_workers=4)

print('# images in trainset:', len(trainDS))
print('# images in validset:', len(validDS))

('Couch00015.jpg', 13)
('Fork00005.jpg', 24)
# images in trainset: 3951
# images in validset: 406


In [5]:
class settingC(nn.Module):
    """ResNet-50 feature extractor followed by one linear classifier.

    The backbone consists of every child module of ``resnet50`` except its
    final ``fc`` layer. The backbone output is flattened and passed to
    ``self.classifier``.

    Args:
        ckpt_path: optional checkpoint whose ``'model_state_dict'`` entry is
            loaded into the full ResNet-50 before its ``fc`` layer is dropped.
            ``None`` keeps the randomly initialised weights.
        num_classes: number of output classes (defaults to 65, the value the
            notebook was written for).
    """

    def __init__(self, ckpt_path=None, num_classes=65) -> None:
        super().__init__()
        backbone = models.resnet50(weights=None)
        if ckpt_path is not None:
            ckpt = load_checkpoint(ckpt_path, device)
            backbone.load_state_dict(ckpt['model_state_dict'])
        # Read the feature width from the layer being replaced instead of
        # hard-coding 2048.
        feature_dim = backbone.fc.in_features
        # Attribute names (`resnet`, `classifier`) are kept so existing
        # fine-tuned checkpoints still load.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        self.classifier = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.resnet(x).flatten(1)
        return self.classifier(x)

In [6]:
# training
def train(model, epochs, retrain=False, train_loader=None, valid_loader=None):
    """Fine-tune ``model`` and checkpoint the latest and the best epoch.

    Args:
        model: classifier returning per-class logits for a batch of images.
        epochs: number of passes over the training loader.
        retrain: when True use RAdam (lr=3e-4, weight_decay=5e-3); otherwise
            use Adam (lr=3e-3).
        train_loader: loader yielding ``(image, label)`` batches. Defaults to
            the notebook-level ``trainLoader``.
        valid_loader: loader yielding ``(image, label)`` batches. Defaults to
            the notebook-level ``validLoader``.

    Side effects:
        Prints per-epoch metrics, writes ``settingC_last.pth`` after every
        epoch and ``settingC_best.pth`` whenever validation accuracy improves.
    """
    # Fall back to the notebook globals so existing calls keep working, while
    # letting callers pass loaders explicitly instead of relying on hidden state.
    if train_loader is None:
        train_loader = trainLoader
    if valid_loader is None:
        valid_loader = validLoader

    model = model.to(device)
    if retrain:
        optimizer = torch.optim.RAdam(model.parameters(), lr=3e-4, weight_decay=5e-3)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)
    # Both modes use the same step schedule, so it is built once.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 100], gamma=0.9)
    criterion = nn.CrossEntropyLoss()
    best_acc = 0.

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_loss, train_correct, train_batches = 0, 0, 0
        for img, label in train_loader:
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            output = model(img)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            # Index of the largest logit is the predicted class.
            train_correct += (output.argmax(dim=1) == label).sum().item()
            train_batches += 1
        scheduler.step()
        # Loss is averaged per batch, accuracy per sample.
        train_loss /= max(train_batches, 1)
        train_acc = train_correct / len(train_loader.dataset)
        print("Epoch: {:02}".format(epoch))
        print(" | train_loss: {:6f}, train_acc: {:.2%}".format(train_loss, train_acc))

        # ---- validation pass ----
        model.eval()
        valid_loss, valid_correct, valid_batches = 0, 0, 0
        with torch.no_grad():  # no autograd bookkeeping needed for evaluation
            for img, label in valid_loader:
                img, label = img.to(device), label.to(device)
                output = model(img)
                valid_loss += criterion(output, label).item()
                valid_correct += (output.argmax(dim=1) == label).sum().item()
                valid_batches += 1
        valid_loss /= max(valid_batches, 1)
        valid_acc = valid_correct / len(valid_loader.dataset)

        save_checkpoint(os.path.join(ckpt_path, "settingC_last.pth"), model, optimizer)
        print(" | valid_loss: {:6f}, valid_acc: {:.2%}".format(valid_loss, valid_acc))

        if valid_acc > best_acc:
            save_checkpoint(os.path.join(ckpt_path, "settingC_best.pth"), model, optimizer)
            print("  -> Save checkpoint for epoch {}".format(epoch+1))
            best_acc = valid_acc
        
        

In [11]:
# Fine-tune from the pretrained backbone weights for 100 epochs.
model = settingC(ckpt_path=pretrained_path)
model = model.to(device)
train(model, epochs=100)

Epoch: 00
 | train_loss: 31.810889, train_acc: 2.15%
 | valid_loss: 12.474689, valid_acc: 4.19%
  -> Save checkpoint for epoch 1
Epoch: 01
 | train_loss: 6.774861, train_acc: 4.30%
 | valid_loss: 5.965753, valid_acc: 5.17%
  -> Save checkpoint for epoch 2
Epoch: 02
 | train_loss: 4.845631, train_acc: 6.91%
 | valid_loss: 4.518285, valid_acc: 8.37%
  -> Save checkpoint for epoch 3
Epoch: 03
 | train_loss: 5.058013, train_acc: 10.00%
 | valid_loss: 4.210298, valid_acc: 12.07%
  -> Save checkpoint for epoch 4
Epoch: 04
 | train_loss: 4.810023, train_acc: 12.43%
 | valid_loss: 5.451992, valid_acc: 13.55%
  -> Save checkpoint for epoch 5
Epoch: 05
 | train_loss: 4.355129, train_acc: 17.11%
 | valid_loss: 5.255645, valid_acc: 16.01%
  -> Save checkpoint for epoch 6
Epoch: 06
 | train_loss: 4.437893, train_acc: 18.07%
 | valid_loss: 4.348756, valid_acc: 17.73%
  -> Save checkpoint for epoch 7
Epoch: 07
 | train_loss: 3.866360, train_acc: 24.17%
 | valid_loss: 4.455556, valid_acc: 20.20%
  -> 

In [26]:
# Resume from a previously fine-tuned checkpoint and train 50 more epochs
# with the `retrain` optimizer settings.
retrain_model = settingC()
retrain_model = retrain_model.to(device)
retrain_model.load_state_dict(load_checkpoint(retrain_path, device)['model_state_dict'])
train(retrain_model, epochs=50, retrain=True)

Epoch: 00
 | train_loss: 0.035047, train_acc: 99.24%
 | valid_loss: 3.636503, valid_acc: 46.80%
  -> Save checkpoint for epoch 1
Epoch: 01
 | train_loss: 0.032012, train_acc: 99.27%
 | valid_loss: 3.483169, valid_acc: 47.29%
  -> Save checkpoint for epoch 2
Epoch: 02
 | train_loss: 0.025648, train_acc: 99.37%
 | valid_loss: 3.281645, valid_acc: 46.31%
Epoch: 03
 | train_loss: 0.030107, train_acc: 99.32%
 | valid_loss: 3.656586, valid_acc: 47.29%
Epoch: 04
 | train_loss: 0.032642, train_acc: 99.34%
 | valid_loss: 3.376266, valid_acc: 47.29%
Epoch: 05
 | train_loss: 0.031442, train_acc: 99.39%
 | valid_loss: 3.323275, valid_acc: 46.06%
Epoch: 06
 | train_loss: 0.039632, train_acc: 99.37%
 | valid_loss: 3.038504, valid_acc: 46.06%
Epoch: 07
 | train_loss: 0.041333, train_acc: 99.32%
 | valid_loss: 3.041623, valid_acc: 46.06%
Epoch: 08
 | train_loss: 0.041243, train_acc: 99.29%
 | valid_loss: 2.879581, valid_acc: 44.83%
Epoch: 09
 | train_loss: 0.052448, train_acc: 99.32%
 | valid_loss: 2.

In [51]:
class testdataset(Dataset):
    """Inference-time image dataset described by a CSV file.

    Every CSV row provides an ``id`` and a ``filename`` (relative to
    ``inputPath``). No label is read.

    Args:
        inputPath: directory that contains the image files.
        csvPath: CSV file with at least ``id`` and ``filename`` columns.
        transform: optional callable applied to each loaded PIL image.
    """

    def __init__(self, inputPath, csvPath, transform=None):
        self.inputPath = inputPath
        self.transform = transform
        self.df = pd.read_csv(csvPath)
        # Walk the two columns directly instead of materialising a row Series
        # twice per sample with df.loc[i].
        self.inputName = list(zip(self.df["id"].tolist(), self.df["filename"].tolist()))
        # Show the first sample as a sanity check; an empty CSV must not crash.
        if self.inputName:
            print(self.inputName[0])

    def __getitem__(self, index):
        """Return ``(image, id, filename)`` for the sample at ``index``."""
        sample_id, filename = self.inputName[index]
        # Force three channels so grayscale / RGBA / palette files are
        # compatible with a 3-channel Normalize in the transform.
        with Image.open(os.path.join(self.inputPath, filename)) as raw:
            img = raw.convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, sample_id, filename

    def __len__(self):
        """Number of samples listed in the CSV."""
        return len(self.inputName)

In [52]:
# Validation images wrapped in `testdataset`, which yields
# (image, id, filename) and does not read labels.
validDS = testdataset(inputPath=valid_path, csvPath=valid_csv, transform=img_transform)
# One sample per batch, in CSV order; the prediction loop further down calls
# int(id) and filename[0], which only work for single-sample batches.
validLoader = DataLoader(dataset=validDS, batch_size=1, shuffle=False, num_workers=1)

(0, 'Fork00005.jpg')


In [53]:
# Rebuild the classifier and restore the fine-tuned weights for inference.
test_model = settingC()
test_model = test_model.to(device)
test_model.load_state_dict(load_checkpoint(retrain_path, device)['model_state_dict'])

<All keys matched successfully>

In [54]:
# Load the label -> id mapping and invert it so predicted ids can be turned
# back into label strings.
with open(label2id_path, 'r') as label_file:
    label2num = json.load(label_file)
num2label = {index: name for name, index in label2num.items()}
num2label

{13: 'Couch',
 27: 'Helmet',
 48: 'Refrigerator',
 0: 'Alarm_Clock',
 4: 'Bike',
 5: 'Bottle',
 7: 'Calculator',
 10: 'Chair',
 36: 'Mouse',
 34: 'Monitor',
 59: 'Table',
 42: 'Pen',
 43: 'Pencil',
 22: 'Flowers',
 52: 'Shelf',
 32: 'Laptop',
 56: 'Speaker',
 54: 'Sneakers',
 45: 'Printer',
 8: 'Calendar',
 3: 'Bed',
 30: 'Knives',
 1: 'Backpack',
 41: 'Paper_Clip',
 9: 'Candles',
 55: 'Soda',
 11: 'Clipboards',
 24: 'Fork',
 18: 'Exit_Sign',
 31: 'Lamp_Shade',
 63: 'Trash_Can',
 12: 'Computer',
 50: 'Scissors',
 64: 'Webcam',
 53: 'Sink',
 44: 'Postit_Notes',
 25: 'Glasses',
 20: 'File_Cabinet',
 47: 'Radio',
 6: 'Bucket',
 16: 'Drill',
 15: 'Desk_Lamp',
 62: 'Toys',
 29: 'Keyboard',
 38: 'Notebook',
 49: 'Ruler',
 61: 'ToothBrush',
 35: 'Mop',
 21: 'Flipflops',
 39: 'Oven',
 58: 'TV',
 17: 'Eraser',
 60: 'Telephone',
 28: 'Kettle',
 14: 'Curtains',
 37: 'Mug',
 19: 'Fan',
 46: 'Push_Pin',
 2: 'Batteries',
 40: 'Pan',
 33: 'Marker',
 57: 'Spoon',
 51: 'Screwdriver',
 26: 'Hammer',
 23

In [58]:
# NOTE(review): `criterion` is not used in this cell; it is kept only in case
# other cells rely on the name.
criterion = nn.CrossEntropyLoss()
test_model.eval()
# Column-oriented prediction table: one entry per image in each list.
result = {"id":[], "filename":[], "label":[]}
with torch.no_grad():  # no autograd bookkeeping needed for inference
    for img, sample_ids, filenames in validLoader:
        output = test_model(img.to(device))
        # Index of the largest logit per sample is the predicted class id.
        pred_ids = output.argmax(dim=1).tolist()
        # Extend per sample so the loop works for any batch size, not only
        # batch_size=1 as int(id) / filename[0] required.
        result["id"].extend(int(sample_id) for sample_id in sample_ids)
        result["filename"].extend(filenames)
        result["label"].extend(num2label[pred_id] for pred_id in pred_ids)


In [64]:
# Compare two sets of predictions against the validation CSV's labels:
# the in-memory `result` and the previously saved ./pred.csv.
origin = pd.read_csv(valid_csv)
test = pd.read_csv("./pred.csv")
result = pd.DataFrame(result)
for predicted in (result, test):
    # Fraction of rows whose predicted label equals the reference label.
    print(sum(origin.label == predicted.label) / len(origin.label))


0.49261083743842365
0.49261083743842365


In [60]:
# Write `result` to ./output.csv, leaving out the index column.
result.to_csv("./output.csv", index=False)