In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import itertools
import matplotlib.pyplot as plt
import os
import numpy as np
import torch.nn as nn
import pandas as pd
import random

from PIL import Image

In [2]:
seed = 111

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)

In [3]:
train_data = list()
val_data = list()
train_label = list()
val_label = list()

books = pd.read_csv('./data/v4/books.csv')
class_list = list(books[(books['category_high'].notna()) & (books['category_high'] != 'others') & (books['img_path'].notna())]['category_high'].value_counts().index)
class_dict = {label: i for i, label in enumerate(class_list)}
print(len(class_list))

for class_name in class_list:
    imgs = []
    img_per_class = books[(books['category_high'] == class_name) & (books['img_path'].notna())]['img_path']
    num_img_per_class = len(img_per_class)
    imgs.extend(('./data/' + img_per_class.values))
    
    for i, img in enumerate(imgs):
      if i < int(0.8 * num_img_per_class):
        train_data.append(Image.open(img).convert('RGB'))
        train_label.append(class_dict[class_name])
      else:
        val_data.append(Image.open(img).convert('RGB'))
        val_label.append(class_dict[class_name])

80


In [4]:
transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])])

In [5]:
class CustomDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)
    
    def num_classes(self):
        return len(self.labels)

    def __getitem__(self, idx):
        img = self.data[idx]
        label = self.labels[idx]
        if self.transform:
          img = self.transform(img)
        return img, label

In [6]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True, norm="bnorm", relu=True):
        super().__init__()

        layers = []
        layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                             kernel_size=kernel_size, stride=stride, padding=padding,
                             bias=bias)]

        if norm == "bnorm":
            layers += [nn.BatchNorm2d(num_features=out_channels)]

        if relu:
            layers += [nn.ReLU()]

        self.cbr = nn.Sequential(*layers)

    def forward(self, x):
        return self.cbr(x)

In [7]:
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, 
                 padding=1, bias=True, norm="bnorm", short_cut=False, relu=True, init_block=False):
        super().__init__()

        layers = []


        if init_block:
          init_stride = 2
        else:
          init_stride = stride

        # 1st conv
        layers += [ConvBlock(in_channels=in_channels, out_channels=out_channels,
                         kernel_size=kernel_size, stride=init_stride, padding=padding,
                         bias=bias, norm=norm, relu=relu)]

        # 2nd conv
        layers += [ConvBlock(in_channels=out_channels, out_channels=out_channels,
                         kernel_size=kernel_size, stride=stride, padding=padding,
                         bias=bias, norm=norm, relu=False)]

        self.resblk = nn.Sequential(*layers)
        
        
        self.short_cut = nn.Conv2d(in_channels, out_channels, (1,1), stride=2)

    def forward(self, x, short_cut=False):
        if short_cut:
            return self.short_cut(x) + self.resblk(x)
        else:
            return x + self.resblk(x) # residual connection

In [8]:
class ResNet(nn.Module):
    def __init__(self, in_channels, out_channels, nker=64, norm="bnorm", nblk=[3,4,6,3]):
        super(ResNet, self).__init__()

        self.enc = ConvBlock(in_channels, nker, kernel_size=7, stride=2, padding=1, bias=True, norm=None, relu=True)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        res_1 = ResBlock(nker, nker, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True)
        self.res_1 = nn.Sequential(*[res_1 for _ in range(nblk[0])])

        res_2 = ResBlock(nker*2, nker*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True)
        self.res_2_up = ResBlock(nker, nker*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True, init_block=True)
        self.res_2 = nn.Sequential(*[res_2 for _ in range(nblk[1]-1)])

        res_3 = ResBlock(nker*2*2, nker*2*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True)
        self.res_3_up = ResBlock(nker*2, nker*2*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True, init_block=True)
        self.res_3 = nn.Sequential(*[res_3 for _ in range(nblk[2]-1)])

        res_4 = ResBlock(nker*2*2*2, nker*2*2*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True, init_block=True)
        self.res_4_up = ResBlock(nker*2*2, nker*2*2*2, kernel_size=3, stride=1, padding=1, bias=True, norm=norm, relu=True)
        self.res_4 = nn.Sequential(*[res_4 for _ in range(nblk[3]-1)])

        self.avg_pooling = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(nker*2*2*2, 80)

    def forward(self, x):
        x = self.enc(x)
        x = self.max_pool(x)
        x = self.res_1(x)
        x = self.max_pool(x)

        x = self.res_2_up(x, short_cut=True)
        x = self.res_2(x)
        x = self.max_pool(x)

        x = self.res_3_up(x, short_cut=True)
        x = self.res_3(x)
        x = self.max_pool(x)

        x = self.res_4_up(x, short_cut=True)
        x = self.res_4(x)
        
        x = self.avg_pooling(x)
        x = x.view(x.shape[0], -1)
        out = self.fc(x)
        return out

In [9]:
from torch.optim import Adam

# Build user-defined ResNet model
model_scratch = ResNet(3, 80)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model_scratch.parameters(), lr=1e-4)

In [10]:
log_dir ='./log'

In [11]:
# quickdraw train/validatoin dataset and dataloader
b_train_dataset = CustomDataset(train_data, train_label, transform)
b_val_dataset = CustomDataset(val_data, val_label, transform)

b_train_dataloader = DataLoader(b_train_dataset, batch_size=4, shuffle=True, drop_last=True)
b_val_dataloader = DataLoader(b_val_dataset, batch_size=4, shuffle=True, drop_last=True)

In [13]:
# Misc
class AverageMeter(object):
  """Computes and stores the average and current value"""
  def __init__(self):
      self.reset()

  def reset(self):
    self.val = 0
    self.avg = 0
    self.sum = 0
    self.count = 0

  def update(self, val, n=1):
    self.val = val
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count

In [14]:
model_scratch = model_scratch.cuda()

In [15]:
# Main
os.makedirs(log_dir, exist_ok=True)

with open(os.path.join(log_dir, 'scratch_train_log.csv'), 'w') as log:
  # Training
  model_scratch.train()
  for iter, (img, label) in enumerate(b_train_dataloader):
    # optimizer에 저장된 미분값을 0으로 초기화
    optimizer.zero_grad()

    img = img.cuda()
    label = label.cuda()

    # 모델에 이미지 forward
    pred_logit = model_scratch(img)

    # loss 값 계산
    loss = criterion(pred_logit, label)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Accuracy 계산
    pred_label = torch.argmax(pred_logit, 1)
    acc = (pred_label == label).sum().item() / len(img)

    train_loss = loss.item()
    train_acc = acc

    # Validation 
    if (iter % 20 == 0) or (iter == len(b_train_dataloader)-1):
      model_scratch.eval()
      valid_loss, valid_acc = AverageMeter(), AverageMeter()

      for img, label in b_val_dataloader:
        # Validation에 사용하기 위한 image, label 처리 (필요한 경우, data type도 변경해주세요)
        img, label = img.float().cuda(), label.long().cuda()

        # 모델에 이미지 forward (gradient 계산 X)
        with torch.no_grad():
          pred_logit = model_scratch(img)

        # loss 값 계산
        loss = criterion(pred_logit, label)

        # Accuracy 계산
        pred_label = torch.argmax(pred_logit, 1)
        acc = (pred_label == label).sum().item() / len(img)

        valid_loss.update(loss.item(), len(img))
        valid_acc.update(acc, len(img))

      valid_loss = valid_loss.avg
      valid_acc = valid_acc.avg

      print("Iter [%3d/%3d] | Train Loss %.4f | Train Acc %.4f | Valid Loss %.4f | Valid Acc %.4f" %
            (iter, len(b_train_dataloader), train_loss, train_acc, valid_loss, valid_acc))
      
      # Train Log Writing
      log.write('%d,%.4f,%.4f,%.4f,%.4f\n'%(iter, train_loss, train_acc, valid_loss, valid_acc))

Iter [  0/9932] | Train Loss 7.3178 | Train Acc 0.0000 | Valid Loss 4.1597 | Valid Acc 0.0004
Iter [ 20/9932] | Train Loss 2.3804 | Train Acc 0.5000 | Valid Loss 2.0542 | Valid Acc 0.6863
Iter [ 40/9932] | Train Loss 0.1201 | Train Acc 1.0000 | Valid Loss 2.0200 | Valid Acc 0.6862
Iter [ 60/9932] | Train Loss 2.2997 | Train Acc 0.5000 | Valid Loss 1.7885 | Valid Acc 0.6862
Iter [ 80/9932] | Train Loss 1.7867 | Train Acc 0.7500 | Valid Loss 1.7928 | Valid Acc 0.6863
Iter [100/9932] | Train Loss 0.2100 | Train Acc 1.0000 | Valid Loss 1.8098 | Valid Acc 0.6863
Iter [120/9932] | Train Loss 1.8183 | Train Acc 0.7500 | Valid Loss 1.8094 | Valid Acc 0.6863
Iter [140/9932] | Train Loss 0.3371 | Train Acc 1.0000 | Valid Loss 1.7518 | Valid Acc 0.6862
Iter [160/9932] | Train Loss 3.3588 | Train Acc 0.2500 | Valid Loss 1.7484 | Valid Acc 0.6863
Iter [180/9932] | Train Loss 3.4778 | Train Acc 0.5000 | Valid Loss 1.7351 | Valid Acc 0.6864
Iter [200/9932] | Train Loss 1.0260 | Train Acc 0.7500 | Val

In [88]:
torch.save(model_scratch.state_dict(), "./model_scratch.pt")

### Preprocessing
1차 : Resnet model

In [39]:
imgs_for_pred_list = []
imgs_for_pred = books[(books['img_path'].notna()) & ((books['category_high'].isna()))]['img_path']
imgs_for_pred_list.extend(('./data/' + imgs_for_pred.values))

len(imgs_for_pred_list)

imgs_for_prediction = []
for img in imgs_for_pred_list:
    imgs_for_prediction.append(Image.open(img).convert('RGB'))

# quickdraw train/validatoin dataset and dataloader
b_pred = CustomDataset(imgs_for_prediction, [-1]*len(imgs_for_prediction), transform)
b_pred_dataloader = DataLoader(b_pred, batch_size=4, shuffle=False, drop_last=True)

In [67]:
books = pd.read_csv('./data/v4/books.csv')
convert_class_dict = {v:k for k,v in class_dict.items()}

for i, (img, label) in enumerate(b_pred_dataloader):
    img, label = img.float().cuda(), label.long().cuda()
    
    
    with torch.no_grad():
        pred_logit = model_scratch(img)
    
    pred_label = torch.argmax(pred_logit, 1)

    for idx, l in enumerate(pred_label):
        books.loc[(books['img_path'] == imgs_for_pred.values[4 * i + idx]), 'category_high'] = convert_class_dict.get(l.item())

2차 : author

In [63]:
BA = list(books[books['category_high'].notna()]['book_author'].value_counts().index)

for author in BA:
    if not (books[books['book_author'] == author]['category_high'].value_counts().empty):
        books.loc[(books['img_path'].isna()) & (books['category_high'].isna()) & (books['book_author'] == author), 'category_high'] = books[books['book_author'] == author]['category_high'].value_counts().index[0]

3차 : publisher 

In [77]:
pub = books['publisher'].unique()

for p in pub:
    if not (books[(books['publisher'] == p) & (books['category_high'].notna())]['category_high'].value_counts().empty):
        books.loc[(books['publisher'] == p) & (books['category_high'].isna()), 'category_high'] = books[(books['publisher'] == p) & (books['category_high'].notna())]['category_high'].value_counts().index[0]

4차 : others

In [83]:
books.loc[books['category_high'].isna(), 'category_high'] = 'others'

In [86]:
books.to_csv('./data/v5/books.csv', index=False)