### setup

In [1]:
import argparse
import glob
import os
import pickle
import random
import shutil
import time

In [2]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import cv2

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import Sampler
import torchvision.transforms as transforms

In [4]:
# Run configuration. Kept as a plain dict wrapped in a Namespace so the rest
# of the notebook can use attribute access (args.batch_size, ...).
parser = {
    'train_path': '../data/train4320/',
    'train_masks_path': '../data/train4320_masks/',
    'val_path': '../data/val768/',
    'val_masks_path': '../data/val768_masks/',
    'split_data': False,
    'batch_size': 2,
    'log_every': 10,
    'train': True,
    'model_name': '',
    'test': False,
    'seed': 20170915,
}
args = argparse.Namespace(**parser)

# Seed every RNG the notebook draws from: torch (weight init, shuffling),
# Python's `random` (augmentation) and numpy.
torch.manual_seed(args.seed)
random.seed(args.seed)
np.random.seed(args.seed)

# Per-seed output directory. makedirs also creates '../intermediate/' itself
# when it is missing (os.mkdir would raise) and tolerates an existing directory.
args.intermediate_path = os.path.join('../intermediate/', str(args.seed))
os.makedirs(args.intermediate_path, exist_ok=True)

In [5]:
# One-off dataset preparation: shuffle the high-quality training images with
# the configured seed, resize them to 1024x1024 and write them, together with
# a copy of their mask, into the train (first 4320 files) and validation
# (remaining files) directories.
if args.split_data:
    # !mogrify -format png *.gif
    for directory in (args.train_path, args.train_masks_path,
                      args.val_path, args.val_masks_path):
        # Creates missing parents and is a no-op when the directory exists.
        os.makedirs(directory, exist_ok=True)

    files = sorted(os.path.basename(path)
                   for path in glob.glob('../data/train_hq/*.jpg'))
    random.seed(args.seed)
    random.shuffle(files)

    splits = (
        (files[:4320], args.train_path, args.train_masks_path),
        (files[4320:], args.val_path, args.val_masks_path),
    )
    for split_files, image_dir, mask_dir in splits:
        for filename in split_files:
            source_file = '../data/train_hq/' + filename
            image = cv2.imread(source_file)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise IOError('could not read image: ' + source_file)
            image = cv2.resize(image, (1024, 1024))
            cv2.imwrite(os.path.join(image_dir, filename), image)
            # Masks are copied unchanged (not resized).
            mask_filename = '../data/train_masks/' + filename.replace('.jpg', '_mask.png')
            shutil.copy2(mask_filename, mask_dir)

### model

In [6]:
class ConvBnRelu2d(nn.Module):
    """Convolution -> batch norm -> ReLU building block.

    The convolution is created without a bias term; `merge_bn` can later fold
    the batch-norm statistics into a biased convolution and drop the
    batch-norm layer.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1):
        super(ConvBnRelu2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels, eps=1e-4)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution, the batch norm (if still present) and ReLU."""
        features = self.conv(x)
        if self.bn is None:
            return self.relu(features)
        return self.relu(self.bn(features))

    def merge_bn(self):  # for faster inference
        """Fold the batch norm's running statistics into the convolution.

        Replaces `self.conv` with a biased convolution whose weights and bias
        absorb the batch-norm scale and shift, then sets `self.bn` to None.
        Does nothing when the batch norm has already been merged.
        """
        if self.bn is None:
            return

        old_conv, bn = self.conv, self.bn
        kernel = old_conv.weight.data

        # Per-output-channel factor: gamma / sqrt(running_var + eps).
        channel_scale = bn.weight.data / torch.sqrt(bn.running_var + bn.eps)
        # Broadcast the factor over the (in_channels, kH, kW) axes of the kernel.
        fused_weight = channel_scale.view(-1, 1, 1, 1) * kernel
        fused_bias = bn.bias.data - channel_scale * bn.running_mean

        fused_conv = nn.Conv2d(old_conv.in_channels, old_conv.out_channels,
                               old_conv.kernel_size, padding=old_conv.padding,
                               bias=True)
        fused_conv.weight.data = fused_weight
        fused_conv.bias.data = fused_bias

        self.conv = fused_conv
        self.bn = None

In [7]:
class StackEncoder(nn.Module):
    """Encoder stage: two ConvBnRelu2d layers followed by 2x2 max pooling."""

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1):
        super(StackEncoder, self).__init__()
        first = ConvBnRelu2d(in_channels, out_channels, kernel_size, padding=padding)
        second = ConvBnRelu2d(out_channels, out_channels, kernel_size, padding=padding)
        self.encode = nn.Sequential(first, second)

    def forward(self, x):
        """Return (features before pooling, features after 2x2/stride-2 pooling)."""
        features = self.encode(x)
        pooled = F.max_pool2d(features, kernel_size=2, stride=2)
        return features, pooled

In [8]:
class StackDecoder(nn.Module):
    """Decoder stage: upsample, concatenate the skip tensor, three ConvBnRelu2d layers.

    `en_channels` is the channel count of the skip tensor `e` and
    `in_channels` that of the tensor `x` being upsampled.
    """

    def __init__(self, en_channels, in_channels, out_channels, kernel_size=3, padding=1):
        super(StackDecoder, self).__init__()
        layers = [
            ConvBnRelu2d(en_channels + in_channels, out_channels,
                         kernel_size=kernel_size, padding=padding),
            ConvBnRelu2d(out_channels, out_channels,
                         kernel_size=kernel_size, padding=padding),
            ConvBnRelu2d(out_channels, out_channels,
                         kernel_size=kernel_size, padding=padding),
        ]
        self.decode = nn.Sequential(*layers)

    def forward(self, e, x):
        """Bilinearly resize `x` to the spatial size of `e`, concat on channels, decode."""
        _, _, height, width = e.size()
        upsampled = F.upsample(x, size=(height, width), mode='bilinear')
        merged = torch.cat([e, upsampled], dim=1)
        return self.decode(merged)

In [9]:
class UNet1024(nn.Module):
    """U-Net style segmentation network with six encoder and six decoder stages.

    Args:
        in_shape: (C, H, W) of the input images; only C is used to size the
            first convolution. The size comments below assume 1024x1024 inputs.
        out_size: (height, width) the single-channel logit map is bilinearly
            resized to before being returned. Defaults to (1280, 1918), the
            size that was previously hard-coded in `forward`.
    """

    def __init__(self, in_shape, out_size=(1280, 1918)):
        super(UNet1024, self).__init__()
        C, _, _ = in_shape
        self.out_size = tuple(out_size)

        # 1024
        self.down1 = StackEncoder(  C,  24)  # 512
        self.down2 = StackEncoder( 24,  64)  # 256
        self.down3 = StackEncoder( 64, 128)  # 128
        self.down4 = StackEncoder(128, 256)  # 64
        self.down5 = StackEncoder(256, 512)  # 32
        self.down6 = StackEncoder(512, 768)  # 16

        self.center = ConvBnRelu2d(768, 768)

        # 16
        self.up6 = StackDecoder(768, 768, 512)  # 32
        self.up5 = StackDecoder(512, 512, 256)  # 64
        self.up4 = StackDecoder(256, 256, 128)  # 128
        self.up3 = StackDecoder(128, 128,  64)  # 256
        self.up2 = StackDecoder( 64,  64,  24)  # 512
        self.up1 = StackDecoder( 24,  24,  24)  # 1024

        # 1x1 convolution producing the single-channel mask logits.
        self.mask = nn.Conv2d(24, 1, kernel_size=1)

    def forward(self, x):
        """Return mask logits of shape (N,) + out_size for a batch of images `x`."""
        # Encoder: keep each stage's pre-pooling features for the skip connections.
        e1, o = self.down1(x)
        e2, o = self.down2(o)
        e3, o = self.down3(o)
        e4, o = self.down4(o)
        e5, o = self.down5(o)
        e6, o = self.down6(o)

        o = self.center(o)

        # Decoder: consume the skip features in reverse order.
        o = self.up6(e6, o)
        o = self.up5(e5, o)
        o = self.up4(e4, o)
        o = self.up3(e3, o)
        o = self.up2(e2, o)
        o = self.up1(e1, o)

        o = self.mask(o)
        o = F.upsample(o, size=self.out_size, mode='bilinear')
        # Drop the singleton channel axis: (N, 1, H, W) -> (N, H, W).
        return torch.squeeze(o, dim=1)

### utils

In [10]:
def dice_score(probs, target, weight=None, use_mask=True, threshold=0.5):
    """Mean (optionally weighted) Dice coefficient over a batch.

    Args:
        probs: predicted probabilities; the first axis is the batch axis.
        target: ground-truth mask with the same number of elements per sample.
        weight: optional per-element weights (squared before use); None means
            every element counts equally.
        use_mask: when True, `probs` is binarised with `threshold` first.
        threshold: cut-off used when `use_mask` is True.

    Returns:
        The per-sample score (2*|A.B| + 1) / (|A| + |B| + 1), averaged over
        the batch. The +1 terms keep the ratio defined for empty masks.

    All reductions are carried out in float32 regardless of the input dtype:
    a half-precision sum overflows once more than 65504 pixels are set. The
    result is therefore float32 even for half-precision inputs. Unweighted
    scoring no longer allocates a CUDA half tensor, so it works on any device.
    """
    N = target.size(0)

    probs = probs.float()
    if use_mask:
        probs = (probs > threshold).float()
    m1 = probs.view(N, -1)
    m2 = target.float().view(N, -1)

    if weight is None:
        # Equivalent to an all-ones weight map, without materialising it.
        w2 = 1.0
    else:
        w = weight.float().view(N, -1)
        w2 = w*w

    score = (2*(w2*m1*m2).sum(dim=1) + 1) / ((w2*m1).sum(dim=1) + (w2*m2).sum(dim=1) + 1)

    return score.sum()/N


def dice_loss(logits, target, weight=None):
    """Soft Dice loss: one minus the un-thresholded Dice score of sigmoid(logits)."""
    soft_score = dice_score(F.sigmoid(logits), target, weight, use_mask=False)
    return 1 - soft_score


def criterion(logits, target):
    """Boundary-weighted binary cross-entropy plus boundary-weighted Dice loss.

    Elements whose 41x41 local mean of `target` lies in [0.01, 0.99] are
    treated as boundary and get weight 3 instead of 1. The weight map is then
    rescaled so that its sum equals the number of elements.

    Args:
        logits: raw network outputs, same shape as `target`.
        target: 3-D ground-truth mask tensor (unpacked below as N, H, W).

    Returns:
        The sum of the weighted BCE-with-logits loss and the weighted Dice loss.
    """
    N,H,W = target.size()  # unpacking also enforces a 3-D target
    a = F.avg_pool2d(target, kernel_size=41, stride=1, padding=20)
    boundary = (a.ge(0.01) * a.le(0.99)).float()
    # Build the base weights next to `boundary` rather than forcing CUDA, so
    # the loss works on whatever device the inputs live on.
    weight = torch.ones_like(boundary)
    w0 = weight.sum()
    weight = weight + 2*boundary
    w1 = weight.sum()
    weight = weight*w0/w1

    return (F.binary_cross_entropy_with_logits(logits, target, weight)
            + dice_loss(logits, target, weight))

In [11]:
def image_to_tensor(image):
    """Convert an HWC numpy image into a CHW float32 torch tensor."""
    chw = np.transpose(image, (2, 0, 1))  # HWC -> CHW
    return torch.from_numpy(chw.astype(np.float32))


def label_to_tensor(label, threshold=0.5):
    """Binarise a numpy label (1.0 where label > threshold, else 0.0) into a float32 tensor."""
    binary = np.greater(label, threshold).astype(np.float32)
    return torch.from_numpy(binary)

In [12]:
class CarDataset(Dataset):
    """Dataset over the '*.jpg' images of a directory, optionally paired with masks.

    Args:
        image_path: directory that is scanned for '*.jpg' files.
        mask_path: directory holding '<image stem>_mask.png' files; only read
            in 'train' mode.
        transform: iterable of callables. In 'train' mode each is called as
            t(image, label) -> (image, label); in 'test' mode as
            t(image) -> image. None (the default) means no transforms.
        mode: 'train' yields (image, label) tensors, 'test' yields the image
            tensor only.
    """

    def __init__(self, image_path, mask_path='', transform=None, mode='train'):
        super(CarDataset, self).__init__()
        self.img_names = sorted(os.path.basename(x)
                                for x in glob.glob(os.path.join(image_path, '*.jpg')))
        self.img_path  = image_path
        self.mask_path = mask_path
        # A fresh list per instance avoids sharing one mutable default object.
        self.transform = [] if transform is None else transform
        self.mode      = mode

    def get_image(self, index):
        """Load image `index` scaled to [0, 1]; returns (image, file name)."""
        name = self.img_names[index]
        file = os.path.join(self.img_path, name)
        img  = cv2.imread(file)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise IOError('could not read image: ' + file)
        image = img / 255
        return image, name

    def get_label(self, name):
        """Load the grayscale mask belonging to image file `name`, scaled to [0, 1]."""
        name = name.replace('.jpg', '_mask.png')
        file = os.path.join(self.mask_path, name)
        mask = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise IOError('could not read mask: ' + file)
        label = mask / 255
        return label

    def get_train_item(self, index):
        """Return the (image tensor, label tensor) pair for `index`."""
        image, name = self.get_image(index)
        label = self.get_label(name)

        for t in self.transform:
            image, label = t(image, label)
        image = image_to_tensor(image)
        label = label_to_tensor(label)
        return image, label

    def get_test_item(self, index):
        """Return the image tensor for `index` (no label is loaded)."""
        # get_image returns (image, name); passing the whole tuple on, as the
        # code did before, breaks the transforms and the tensor conversion.
        image, _ = self.get_image(index)

        for t in self.transform:
            image = t(image)
        image = image_to_tensor(image)
        return image

    def __getitem__(self, index):
        if self.mode == 'train':
            return self.get_train_item(index)
        elif self.mode == 'test':
            return self.get_test_item(index)
        # Fail loudly instead of handing None to the DataLoader's collate step.
        raise ValueError("unknown mode {!r}; expected 'train' or 'test'".format(self.mode))

    def __len__(self):
        return len(self.img_names)

In [13]:
def random_shift_scale(images, shift_limit=(-0.0625,0.0625),
                       scale_limit=(1/1.1,1.1), prob=0.5):
    """With probability `prob`, apply one random shift + scale to every image.

    Args:
        images: list of arrays sharing the height and width of `images[0]`.
            The list is modified in place and also returned.
        shift_limit: (min, max) shift, as a fraction of width / height.
        scale_limit: (min, max) scale factor applied about the image centre.
        prob: probability of applying the transform at all.

    Returns:
        The same `images` list; entries are replaced by their warped versions
        when the transform is applied.
    """
    if random.random() >= prob:
        return images

    H, W = images[0].shape[:2]

    scale = random.uniform(scale_limit[0], scale_limit[1])
    dx = round(random.uniform(shift_limit[0], shift_limit[1])*W)
    dy = round(random.uniform(shift_limit[0], shift_limit[1])*H)

    # Corners of the source image and where they land after scaling about
    # the centre and shifting by (dx, dy).
    box0 = np.array([[0,0], [W,0], [W,H], [0,H]]).astype(np.float32)
    box1 = box0 - np.array([W/2, H/2])
    box1 = scale * box1 + np.array([W/2+dx, H/2+dy])
    # The arithmetic above promotes box1 to float64, but
    # cv2.getPerspectiveTransform only accepts float32 point arrays.
    box1 = box1.astype(np.float32)
    mat = cv2.getPerspectiveTransform(box0, box1)

    for n, image in enumerate(images):
        images[n] = cv2.warpPerspective(image, mat, (W, H), flags=cv2.INTER_LINEAR,
                                        borderMode=cv2.BORDER_REFLECT_101, borderValue=(0,0,0))
    return images


def train_augment(image, label):
    """Apply the same random shift/scale to an image and its label.

    Returns the (image, label) pair produced by `random_shift_scale`.
    """
    # The body used to reference an undefined name `mask` instead of the
    # `label` parameter, which raised NameError on every call.
    image, label = random_shift_scale([image, label], shift_limit=(-0.0625,0.0625),
                                      scale_limit=(0.91,1.21))
    return image, label

In [14]:
def train(train_loader, model, optimizer):
    """Run one training epoch with gradient accumulation.

    Gradients of consecutive batches are accumulated so that an optimizer
    step is taken roughly every 32 samples (32 // args.batch_size batches).

    Args:
        train_loader: yields (input, target) batches; must expose `.dataset`.
        model: network to train; switched to training mode here.
        optimizer: optimizer over the model's parameters.

    Returns:
        The sample-weighted mean training loss over the epoch.
    """
    # 32 // batch_size is 0 for batch sizes above 32, which would make the
    # modulo below raise ZeroDivisionError; always step at least every batch.
    num_grad_acc = max(1, 32 // args.batch_size)
    optimizer.zero_grad()
    model.train()
    train_loss = 0
    pending = 0  # batches whose gradients have not been applied yet
    init_time = time.time()
    for i, (inputt, target) in enumerate(train_loader, 1):
        inputt = Variable(inputt.cuda())
        target = Variable(target.cuda())

        output = model(inputt)
        loss = criterion(output, target)
        # NOTE: the loss is not divided by num_grad_acc, so the accumulated
        # gradient is the sum (not the mean) over the accumulated batches.
        loss.backward()
        pending += 1
        if i % num_grad_acc == 0:
            optimizer.step()
            optimizer.zero_grad()
            pending = 0

        batch_loss = loss.data[0]
        train_loss += batch_loss * inputt.size(0)
        if i % args.log_every == 0:
            print("   % Time: {:4.0f}s | Batch: {:3} | Train loss: {:.4f}"
                  .format(time.time()-init_time, i, batch_loss))

    if pending:
        # Apply the gradients of a trailing, incomplete accumulation group
        # instead of silently discarding them at the next epoch's zero_grad().
        optimizer.step()
        optimizer.zero_grad()
    return train_loss / len(train_loader.dataset)

In [15]:
def validate(val_loader, model):
    """Score `model` on the validation set using half-precision inference.

    Args:
        val_loader: yields (input, target) batches; must expose `.dataset`.
        model: the network to evaluate. It is left untouched.

    Returns:
        (mean Dice score over the dataset, elapsed seconds).
    """
    import copy

    # Evaluate a half-precision copy. Calling .half() on the model itself
    # converts the training weights in place: the next training epoch would
    # then receive float inputs on a half model, and the weights would
    # already have been rounded to fp16.
    eval_model = copy.deepcopy(model).half()
    eval_model.eval()

    val_score = 0
    init_time = time.time()
    # NOTE(review): gradient tracking is not disabled here — consider
    # volatile Variables / torch.no_grad() depending on the PyTorch version.
    for i, (inputt, target) in enumerate(val_loader, 1):
        inputt = Variable(inputt.cuda().half())
        target = Variable(target.cuda().half())

        output = eval_model(inputt)
        score = dice_score(F.sigmoid(output), target)
        val_score += score.data[0] * inputt.size(0)
    return val_score / len(val_loader.dataset), time.time() - init_time

### prepare

In [16]:
# Training split: image/mask pairs with no augmentation transforms, reshuffled
# every epoch by the loader.
train_dataset = CarDataset(
    image_path=args.train_path,
    mask_path=args.train_masks_path,
    transform=[],
    mode='train',
)
train_loader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=7,
)

In [17]:
# Validation split. mode='train' is used so that every item comes with its
# mask; the loader keeps the files in their sorted order (no shuffling).
val_dataset = CarDataset(
    image_path=args.val_path,
    mask_path=args.val_masks_path,
    transform=[],
    mode='train',
)
val_loader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    num_workers=7,
)

In [None]:
# Network for 3-channel 1024x1024 inputs, moved to the GPU.
model = UNet1024((3, 1024, 1024)).cuda()

# SGD with momentum and weight decay; the scheduler has a single milestone
# at epoch 40.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0005,
)
scheduler = MultiStepLR(optimizer, milestones=[40])

### train

In [None]:
# Main loop: 50 epochs of training, each followed by a validation pass.
for epoch in range(1, 51):
    scheduler.step()
    print("=> EPOCH {} with lr {}".format(epoch, scheduler.get_lr()))
    init_time = time.time()
    train_loss = train(train_loader, model, optimizer)
    # validate() returns (score, elapsed_seconds). Formatting the whole tuple
    # with '{:.4f}' raised TypeError, so keep only the score.
    val_score = validate(val_loader, model)[0]
    print("="*10)
    print("   % Epoch: {} | Time: {:4.0f}s | "
          "Train loss: {:.4f} | Val score: {:.4f}"
          .format(epoch, time.time()-init_time, train_loss, val_score))
    print("="*10)

=> EPOCH 1 with lr [0.01]
   % Time:    5s | Batch:  10 | Train loss: 1.2822
   % Time:   10s | Batch:  20 | Train loss: 1.1582
   % Time:   15s | Batch:  30 | Train loss: 1.1343
