In [1]:
import os
import pickle
import sys
from argparse import ArgumentParser

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import tqdm
import tqdm.notebook  # `import tqdm` alone does not load the notebook submodule used by the loops below
from sklearn.metrics import mean_squared_error
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

from hack_utils import NUM_PTS, CROP_SIZE
from hack_utils import ScaleMinSideToSize, CropCenter, TransformByKeys, RandomRotation90
from hack_utils import ThousandLandmarksDataset
from hack_utils import restore_landmarks_batch, create_submission

# Reproducibility settings: request deterministic cuDNN kernels and disable
# the cuDNN autotuner, which may otherwise select different algorithms from
# run to run.
# NOTE(review): no random seed is set in the visible cells — confirm whether
# seeding happens elsewhere if reproducible runs are required.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Training, validation and prediction loops, refactored from hack_train

def train(model, loader, loss_fn, optimizer, device):
    """Run one training epoch and return the mean MSE loss over its batches.

    Args:
        model: network mapping an image batch to flattened landmark
            coordinates (B x (2 * NUM_PTS)).
        loader: iterable of batches that also supports ``len()``; each batch
            is a dict with "image" and "landmarks" tensors.
        loss_fn: indexable pair ``(mse_loss, wing_loss)``. Only index 0 is
            back-propagated; index 1 is evaluated purely for monitoring.
        optimizer: optimizer updating ``model``'s parameters.
        device: not used; kept so existing callers keep working. Batches are
            moved to the device that already holds the model's parameters.

    Returns:
        Mean of the per-batch MSE losses (numpy float).
    """
    model.train()
    # Follow the model instead of hard-coding `.cuda()`, so the loop also
    # runs when the model lives on the CPU or on a non-default GPU.
    model_device = next(model.parameters()).device
    train_loss_wing = []
    train_loss_mse = []
    for batch in tqdm.notebook.tqdm(loader, total=len(loader), desc="training..."):
        images = batch["image"].to(model_device)  # B x 3 x CROP_SIZE x CROP_SIZE
        landmarks = batch["landmarks"]  # B x (2 * NUM_PTS), stays on the CPU

        pred_landmarks = model(images).cpu()  # B x (2 * NUM_PTS)

        # The wing loss is only reported, never optimised: evaluate it
        # without recording an autograd graph.
        with torch.no_grad():
            wing = loss_fn[1](pred_landmarks, landmarks)
        train_loss_wing.append(wing.item())

        # Both losses (and their combination) were tried as the training
        # objective; plain MSE gave the best results.
        loss = loss_fn[0](pred_landmarks, landmarks)
        train_loss_mse.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Train loss: wing_loss = {np.mean(train_loss_wing)}')
    return np.mean(train_loss_mse)


def validate(model, loader, loss_fn, device):
    """Evaluate the model on ``loader`` and return the mean MSE loss.

    Args:
        model: network mapping an image batch to flattened landmark
            coordinates.
        loader: iterable of batches that also supports ``len()``; each batch
            is a dict with "image" and "landmarks" tensors.
        loss_fn: indexable pair ``(mse_loss, wing_loss)``; both are evaluated,
            the wing loss is only printed.
        device: not used; kept so existing callers keep working. Batches are
            moved to the device that already holds the model's parameters.

    Returns:
        Mean of the per-batch MSE losses (numpy float).
    """
    model.eval()
    # Follow the model instead of hard-coding `.cuda()`.
    model_device = next(model.parameters()).device
    val_loss_wing = []
    val_loss_mse = []
    for batch in tqdm.notebook.tqdm(loader, total=len(loader), desc="validation..."):
        images = batch["image"].to(model_device)
        landmarks = batch["landmarks"]

        # Nothing is optimised here, so the forward pass and both losses run
        # without autograd bookkeeping.
        with torch.no_grad():
            pred_landmarks = model(images).cpu()
            mse = loss_fn[0](pred_landmarks, landmarks)
            wing = loss_fn[1](pred_landmarks, landmarks)

        val_loss_mse.append(mse.item())
        val_loss_wing.append(wing.item())

    print(f'Val loss: wing_loss = {np.mean(val_loss_wing)}')
    return np.mean(val_loss_mse)


def predict(model, loader, device):
    """Predict landmarks for every sample of ``loader`` in original image coordinates.

    Args:
        model: network mapping an image batch to flattened landmark
            coordinates (B x (2 * NUM_PTS)).
        loader: loader exposing ``.dataset`` and ``len()``; each batch is a
            dict with "image", "scale_coef", "crop_margin_x" and
            "crop_margin_y" tensors. It must iterate in dataset order for the
            rows of the result to line up with the dataset.
        device: not used; kept so existing callers keep working. Batches are
            moved to the device that already holds the model's parameters.

    Returns:
        Array of shape (len(loader.dataset), NUM_PTS, 2) holding the output of
        ``restore_landmarks_batch`` for each batch, stacked in iteration order.
    """
    model.eval()
    # Follow the model instead of hard-coding `.cuda()`.
    model_device = next(model.parameters()).device
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))
    # Running write position: does not depend on `loader.batch_size`, so
    # uneven batches or a custom batch sampler are handled correctly.
    offset = 0
    for batch in tqdm.notebook.tqdm(loader, total=len(loader), desc="test prediction..."):
        images = batch["image"].to(model_device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()

        pred_landmarks = pred_landmarks.numpy().reshape((len(pred_landmarks), NUM_PTS, 2))  # B x NUM_PTS x 2
        batch_len = len(pred_landmarks)

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2
        predictions[offset: offset + batch_len] = prediction
        offset += batch_len

    return predictions

    

# Root directory of the dataset; the 'train' and 'test' subdirectories are
# joined onto it when the datasets are created.
data_path = './data/'
from matplotlib import pyplot as plt

### Load data and add transforms

In [2]:
import albumentations as albu

# 1. prepare data & models

# Colour augmentations, passed to the training dataset only.
train_augmentations = albu.Compose([
    albu.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=0, p=0.3),
    albu.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.3),
])


def _make_pipeline_steps():
    """Return a fresh list of the transform steps shared by the train and val pipelines."""
    return [
        RandomRotation90(0),  # do not rotate: 90-degree rotation gave a lower score
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),
        TransformByKeys(transforms.ToPILImage(), ("image",)),
        TransformByKeys(transforms.ToTensor(), ("image",)),
        TransformByKeys(transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ("image",)),
    ]


# Each pipeline gets its own transform instances, as before.
train_transforms = transforms.Compose(_make_pipeline_steps())
val_transforms = transforms.Compose(_make_pipeline_steps())

print("Reading data...")
# Both splits are read from the same 'train' directory; `split` selects the part.
train_root = os.path.join(data_path, 'train')
train_dataset = ThousandLandmarksDataset(train_root, train_augmentations, train_transforms, split="train")
val_dataset = ThousandLandmarksDataset(train_root, None, val_transforms, split="val")

batch_size = 128
num_workers = 4
# Options common to both loaders.
loader_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
train_dataloader = data.DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_options)
val_dataloader = data.DataLoader(val_dataset, shuffle=False, drop_last=False, **loader_options)

Reading data...


315144it [06:59, 751.40it/s]
393931it [01:45, 3720.42it/s]  


In [3]:
from coord_conv import CoordConv
import segmentation_models_pytorch as smp
class Convert(nn.Module):
    """Flatten every non-batch dimension: (B, d1, d2, ...) -> (B, d1 * d2 * ...)."""

    def forward(self, x):
        # `reshape` returns a view when possible and copies otherwise, so
        # non-contiguous inputs (e.g. after a transpose) no longer raise as
        # they did with `view`.
        return x.reshape(x.shape[0], -1)
            
class LinearHead(nn.Module):
    """Regression head: a convolution followed by a linear layer producing 2 * NUM_PTS values.

    The input is expected to have spatial size CROP_SIZE x CROP_SIZE, because
    the linear layer is sized for ``out_channels * CROP_SIZE ** 2`` features.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the convolution.
        kernel_size: convolution kernel size; padding is ``kernel_size // 2``.
        activation: accepted but not used.
        upsampling: accepted but not used.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1):
        super().__init__()
        # also tried to use CoordConv layer
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
        self.convert = Convert()
        # The flattened conv output has out_channels * CROP_SIZE**2 features.
        # For out_channels == 1 this is the previous CROP_SIZE**2.
        self.linear = nn.Linear(out_channels * CROP_SIZE ** 2, 2 * NUM_PTS, bias=True)

    def forward(self, x):
        """Return a (B, 2 * NUM_PTS) tensor of flattened landmark coordinates."""
        x = self.conv2d(x)
        out = self.convert(x)
        return self.linear(out)
        
        
    
ENCODER = 'vgg16_bn'
ENCODER_WEIGHTS = 'imagenet'
DEVICE = 'cuda'  # not referenced by the visible cells

CLASSES = ['0']  # not referenced by the visible cells
ACTIVATION = 'sigmoid'

# Build the U-Net on the GPU from a single options mapping.
unet_options = dict(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    classes=1,
    activation=ACTIVATION,
)
model = smp.Unet(**unet_options).cuda()

# Swap the stock segmentation head for the custom linear landmark head.
# NOTE(review): 16 is presumably the decoder's output channel count — confirm.
model.segmentation_head = LinearHead(in_channels=16, out_channels=1)
# The new head was created after the first .cuda() call, so move the whole
# model to GPU 0 again.
model = model.cuda(0)

In [4]:
# Wing loss implementation from GitHub
import math
import torch
from torch import nn


# torch.log  and math.log is e based
class WingLoss(nn.Module):
    """Wing loss on element-wise absolute errors.

    Errors below ``omega`` contribute ``omega * ln(1 + |e| / epsilon)``; the
    remaining errors contribute ``|e| - C`` with
    ``C = omega - omega * ln(1 + omega / epsilon)``. The result is the mean
    over all elements that fall into either branch.
    """

    def __init__(self, omega=10, epsilon=2):
        super().__init__()
        self.omega = omega
        self.epsilon = epsilon

    def forward(self, pred, target):
        """Return the scalar wing loss between ``pred`` and ``target``."""
        abs_err = torch.abs(target - pred)

        # Logarithmic branch for small errors.
        small_err = abs_err[abs_err < self.omega]
        log_part = self.omega * torch.log(1 + small_err / self.epsilon)

        # Linear branch for large errors, shifted by the constant C.
        large_err = abs_err[abs_err >= self.omega]
        offset = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
        linear_part = large_err - offset

        total = log_part.sum() + linear_part.sum()
        count = len(log_part) + len(linear_part)
        return total / count

In [5]:
# Device string without the stray space: 'cuda: 0' is not a valid torch device string.
device = 'cuda:0'
optimizer = optim.Adam(model.parameters(), lr=3e-4, amsgrad=True)
loss_fn = fnn.mse_loss
# Order matters: train() and validate() read index 0 as the MSE loss and
# index 1 as the wing loss.
losses = [loss_fn, WingLoss(5, 1)]

In [None]:
# 2. train & validate
n_epoch = 50
best_val_loss = np.inf

model.train()
print("Ready for training...")
for epoch in range(n_epoch):
    train_loss = train(model, train_dataloader, losses, optimizer, device=device)
    val_loss = validate(model, val_dataloader, losses, device=device)
    print("Epoch #{:2}:\ttrain loss: {:5.4}\tval loss: {:5.4}".format(epoch, train_loss, val_loss))

    improved = val_loss < best_val_loss
    if not improved:
        continue

    # New best validation MSE: remember it and overwrite the checkpoint.
    best_val_loss = val_loss
    with open("vgg16_bn_unet_best_mse.pth", "wb") as fp:
        torch.save(model.state_dict(), fp)

Ready for training...


HBox(children=(FloatProgress(value=0.0, description='training...', max=2462.0, style=ProgressStyle(description…

### Visualize landmarks

In [13]:
# Take only the first validation batch.
b = next(iter(val_dataloader))

model.eval()
with torch.no_grad():
    # The model returns a single tensor of flattened landmarks (train() and
    # validate() call `.cpu()` directly on its output), so it must not be
    # unpacked into two values.
    x = model(b['image'][:10, ...].cuda())

from matplotlib.patches import Circle
def plot_img(img, points, mean=0.5, std=0.5):
    """Display an image tensor with landmark points drawn on top.

    Args:
        img: C x H x W image tensor normalised as ``(pixel - mean) / std``.
        points: iterable of (x, y) pairs in the image's pixel coordinates.
        mean: normalisation mean to undo before display. The default matches
            the ``Normalize(mean=0.5, std=0.5)`` step of the notebook's
            transforms.
        std: normalisation std to undo before display.
    """
    fig, ax = plt.subplots(1)
    ax.set_aspect('equal')

    # Undo the normalisation and clamp to [0, 1]. imshow clips float RGB data
    # outside that range, which would otherwise black out the negative half
    # of a [-1, 1] image.
    pixels = (img * std + mean).clamp(0, 1)
    ax.imshow(pixels.numpy().transpose(1, 2, 0))

    # One small circle per landmark.
    for xx, yy in points:
        ax.add_patch(Circle((xx, yy), 0.5))

    plt.show()
# Show at most `n` samples: predicted landmarks first, then the ground truth.
n = 10
for i, (img, pred) in enumerate(zip(b['image'], x)):
    if i >= n:
        break
    print('Predicted landmarks')
    # reshape(-1, 2) turns the flat coordinate vector into (x, y) rows
    # without hard-coding the number of landmarks.
    plot_img(img, pred.cpu().numpy().reshape(-1, 2))
    print('True landmarks')
    plot_img(img, b['landmarks'][i].numpy().reshape(-1, 2))
    print('-'*40)

### Create submission

In [21]:
import cv2
# Test-time pipeline: scale, centre-crop, convert to tensor and normalise.
test_steps = [
    ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
    CropCenter(CROP_SIZE),
    TransformByKeys(transforms.ToPILImage(), ("image",)),
    TransformByKeys(transforms.ToTensor(), ("image",)),
    TransformByKeys(transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ("image",)),
]
test_transforms = transforms.Compose(test_steps)

test_root = os.path.join(data_path, 'test')
test_dataset = ThousandLandmarksDataset(test_root, None, test_transforms, split="test")
test_dataloader = data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=False,
    drop_last=False,
)

# Restore the weights of the best checkpoint written by the training loop.
with open("vgg16_bn_unet_best_mse.pth", "rb") as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
model.load_state_dict(best_state_dict)

test_predictions = predict(model, test_dataloader, device)

create_submission(data_path, test_predictions, "vgg16_unet_submit.csv")


99820it [00:00, 436308.85it/s]


HBox(children=(FloatProgress(value=0.0, description='test prediction...', max=780.0, style=ProgressStyle(descr…


