In [1]:
from __future__ import print_function, division
from collections import defaultdict, deque
import os
import sys
import csv
import time
import errno
import pickle
import random
import datetime
import warnings
import collections
import multiprocessing

from IPython.display import display
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from matplotlib import pyplot as plt

import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import utils
from torchvision.transforms import functional as TF
from torch.utils.data.sampler import Sampler
from torch.utils.data import DataLoader, Dataset

import faster_rcnn
from faster_rcnn.rpn import AnchorGenerator
from faster_rcnn.transform import GeneralizedRCNNTransform
from faster_rcnn.faster_rcnn import FastRCNNPredictor

import skimage
import skimage.io
import skimage.color
import skimage.transform

from sklearn.model_selection import train_test_split

from PIL import Image
import cv2

import albumentations as A
from albumentations import *
from albumentations.pytorch import ToTensor
%matplotlib inline

In [3]:
from typing import Callable

def get_transform(train: bool, predict_only=False) -> Callable:
    """Build the albumentations pipeline used by the datasets.

    Args:
        train: when true, build the augmenting pipeline (resize, random
            sized crop, colour jitter); otherwise only resize the image so
            that its longest side matches the scale seen during training.
        predict_only: when true, the returned ``A.Compose`` is created
            without bounding-box parameters; otherwise boxes are declared in
            COCO format with their classes in the ``labels`` field.

    Returns:
        An ``A.Compose`` whose last step is ``ToTensor()``.
    """
    initial_size = 2048
    min_max_height = (400, 533)
    out_width, out_height = 512, 384

    if not train:
        # Scale test images so objects appear at roughly the size they have
        # after the average training crop is resized to ``out_height``.
        test_size = int(initial_size *
                        out_height / np.mean(min_max_height))
        print(f'Test image max size {test_size} px')
        pipeline = [A.LongestMaxSize(max_size=test_size)]
    else:
        pipeline = [
            A.LongestMaxSize(max_size=initial_size),
            A.RandomSizedCrop(
                min_max_height=min_max_height,
                width=out_width,
                height=out_height,
                w2h_ratio=out_width / out_height,
            ),
            A.HueSaturationValue(
                hue_shift_limit=7,
                sat_shift_limit=10,
                val_shift_limit=10,
            ),
            A.RandomBrightnessContrast(),
            A.RandomGamma(),
        ]
    pipeline.append(ToTensor())

    if predict_only:
        return A.Compose(pipeline)

    bbox_params = {
        'format': 'coco',
        'min_area': 0,
        'min_visibility': 0.5,
        'label_fields': ['labels'],
    }
    return A.Compose(pipeline, bbox_params=bbox_params)

class ModelTransform(GeneralizedRCNNTransform):
    """Model-side transform that keeps normalisation statistics but never resizes.

    Only ``nn.Module.__init__`` is invoked (the parent constructor is
    bypassed), so the instance carries just ``image_mean`` and ``image_std``.
    """

    def __init__(self, image_mean, image_std):
        nn.Module.__init__(self)
        self.image_mean, self.image_std = image_mean, image_std

    def resize(self, image, target):
        """Return ``image`` and ``target`` untouched (resizing is disabled)."""
        return image, target

In [4]:
# Full annotated training table (one row per image).
df_all = pd.read_csv('input/train.csv')

# Hold out 20% of the annotated images for validation; the fixed seed keeps
# the split reproducible across runs.
df_train, df_val = train_test_split(df_all, test_size=0.2, random_state=42)

# The sample submission provides the list of test image ids.
df_test = pd.read_csv('input/sample_submission.csv')

In [5]:
# Quick sanity check of the train/validation split: first rows and shapes.
display(df_train.head())
print(df_train.shape)

display(df_val.head())
print(df_val.shape)

Unnamed: 0,image_id,labels
278,100249416_00017_2,U+4E00 517 433 71 11 U+4E00 1163 432 75 11 U+4...
1598,200015779_00087_1,
1532,200015779_00043_1,
2011,200021660-00080_2,U+4E00 337 936 97 21 U+3078 1424 3704 103 36 U...
2282,200021763-00043_1,U+30CF 1143 3033 92 35 U+30CF 501 804 60 37 U+...


(3104, 2)


Unnamed: 0,image_id,labels
3431,umgy001-048,U+304B 1107 2641 43 32 U+30CB 709 1742 50 33 U...
1018,200004148_00035_1,U+4E00 837 1831 74 13 U+306F 564 2233 44 28 U+...
3270,hnsd010-016,U+30CB 274 920 56 25 U+30FD 1077 2118 27 29 U+...
3613,umgy006-020,U+309D 689 2295 45 43 U+309D 311 2063 34 48 U+...
1200,200005598_00044_1,U+30FD 601 2300 27 40 U+3044 969 952 65 45 U+3...


(777, 2)


In [20]:
import os
import numpy as np
import torch
from PIL import Image
from pathlib import Path

def get_target_boxes_labels(item):
    """Parse the space-separated ``labels`` field of a dataframe row.

    The field is a flat sequence of 5-token records
    ``<label> <x> <y> <w> <h>``.

    Args:
        item: any object exposing a ``labels`` attribute (e.g. a dataframe
            row). Non-string or blank values (NaN, ``None``, ``''``) are
            treated as "no annotations".

    Returns:
        ``(boxes, labels)`` where ``boxes`` is a float64 array of shape
        ``(n, 4)`` holding the four numeric fields of each record and
        ``labels`` is the array of the ``n`` leading tokens.

    Raises:
        ValueError: if the number of tokens is not a multiple of 5.
    """
    raw = item.labels
    if isinstance(raw, str) and raw.strip():
        # split() (no argument) tolerates trailing or repeated whitespace,
        # which would otherwise yield empty tokens and break the reshape.
        labels = np.array(raw.split()).reshape(-1, 5)
    else:
        labels = np.zeros((0, 5))
    # np.float was removed from NumPy (1.24+); use an explicit dtype.
    boxes = labels[:, 1:].astype(np.float64)
    labels = labels[:, 0]
    return boxes, labels

def get_image_path(item, root: Path = None) -> str:
    """Return the path of the JPEG file belonging to ``item``.

    Args:
        item: object exposing an ``image_id`` attribute.
        root: directory containing the images, as a ``str`` or
            ``pathlib.Path``. When ``None`` the bare file name is returned.

    Returns:
        The path as a string, ``'<root>/<image_id>.jpg'``.
    """
    filename = f'{item.image_id}.jpg'
    if root is None:
        return filename
    # Formatting (rather than ``root + '/'``) also accepts Path objects,
    # which do not support concatenation with str.
    return f'{root}/{filename}'

def read_image(path):
    """Load an image from disk as an RGB array.

    Args:
        path: location of the image file (``str`` or path-like).

    Returns:
        The decoded image converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    img = cv2.imread(str(path))
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a message that names the offending file.
        raise FileNotFoundError(f'Could not read image: {path}')
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

class Dataset(torch.utils.data.Dataset):
    """Detection dataset backed by a dataframe of image ids and label strings.

    Every sample is ``(image, target)``. ``target`` always carries the
    positional row index under ``'idx'``; when ``is_train`` is true it also
    carries ``'boxes'`` (x1, y1, x2, y2) and ``'labels'`` (all ones, i.e. a
    single foreground class).
    """

    def __init__(self, df: pd.DataFrame, transform: Callable, root: Path,
                 skip_empty: bool, is_train = True):
        """
        Args:
            df: rows exposing ``image_id`` and ``labels``.
            transform: callable invoked with keyword arguments
                (``image``, ``labels`` and, when ``is_train``, ``bboxes``)
                and returning a dict with the same keys.
            root: directory that contains the image files.
            skip_empty: when true, a sample left without boxes after the
                transform is replaced by a randomly chosen other sample.
            is_train: when true, boxes are parsed and returned in the target.
        """
        self.df = df
        self.root = root
        self.transform = transform
        self.skip_empty = skip_empty
        self.is_train = is_train

    def __len__(self):
        """Number of rows in the backing dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, transform and package the sample at positional index ``idx``."""
        item = self.df.iloc[idx]
        image = read_image(get_image_path(item, self.root))
        h, w, _ = image.shape
        if self.is_train:
            bboxes, labels = get_target_boxes_labels(item)
            # clip bboxes (else albumentations fails)
            bboxes[:, 2] = (np.minimum(bboxes[:, 0] + bboxes[:, 2], w)
                            - bboxes[:, 0])
            bboxes[:, 3] = (np.minimum(bboxes[:, 1] + bboxes[:, 3], h)
                            - bboxes[:, 1])
            xy = {
                'image': image,
                'bboxes': bboxes.tolist(),
                # np.long is gone from NumPy 1.24+ (and platform dependent in
                # 2.x); int64 matches the torch.long used for the target.
                'labels': np.ones_like(labels, dtype=np.int64),
            }
            xy = self.transform(**xy)
            if not xy['bboxes'] and self.skip_empty:
                # Resample: return some other randomly chosen item instead.
                return self[random.randint(0, len(self.df) - 1)]
            image = xy['image']
            # The explicit reshape keeps an (0, 4) shape when no box is left.
            boxes = torch.tensor(xy['bboxes']).reshape((len(xy['bboxes']), 4))
            # convert to pytorch detection format
            boxes[:, 2] += boxes[:, 0]
            boxes[:, 3] += boxes[:, 1]
            target = {
                'boxes': boxes,
                'labels': torch.tensor(xy['labels'], dtype=torch.long),
                'idx': torch.tensor(idx),
            }
            return image, target
        else:
            xy = {
                'image': image,
                'labels': np.ones_like(1, dtype=np.int64),
            }
            xy = self.transform(**xy)
            image = xy['image']
            target = {
                'idx': torch.tensor(idx),
            }
            return image, target

In [7]:
def build_model(name: str, pretrained: bool, nms_threshold: float):
    """Create a two-class Faster R-CNN detector.

    Args:
        name: name of a model factory exposed by the ``faster_rcnn`` module.
        pretrained: forwarded to the factory as ``pretrained``.
        nms_threshold: forwarded to the factory as ``box_nms_thresh``.

    Returns:
        The model with a custom anchor generator, a fresh two-class box
        predictor and a non-resizing ``ModelTransform``.
    """
    anchor_sizes = [12, 24, 32, 64, 96]
    factory = faster_rcnn.__dict__[name]

    # One anchor size per feature level, each with the same three ratios.
    per_level_sizes = tuple((size,) for size in anchor_sizes)
    per_level_ratios = tuple((0.5, 1.0, 2.0) for _ in anchor_sizes)
    anchor_generator = AnchorGenerator(
        sizes=per_level_sizes,
        aspect_ratios=per_level_ratios,
    )

    model = factory(
        pretrained=pretrained,
        rpn_anchor_generator=anchor_generator,
        box_detections_per_img=1000,
        box_nms_thresh=nms_threshold,
    )

    # Replace the classification head: background + one foreground class.
    head_in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_channels=head_in_features,
        num_classes=2)

    # Keep the normalisation statistics but disable the built-in resize.
    original_transform = model.transform
    model.transform = ModelTransform(
        image_mean=original_transform.image_mean,
        image_std=original_transform.image_std,
    )
    return model

In [8]:
# Instantiate the detector. `pretrained=0` is falsy, i.e. the factory is asked
# for randomly initialised weights; boxes are de-duplicated with an NMS
# threshold of 0.25.
model = build_model('fasterrcnn_resnet152_fpn', pretrained=0, nms_threshold=0.25)

In [9]:
class SmoothedValue(object):
    """Running statistics for a scalar series.

    Keeps the last ``window_size`` values for windowed statistics (median,
    avg, max, value) and a running total/count for the global average.
    """

    def __init__(self, window_size=20, fmt=None):
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)
        self.count = 0
        self.total = 0.0

    def update(self, value, n=1):
        """Record ``value``; it counts ``n`` times towards the global average."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            packed = torch.tensor(
                [self.count, self.total], dtype=torch.float64, device='cuda')
            dist.barrier()
            dist.all_reduce(packed)
            synced_count, synced_total = packed.tolist()
            self.count = int(synced_count)
            self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (float32)."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Mean over every value ever recorded, weighted by ``n``."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = {
            'median': self.median,
            'avg': self.avg,
            'global_avg': self.global_avg,
            'max': self.max,
            'value': self.value,
        }
        return self.fmt.format(**stats)

def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum

    Returns:
        A dict with the same fields as input_dict after reduction. With a
        single process ``input_dict`` itself is returned unchanged.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= world_size
        reduced = dict(zip(keys, stacked))
    return reduced

class MetricLogger(object):
    """Collection of named ``SmoothedValue`` meters with periodic logging.

    Meters are created lazily on first ``update`` and can be read back as
    attributes (``logger.loss``).
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Record one scalar (number or one-element tensor) per keyword."""
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # Go through __dict__ directly: touching self.meters while it is not
        # set yet (e.g. during copy/unpickling) would re-enter __getattr__
        # and recurse without end.
        meters = self.__dict__.get('meters', {})
        if attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Synchronise every meter's global count/total across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress.

        A status line is printed every ``print_freq`` items and after the
        last one; a summary with the total time is printed at the end.

        Args:
            iterable: sized iterable (``len()`` must be supported).
            print_freq: number of items between two status lines.
            header: optional prefix for every printed line.
        """
        if not header:
            header = ''
        total = len(iterable)
        use_cuda = torch.cuda.is_available()
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(total))) + 'd'
        parts = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if use_cuda:
            # Peak GPU memory is only reported when CUDA is available.
            parts.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(parts)
        MB = 1024.0 * 1024.0
        for i, obj in enumerate(iterable):
            # Time spent waiting for the item vs. time for the whole step.
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == total - 1:
                eta_seconds = iter_time.global_avg * (total - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                fields = dict(
                    eta=eta_string,
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time))
                if use_cuda:
                    fields['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(i, total, **fields))
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) avoids a ZeroDivisionError for an empty iterable.
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / max(total, 1)))


def collate_fn(batch):
    """Transpose a batch of samples into a tuple of per-field tuples.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    """
    columns = zip(*batch)
    return tuple(columns)


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Return a ``LambdaLR`` that linearly warms the learning rate up.

    The multiplier grows linearly from ``warmup_factor`` at step 0 to 1 at
    step ``warmup_iters`` and stays at 1 afterwards.
    """

    def lr_multiplier(step):
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_multiplier)


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` so that only the master process prints.

    Other processes can still print by passing ``force=True``.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if forced or is_master:
            original_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    """Tell whether ``torch.distributed`` is both available and initialised."""
    return dist.is_available() and dist.is_initialized()

def get_world_size():
    """Number of distributed processes, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1

In [35]:
def get_metrics(results):
    """Aggregate per-image counts into global tp/fp/fn and the F1 score.

    Args:
        results: iterable of dicts with ``'tp'``, ``'fp'`` and ``'fn'``.

    Returns:
        ``{'f1': float, 'tp': int, 'fp': int, 'fn': int}``; F1 is 0 whenever
        precision or recall is undefined or zero.
    """
    tp, fp, fn = (
        int(sum(entry[key] for entry in results))
        for key in ('tp', 'fp', 'fn')
    )
    f1 = 0
    if (tp + fp) != 0 and (tp + fn) != 0:
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        if precision > 0 and recall > 0:
            f1 = (2 * precision * recall) / (precision + recall)
    return {'f1': float(f1), 'tp': tp, 'fp': fp, 'fn': fn}

def format_value(v):
    """Render floats with five decimals and everything else via ``str``."""
    return f'{v:.5f}' if isinstance(v, float) else str(v)

def print_metrics(metrics):
    """Print the metrics dict on one line as space-separated ``key=value``."""
    parts = [f'{k}={format_value(v)}' for k, v in metrics.items()]
    print(' '.join(parts))

In [36]:
def nms(dets, scores, thresh):
    """Greedy non-maximum suppression on NumPy arrays.

    Args:
        dets: array whose first four columns are x1, y1, x2, y2.
        scores: one score per row of ``dets``.
        thresh: a box is dropped when its IoU with an already kept,
            higher-scoring box is greater than this value.

    Returns:
        List of the kept row indices, in decreasing score order.
    """
    left, top, right, bottom = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]

    # Areas use the inclusive-pixel (+1) convention.
    areas = (right - left + 1) * (bottom - top + 1)
    remaining = scores.argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[best] + areas[rest] - inter)

        # Only candidates that do not overlap too much survive this round.
        remaining = rest[iou <= thresh]

    return kept

In [37]:
def score_boxes(truth_boxes, truth_label, preds_center, preds_label):
    """Count true positives, false positives and false negatives for an image.

    A ground-truth box is matched by the first unused prediction whose centre
    lies strictly inside it and whose label equals the box label.

    Args:
        truth_boxes: array of rows ``xmin, ymin, xmax, ymax``.
        truth_label: one label per ground-truth box.
        preds_center: array of rows ``x, y`` (predicted centres).
        preds_label: ``np.ndarray`` with one label per prediction.

    Returns:
        ``{'tp': ..., 'fp': ..., 'fn': ...}``.
    """
    assert isinstance(preds_label, np.ndarray)
    n_truth, n_preds = truth_boxes.shape[0], preds_center.shape[0]
    # need to handle the same edge cases here as well
    if n_truth == 0 or n_preds == 0:
        return {'tp': 0, 'fp': 0 + n_preds, 'fn': 0 + n_truth}

    centre_x, centre_y = preds_center[:, 0], preds_center[:, 1]
    available = np.ones(len(preds_label)).astype(bool)
    hits = misses = 0
    for (xmin, ymin, xmax, ymax), label in zip(truth_boxes, truth_label):
        # Candidate = point inside box & character same &
        # prediction not already used
        candidates = ((xmin < centre_x) & (xmax > centre_x) &
                      (ymin < centre_y) & (ymax > centre_y) &
                      (preds_label == label) & available)
        if candidates.any():
            hits += 1
            # argmax on a boolean mask picks the first candidate.
            available[np.argmax(candidates)] = False
        else:
            misses += 1
    # Every prediction that matched nothing is a false positive.
    return {'tp': hits, 'fp': 0 + available.sum(), 'fn': misses}

def to_coco(boxes: torch.Tensor) -> torch.Tensor:
    """ Convert from pytorch detection format (x1, y1, x2, y2) to COCO
    format (x, y, width, height). The input tensor is left unmodified.
    """
    converted = boxes.clone()
    # In-place on column views of the clone: x2 -> width, y2 -> height.
    converted[:, 2].sub_(converted[:, 0])
    converted[:, 3].sub_(converted[:, 1])
    return converted


def from_coco(boxes: torch.Tensor) -> torch.Tensor:
    """ Convert from COCO format (x, y, width, height) to pytorch detection
    format (x1, y1, x2, y2). The input tensor is left unmodified.
    """
    converted = boxes.clone()
    # In-place on column views of the clone: width -> x2, height -> y2.
    converted[:, 2].add_(converted[:, 0])
    converted[:, 3].add_(converted[:, 1])
    return converted

def scale_boxes(
        boxes: torch.Tensor, w_scale: float, h_scale: float) -> torch.Tensor:
    """Scale the first four columns of ``boxes``.

    Columns 0 and 2 are multiplied by ``w_scale``, columns 1 and 3 by
    ``h_scale``. A new ``(n, 4)`` tensor is returned.
    """
    factors = (w_scale, h_scale, w_scale, h_scale)
    scaled_columns = [
        boxes[:, col] * factor for col, factor in enumerate(factors)
    ]
    return torch.stack(scaled_columns).t()


def submission_item(image_id, prediction):
    """Format the predictions of one image as a submission row.

    Args:
        image_id: identifier copied into the row.
        prediction: iterable of dicts with a ``'cls'`` string and a
            ``'center'`` sequence of numbers.

    Returns:
        ``{'image_id': ..., 'labels': 'cls x y cls x y ...'}`` with the
        centre coordinates rounded to integers.
    """
    entries = []
    for pred in prediction:
        coords = [str(int(round(v))) for v in pred['center']]
        entries.append(' '.join([pred['cls']] + coords))
    return {
        'image_id': image_id,
        'labels': ' '.join(entries),
    }

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run one training epoch and log the losses.

    During epoch 0 a linear learning-rate warm-up is applied on top of the
    optimizer's learning rate, stepping once per iteration.

    Args:
        model: detection model returning a dict of losses in train mode.
        optimizer: optimizer updating the model parameters.
        data_loader: yields ``(images, targets)`` batches.
        device: device the batch is moved to.
        epoch: zero-based epoch index (used for the header and the warm-up).
        print_freq: number of iterations between two log lines.

    Raises:
        RuntimeError: if the summed loss becomes NaN or infinite.
    """
    import math  # local import: not among the notebook's top-level imports

    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            # Continuing after a NaN/inf loss only corrupts the weights and
            # wastes the rest of the run; stop with the offending losses.
            raise RuntimeError('Loss is {}, stopping training: {}'.format(
                loss_value, loss_dict_reduced))

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])


@torch.no_grad()
def evaluate(model, data_loader, device, output_dir, threshold):
    """Run the detector over ``data_loader`` and score it against ground truth.

    Args:
        model: detection model; called in eval mode with a list of images.
        data_loader: loader whose dataset exposes ``df`` and ``root`` and
            whose targets carry the row index under ``'idx'``.
        device: device the batch is moved to.
        output_dir: directory receiving one visualisation per image; created
            if missing. A falsy value disables saving.
        threshold: minimum detection score for a box to be kept.

    Returns:
        ``(metrics, (scores, clf_gt))`` where ``metrics`` is the aggregated
        f1/tp/fp/fn dict, ``scores`` the per-image counts (with ``image_id``)
        and ``clf_gt`` the per-image label strings built from the kept boxes.
    """
    cpu_device = torch.device('cpu')
    model.eval()
    metric_logger = MetricLogger(delimiter='  ')
    header = 'Test:'
    scores = []
    clf_gt = []

    if output_dir:
        # Saving would otherwise fail on the first image of a fresh checkout.
        os.makedirs(output_dir, exist_ok=True)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Synchronise only when CUDA exists; the call raises on CPU-only hosts.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        evaluator_time = time.time()
        for target, image, output in zip(targets, images, outputs):
            item = data_loader.dataset.df.iloc[target['idx'].item()]
            del target
            # Ground truth is re-read from the dataframe, i.e. in the
            # coordinates of the original (untransformed) image.
            target_boxes, target_labels = get_target_boxes_labels(item)
            target_boxes = torch.from_numpy(target_boxes)
            boxes = output['boxes'][output['scores'] >= threshold]
            boxes = to_coco(boxes)
            with Image.open(get_image_path(
                    item, data_loader.dataset.root)) as original_image:
                ow, oh = original_image.size
            _, h, w = image.shape
            # Map predictions from the network input size back to the
            # original image size.
            w_scale = ow / w
            h_scale = oh / h
            scaled_boxes = scale_boxes(boxes, w_scale, h_scale)
            # If testing, scores is not important
            scores.append(
                dict(score_boxes(
                    truth_boxes=from_coco(target_boxes).numpy(),
                    truth_label=np.ones(target_labels.shape[0]),
                    preds_center=torch.stack(
                        [scaled_boxes[:, 0] + scaled_boxes[:, 2] * 0.5,
                         scaled_boxes[:, 1] + scaled_boxes[:, 3] * 0.5]
                    ).t().numpy(),
                    preds_label=np.ones(boxes.shape[0]),
                ), image_id=item.image_id))
            clf_gt.append({
                'labels': get_clf_gt(
                    target_boxes=target_boxes,
                    target_labels=target_labels,
                    boxes=scaled_boxes),
                'image_id': item.image_id,
            })
            if output_dir:
                # Draw in network-input coordinates: scale the ground truth
                # down instead of scaling the image up.
                unscaled_target_boxes = scale_boxes(
                    target_boxes, 1 / w_scale, 1 / h_scale)
                _save_predictions(
                    image, boxes, unscaled_target_boxes,
                    path=os.path.join(
                        output_dir, '{}.jpg'.format(item.image_id)))

        evaluator_time = time.time() - evaluator_time
        metric_logger.update(
            model_time=model_time, evaluator_time=evaluator_time)

    metric_logger.synchronize_between_processes()
    print('Averaged stats:', metric_logger)
    metrics = get_metrics(scores)
    print_metrics(metrics)

    return metrics, (scores, clf_gt)

In [38]:
# Label given to predicted boxes that match no ground-truth box.
SEG_FP = 'unk'

def get_clf_gt(target_boxes, target_labels, boxes, min_iou=0.5) -> str:
    """ Create ground truth for classification from predicted boxes
    in the same format as original ground truth, with addition of a class
    (``SEG_FP``) for predicted boxes that match no ground-truth box.
    Perform matching using box IoU.

    Args:
        target_boxes: ground-truth boxes, COCO format (x, y, w, h).
        target_labels: one label per ground-truth box.
        boxes: predicted boxes, COCO format (x, y, w, h).
        min_iou: minimum IoU with the best ground-truth box for a prediction
            to inherit that box's label.

    Returns:
        ``'label x y w h label x y w h ...'`` with one record per predicted
        box, or ``''`` when there are no predictions.
    """
    n_boxes = boxes.shape[0]
    if n_boxes == 0:
        return ''

    if target_boxes.shape[0] == 0:
        # Nothing to match against: every prediction is a false positive.
        labels = [SEG_FP] * n_boxes
    else:
        ious = bbox_overlaps(from_coco(target_boxes).numpy(),
                             from_coco(boxes).numpy())
        # Best ground-truth candidate (row index) for every prediction.
        best_target = np.argmax(ious, axis=0)
        assert best_target.shape == (n_boxes,)
        labels = [
            target_labels[n] if ious[n, k] >= min_iou else SEG_FP
            for k, n in enumerate(best_target)
        ]

    records = []
    for box, label in zip(boxes, labels):
        coords = ' '.join(str(int(round(float(x)))) for x in box)
        records.append(label + ' ' + coords)
    return ' '.join(records)



def bbox_overlaps(
        bboxes1: np.ndarray, bboxes2: np.ndarray, mode='iou') -> np.ndarray:
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Boxes are ``x1, y1, x2, y2`` with inclusive pixel coordinates (widths
    and heights are computed with ``+ 1``).

    GH:open-mmlab/mmdetection/mmdet/core/evaluation/bbox_overlaps.py

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground, i.e. over the area of the bboxes1 box)

    Returns:
        ious(ndarray): shape (n, k), dtype float32
    """

    assert mode in ['iou', 'iof']

    first = bboxes1.astype(np.float32)
    second = bboxes2.astype(np.float32)
    n_first, n_second = first.shape[0], second.shape[0]
    if n_first * n_second == 0:
        return np.zeros((n_first, n_second), dtype=np.float32)

    # Loop over the smaller set; the result is transposed back at the end.
    swapped = n_first > n_second
    if swapped:
        first, second = second, first

    def _areas(b):
        return (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)

    area_first = _areas(first)
    area_second = _areas(second)
    result = np.zeros((first.shape[0], second.shape[0]), dtype=np.float32)
    for row in range(first.shape[0]):
        inter_x1 = np.maximum(first[row, 0], second[:, 0])
        inter_y1 = np.maximum(first[row, 1], second[:, 1])
        inter_x2 = np.minimum(first[row, 2], second[:, 2])
        inter_y2 = np.minimum(first[row, 3], second[:, 3])
        overlap = np.maximum(inter_x2 - inter_x1 + 1, 0) * np.maximum(
            inter_y2 - inter_y1 + 1, 0)
        if mode == 'iou':
            denom = area_first[row] + area_second - overlap
        elif swapped:
            # After the swap the original bboxes1 areas live in area_second.
            denom = area_second
        else:
            denom = area_first[row]
        result[row, :] = overlap / denom
    return result.T if swapped else result

# Default colour for drawn boxes.
BOX_COLOR = (255, 0, 0)
def visualize_box(image: np.ndarray, bbox, color=BOX_COLOR, thickness=2):
    """Draw one ``(x, y, width, height)`` box onto ``image`` in place."""
    x_min, y_min, w, h = bbox
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_min + w), int(y_min + h))
    cv2.rectangle(image, top_left, bottom_right,
                  color=color, thickness=thickness)


def visualize_boxes(image: np.ndarray, boxes, **kwargs):
    """Return a copy of ``image`` with every box in ``boxes`` drawn on it.

    Extra keyword arguments are forwarded to ``visualize_box``.
    """
    canvas = image.copy()
    for bbox in boxes:
        visualize_box(canvas, bbox, **kwargs)
    return canvas

def _save_predictions(image, boxes, target, path: Path):
    """Save ``image`` with predicted and ground-truth boxes drawn on it.

    Args:
        image: tensor multiplied by 255 and cast to uint8 before drawing;
            its first axis is moved to the last position.
        boxes: predicted boxes, drawn with the default colour, thickness 3.
        target: ground-truth boxes, drawn in (0, 255, 0), thickness 2.
        path: destination file.
    """
    pixels = (image.detach().cpu() * 255).to(torch.uint8).numpy()
    # Move axis 0 to the end (channels-first -> channels-last).
    canvas = np.rollaxis(pixels, 0, 3)
    canvas = visualize_boxes(canvas, boxes, thickness=3)
    canvas = visualize_boxes(canvas, target, color=(0, 255, 0), thickness=2)
    Image.fromarray(canvas).save(path)

In [14]:
# Directory holding the training images; used for both the train and the
# validation dataset (both come from the same annotated set).
root = 'input/train_images'

# Train

In [16]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and word
# NOTE(review): `num_classes` is not referenced by the code visible in this
# notebook; `build_model` hard-codes `num_classes=2`.
num_classes = 2

In [23]:
# use our dataset and defined transformations
dataset = Dataset(
        df_train, get_transform(train=True), root, skip_empty=False)

dataset_val = Dataset(
        df_val, get_transform(train=False), root, skip_empty=False)

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=16, shuffle=True, num_workers=0,
    collate_fn=collate_fn)

data_loader_val = torch.utils.data.DataLoader(
    dataset_val, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=collate_fn)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

# let's train it for 10 epochs
num_epochs = 15

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50)
    # update the learning rate
    lr_scheduler.step()

    #evaluate on the test dataset
    metrics, (scores, clf_gt) = evaluate(model, data_loader_val, device=device, output_dir='./submissions/',
            threshold=0.3)

torch.save(model.state_dict(), 'models/detection/segmentation.pth')

print("That's it!")

Test image max size 1685 px
Epoch: [0]  [  0/194]  eta: 0:11:08  lr: 0.000031  loss: 0.1749 (0.1749)  loss_classifier: 0.0959 (0.0959)  loss_box_reg: 0.0521 (0.0521)  loss_objectness: 0.0103 (0.0103)  loss_rpn_box_reg: 0.0165 (0.0165)  time: 3.4466  data: 2.0547  max mem: 7099
Epoch: [0]  [ 10/194]  eta: 0:10:33  lr: 0.000290  loss: 0.2128 (0.2064)  loss_classifier: 0.1112 (0.1066)  loss_box_reg: 0.0700 (0.0682)  loss_objectness: 0.0090 (0.0092)  loss_rpn_box_reg: 0.0227 (0.0224)  time: 3.4453  data: 2.0649  max mem: 7387
Epoch: [0]  [ 20/194]  eta: 0:09:54  lr: 0.000548  loss: 0.2176 (0.2069)  loss_classifier: 0.1112 (0.1057)  loss_box_reg: 0.0700 (0.0692)  loss_objectness: 0.0088 (0.0090)  loss_rpn_box_reg: 0.0235 (0.0231)  time: 3.4159  data: 2.0366  max mem: 7387


KeyboardInterrupt: 

## Test set prediction

In [42]:
# map_location lets a checkpoint saved from a GPU run load on whatever device
# is available now (including a CPU-only machine).
model.load_state_dict(
    torch.load('models/detection/segmentation.pth', map_location=device))

<All keys matched successfully>

In [43]:
# Move the restored model to the target device and switch it to inference
# mode. The cell displays the value of the last expression, i.e. the full
# module tree.
model.to(device)
model.eval()

FasterRCNN(
  (transform): ModelTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(original_name=FrozenBatchNorm2d)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(original_name=FrozenBatchNorm2d)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(original_name=FrozenBatchNorm2d)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(original_name=FrozenBatchNorm2d)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): 

In [44]:
# Directory holding the test images.
test_root = 'input/test_images'

In [45]:
# Blank out the labels column so that `get_target_boxes_labels` yields no
# ground-truth boxes for the test images. This mutates `df_test` in place.
df_test['labels'] = ''

# NOTE: `is_train` keeps its default (True), so items go through the branch
# that also returns (empty) boxes and labels.
dataset_test = Dataset(
        df_test, get_transform(train=False), test_root, skip_empty=False)

# NOTE(review): this sampler is created but not passed to the DataLoader
# below, which is built with shuffle=False.
test_sampler = torch.utils.data.SequentialSampler(dataset_test)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=collate_fn)

# Only the per-image label strings (second element of the inner tuple) are
# kept; metrics are discarded because there is no ground truth to score.
_, (_, pred_results) = evaluate(
            model, data_loader_test, device=device, output_dir='./submissions/',
            threshold=0.5)

Test image max size 1685 px
Test:  [   0/4150]  eta: 0:34:41  model_time: 0.2453 (0.2453)  evaluator_time: 0.1147 (0.1147)  time: 0.5016  data: 0.1296  max mem: 7387
Test:  [ 100/4150]  eta: 0:33:39  model_time: 0.2289 (0.2250)  evaluator_time: 0.1152 (0.1123)  time: 0.4997  data: 0.1469  max mem: 7387
Test:  [ 200/4150]  eta: 0:32:54  model_time: 0.2244 (0.2245)  evaluator_time: 0.1147 (0.1124)  time: 0.5151  data: 0.1654  max mem: 7387
Test:  [ 300/4150]  eta: 0:32:26  model_time: 0.2284 (0.2240)  evaluator_time: 0.1137 (0.1127)  time: 0.5190  data: 0.1609  max mem: 7387
Test:  [ 400/4150]  eta: 0:31:42  model_time: 0.2224 (0.2236)  evaluator_time: 0.1137 (0.1126)  time: 0.5124  data: 0.1650  max mem: 7387
Test:  [ 500/4150]  eta: 0:30:59  model_time: 0.2224 (0.2237)  evaluator_time: 0.1147 (0.1125)  time: 0.5231  data: 0.1714  max mem: 7387
Test:  [ 600/4150]  eta: 0:30:15  model_time: 0.2254 (0.2239)  evaluator_time: 0.1127 (0.1126)  time: 0.5147  data: 0.1662  max mem: 7387
Test: 

In [46]:
pred_results[1]

{'labels': 'unk 1132 1693 91 124 unk 1456 1619 96 108 unk 1444 2583 96 151 unk 800 1066 100 112 unk 1605 1502 82 133 unk 1457 2342 73 106 unk 1298 2088 73 81 unk 1439 1161 94 79 unk 973 1865 84 115 unk 977 803 88 115 unk 1443 1320 92 117 unk 972 1012 92 95 unk 1276 2440 91 139 unk 182 1862 62 95 unk 338 1540 70 74 unk 1598 1292 87 137 unk 1610 1427 66 63 unk 177 2177 88 125 unk 1149 1609 65 85 unk 816 987 77 69 unk 1600 1634 77 97 unk 1458 1247 71 73 unk 964 1668 112 111 unk 799 2573 96 82 unk 1296 933 63 73 unk 1143 1407 67 70 unk 973 1773 78 94 unk 976 2075 88 117 unk 172 2069 83 114 unk 1446 1540 95 79 unk 645 1316 94 78 unk 987 934 67 73 unk 1293 1011 94 82 unk 497 2770 72 76 unk 1455 1930 80 129 unk 1162 2254 38 115 unk 982 1996 70 83 unk 813 803 86 112 unk 816 2657 66 111 unk 814 2774 75 84 unk 1480 1819 50 112 unk 500 2112 82 74 unk 1473 2053 48 105 unk 660 2766 61 85 unk 180 2539 53 97 unk 1292 1733 89 97 unk 1294 2179 72 73 unk 662 2411 64 124 unk 500 747 65 60 unk 338 1949 60

In [47]:
len(pred_results)

4150

In [50]:
# Create the output directory if needed (no error when it already exists).
os.makedirs('./detection_output', exist_ok=True)

# One row per test image: the detected boxes as a label string plus the
# image id; the dataframe index is not written.
pd.DataFrame(pred_results).to_csv('detection_output/detected.csv', index=False)