In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show what is available under the input directory.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
import gc

In [None]:
# Run a full garbage-collection pass before the heavy work starts.
gc.collect()

In [None]:
import glob
import math
from functools import partial
from multiprocessing import Pool

In [None]:
import torch
import cv2
import numpy as np
import pandas as pd
import tqdm
from skimage.morphology import label

In [None]:
import torch.utils.model_zoo as model_zoo
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Check whether PyTorch can see a CUDA device.
torch.cuda.is_available()

In [None]:
# Count the entries in the test image directory.
len(os.listdir("../input/airbus-ship-detection/test_v2"))

In [None]:
def predict_batch_classifier(images, model):
    """Run the classifier on one batch and return per-image probabilities.

    Args:
        images: sequence of equally sized, channels-last image arrays
            (batch, height, width, channel after stacking); values are
            divided by 255 before normalisation.
        model: callable mapping the normalised (batch, channel, height,
            width) float tensor to logits.

    Returns:
        numpy array with the sigmoid of the model output, with singleton
        axes removed. The result is always at least 1-D, so a batch holding
        a single image yields an array of length 1 that callers can iterate.
    """
    # Ship the raw array to the device first and convert to float there.
    batch = torch.from_numpy(np.array(images)).to(DEVICE)
    batch = batch.float() / 255
    # Channels-last -> channels-first, as expected by the conv layers.
    batch = batch.permute(0, 3, 1, 2)
    batch = normalize(batch)

    probs = torch.sigmoid(model(batch))
    result = probs.detach().cpu().numpy().squeeze()

    # squeeze() collapses a one-image batch to a 0-d array, which cannot be
    # iterated by list.extend(); restore the batch axis in that case.
    if result.ndim == 0:
        result = result.reshape(1)
    return result

In [None]:
def batch_iterate(cases, batch_size):
    """Yield consecutive slices of ``cases`` holding ``batch_size`` items each.

    All full batches are produced first; when the length of ``cases`` is not
    a multiple of ``batch_size`` the leftover items follow as one shorter
    final batch.
    """
    n_full, n_left = divmod(len(cases), batch_size)

    start = 0
    for _ in range(n_full):
        stop = start + batch_size
        yield cases[start:stop]
        start = stop

    if n_left != 0:
        yield cases[-n_left:]

In [None]:
def load_single_image(image_file):
    """Read an image from disk and return it in RGB channel order.

    Args:
        image_file: path of the image file to read.

    Returns:
        The decoded colour image converted from OpenCV's BGR order to RGB.

    Raises:
        IOError: if OpenCV could not read or decode ``image_file``.
    """
    image = cv2.imread(image_file, cv2.IMREAD_COLOR)

    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cvtColor error.
    if image is None:
        raise IOError("Could not read image: {}".format(image_file))

    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [None]:
def resize_image(image, size):
    """Resize ``image`` to a ``size`` x ``size`` square with cv2.resize defaults."""
    target_shape = (size, size)
    return cv2.resize(image, target_shape)

In [None]:
# Directory holding the test images to run inference on.
IMAGES_SOURCE = "../input/airbus-ship-detection/test_v2/"

# Square side length images are resized to, and batch size, for each model.
CLASSIFIER_IMAGE_SIZE = 384
CLASSIFIER_BATCH_SIZE = 64
UNET_IMAGE_SIZE = 512
UNET_BATCH_SIZE = 16
# Probability cut-offs: image-level "contains a ship" and pixel-level mask.
CLASSIFIER_THRESHOLD = 0.8
UNET_THRESHOLD = 0.5
# Side length the segmentation output is interpolated to.
ORIGINAL_IMAGE_SIZE = 768

# Number of worker processes in the multiprocessing Pool used for resizing.
N_THREADS = 4

# Use the GPU when one is visible and fall back to the CPU otherwise, in line
# with the map_location fallback used when the checkpoints are loaded.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = np.float32

In [None]:
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with one pixel of padding."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

In [None]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The block input (optionally passed through ``downsample``) is added to
    the output of the second batch-norm before the final ReLU.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """
        Args:
            inplanes: number of input channels.
            planes: number of output channels.
            stride: stride of the first convolution.
            downsample: optional module applied to the input to produce the
                shortcut branch; the raw input is used when None.
        """
        super().__init__()
        # Only the first convolution receives ``stride``.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the two conv stages and add the shortcut branch."""
        shortcut = x if self.downsample is None else self.downsample(x)

        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))

        main += shortcut
        return self.relu(main)

In [None]:
class ResNet(nn.Module):
    """ResNet backbone with a configurable stride for the second stage.

    Args:
        block: residual block class; must expose an ``expansion`` attribute
            and accept ``(inplanes, planes, stride, downsample)``.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final fully connected layer.
        layer2_stride: stride of the first block of ``layer2``.
    """

    def __init__(self, block, layers, num_classes=1000, layer2_stride=2):
        super(ResNet, self).__init__()
        # Channel count fed into the next stage; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=layer2_stride)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Adaptive pooling always produces a 1x1 map, so forward() works for
        # any input size and any layer2_stride. For a 7x7 feature map it is
        # identical to the fixed AvgPool2d(7, stride=1) it replaces, which
        # failed (or broke the fc input size) for every other map size.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Conv weights: zero-mean normal scaled by the layer's fan-out;
        # batch-norm starts as the identity (weight 1, bias 0).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks producing ``planes * expansion`` channels.

        Only the first block receives ``stride``; it also gets a 1x1 conv +
        batch-norm shortcut whenever the resolution or channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the ``num_classes`` outputs of the fc layer for a batch of images."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything but the batch axis for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [None]:
# Download location of the ImageNet weights published with torchvision.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
}


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet.
            The downloaded weights are loaded with ``load_state_dict``, so
            the model must keep the default ``num_classes`` for the fc layer
            shapes to match.
        **kwargs: forwarded to ``ResNet`` (e.g. ``num_classes``,
            ``layer2_stride``).
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return model

In [None]:
class SupervisedClassificationModel(nn.Module):
    """ResNet-18 feature extractor followed by a 1x1 conv and global max-pool.

    Produces one logit per image.
    """

    def __init__(self):
        super().__init__()
        self.network = resnet18(num_classes=1000, pretrained=False, layer2_stride=4)
        # One-channel logit map over the 512-channel feature map.
        self.non_empty_conv = nn.Conv2d(512, 1, kernel_size=1, padding=0)
        self.pool = nn.AdaptiveMaxPool2d(1)

    def forward(self, x):
        """Return logits of shape (batch, 1)."""
        backbone = self.network
        # The backbone's avgpool/fc head is skipped; only the conv stages run.
        stages = (
            backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool,
            backbone.layer1, backbone.layer2, backbone.layer3, backbone.layer4,
        )

        features = x
        for stage in stages:
            features = stage(features)

        logit_map = self.non_empty_conv(features)
        # Global max over the map, then drop the two trailing 1x1 axes.
        pooled = self.pool(logit_map)
        return pooled.squeeze(-1).squeeze(-1)

In [None]:
# Build the classifier on the inference device and restore its trained weights.
model = SupervisedClassificationModel().to(DEVICE)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state_dict = torch.load(
    "../input/airbusspeedprize/final_model.pth",
    map_location="cuda:0" if torch.cuda.is_available() else "cpu"
)
model.load_state_dict(state_dict)
# Switch to evaluation mode for inference.
model = model.eval()

In [None]:
class TorchBatchNormalizer():
    """Callable that standardises an image batch with fixed per-channel statistics.

    The mean and std are stored as tensors of shape (1, 3, 1, 1) so they
    broadcast over a (batch, 3, height, width) input.
    """

    def __init__(self, device, dtype):
        """
        Args:
            device: torch device the statistics tensors are moved to.
            dtype: numpy dtype the statistics are cast to before conversion.
        """
        channel_mean = [0.485, 0.456, 0.406]
        channel_std = [0.229, 0.224, 0.225]

        self.mean = self._as_tensor(channel_mean, device, dtype)
        self.std = self._as_tensor(channel_std, device, dtype)

    @staticmethod
    def _as_tensor(values, device, dtype):
        """Reshape three per-channel values to (1, 3, 1, 1) and move them to ``device``."""
        stats = np.reshape(values, (1, 3, 1, 1)).astype(dtype)
        return torch.from_numpy(stats).to(device)

    def __call__(self, images):
        """Subtract the channel mean and divide by the channel std."""
        centered = images - self.mean
        return centered / self.std

In [None]:
def extract_id(case):
    """Return the file name of ``case`` without its directory and final extension."""
    file_name = os.path.basename(case)
    stem, _extension = os.path.splitext(file_name)
    return stem

In [None]:
# Shared normaliser used by predict_batch_classifier.
normalize = TorchBatchNormalizer(DEVICE, DTYPE)

In [None]:
# Paths of all .jpg files in the test image directory.
cases = glob.glob(os.path.join(IMAGES_SOURCE, "*.jpg"))

In [None]:
# Number of .jpg files found in IMAGES_SOURCE.
len(cases)

In [None]:
import time

In [None]:
# Running total (in seconds) that the later timed sections add to.
total_time = 0

In [None]:
# Classify every test image. Resizing of the next batch runs in the worker
# pool while the model predicts the current batch. The stopwatch is paused
# around every load_single_image pass, so total_time_classifier does not
# include the time spent reading images from disk.
classifier_probs = []

resize = partial(resize_image, size=CLASSIFIER_IMAGE_SIZE)

total_time_classifier = 0
current_start = time.time()

with Pool(N_THREADS) as pool:

    # Prime the pipeline: load the first batch and start resizing it.
    batch_iterator = batch_iterate(cases, CLASSIFIER_BATCH_SIZE)
    cases_batch = next(batch_iterator)
    total_time_classifier += (time.time() - current_start)
    images_batch = [load_single_image(im) for im in cases_batch]
    current_start = time.time()
    future_result = pool.map_async(resize, images_batch)

    with torch.no_grad():
        # The remaining batches are materialised into a list before being
        # handed to the progress bar.
        for n, cases_batch in enumerate(tqdm.tqdm_notebook(
            list(batch_iterator),
            desc="Predicting", ncols=70)):

            # Wait for the resized images of the previous batch.
            images = future_result.get()    
            total_time_classifier += (time.time() - current_start)
            # Untimed: read the next batch from disk.
            images_batch = [load_single_image(im) for im in cases_batch]
            current_start = time.time()
            # Start resizing the next batch, then predict the previous one.
            future_result = pool.map_async(resize, images_batch)

            probs = predict_batch_classifier(images, model)
            classifier_probs.extend(probs)

        # process the last batch
        images = future_result.get()
        probs = predict_batch_classifier(images, model)
        classifier_probs.extend(probs)
        
        total_time_classifier += (time.time() - current_start)

In [None]:
# Report the classifier time in minutes and add it to the running total.
# Note: re-running this cell adds the same amount to total_time again.
print(total_time_classifier / 60)
total_time += total_time_classifier

In [None]:
# Move the classifier to the CPU and release PyTorch's cached GPU memory.
model = model.to("cpu")
torch.cuda.empty_cache()

In [None]:
# Timed: split the image paths by classifier decision.
current_start = time.time()

# An image counts as non-empty when its probability exceeds the threshold.
non_empty = np.array(classifier_probs) > CLASSIFIER_THRESHOLD
non_empty_cases = np.array(cases)[non_empty]
empty_cases = np.array(cases)[~non_empty]

total_time += time.time() - current_start

In [None]:
# Number of images the classifier passed on to segmentation.
len(non_empty_cases)

In [None]:
class DecoderBlockV2(nn.Module):
    """Decoder stage that doubles the spatial resolution of its input.

    A ConvRelu maps ``in_channels`` to ``middle_channels``; the upsampling
    path that follows depends on ``is_deconv``.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        """
        Args:
            in_channels: channels of the incoming feature map.
            middle_channels: channels after the first ConvRelu.
            out_channels: channels of the block output.
            is_deconv: True for a transposed convolution followed by ReLU,
                False for bilinear upsampling followed by a ConvRelu.
        """
        super().__init__()
        self.in_channels = in_channels

        entry = ConvRelu(in_channels, middle_channels)

        if is_deconv:
            # Parameters for the deconvolution were chosen to avoid artifacts,
            # following https://distill.pub/2016/deconv-checkerboard/
            upsampling = [
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.ReLU(inplace=True),
            ]
        else:
            upsampling = [
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
                ConvRelu(middle_channels, out_channels),
            ]

        self.block = nn.Sequential(entry, *upsampling)

    def forward(self, x):
        """Run the input through the decoder block."""
        return self.block(x)

In [None]:
class ConvRelu(nn.Module):
    """3x3 convolution (built by ``conv3x3``) followed by an in-place ReLU."""

    def __init__(self, in_, out):
        """
        Args:
            in_: number of input channels.
            out: number of output channels.
        """
        super().__init__()
        self.conv = conv3x3(in_, out)
        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        """Convolve ``x`` and apply the activation."""
        return self.activation(self.conv(x))

In [None]:
class UNet(nn.Module):
    """U-Net style segmentation network with a ResNet-18 encoder.

    The decoder concatenates each upsampled map with the encoder map of the
    same resolution. The output is interpolated to
    ORIGINAL_IMAGE_SIZE x ORIGINAL_IMAGE_SIZE.
    """

    def __init__(self, num_classes=1, num_filters=32, pretrained=True, is_deconv=False):
        """
        :param num_classes: number of output channels of the final 1x1 convolution
        :param num_filters: accepted for interface compatibility; not used
        :param pretrained: accepted for interface compatibility; not used --
            the encoder is always built with ``resnet18()`` and its default
            initialisation
        :is_deconv:
            False: bilinear interpolation is used in decoder
            True: deconvolution is used in decoder
        """
        super().__init__()
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)

        self.encoder = resnet18()
        self.relu = nn.ReLU(inplace=True)
        # Encoder stem; the encoder's own maxpool is replaced by self.pool.
        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu)

        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        # Input channels of dec4..dec1 are the previous decoder output plus
        # the concatenated encoder map (256+256, 128+128, 64+64, 32+64).
        self.dec5 = DecoderBlockV2(512, 256, 256, is_deconv)
        self.dec4 = DecoderBlockV2(512, 256, 128, is_deconv)
        self.dec3 = DecoderBlockV2(256, 128, 64, is_deconv)
        self.dec2 = DecoderBlockV2(128, 64, 32, is_deconv)
        self.dec1 = DecoderBlockV2(96, 64, 32, is_deconv)
        # Honour num_classes (previously hard-coded to 1, the default).
        self.final = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        """Return logits of shape (batch, num_classes, ORIGINAL_IMAGE_SIZE, ORIGINAL_IMAGE_SIZE)."""
        conv1 = self.conv1(x)
        conv2 = self.conv2(self.pool(conv1))
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        dec5 = self.dec5(conv5)
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(torch.cat([dec2, conv1], 1))

        output = self.final(dec1)
        return F.interpolate(output, size=(ORIGINAL_IMAGE_SIZE, ORIGINAL_IMAGE_SIZE))
In [None]:
# Build the segmentation network on the inference device and restore its
# weights; this rebinds `model`, which previously held the classifier.
model = UNet().to(DEVICE)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state_dict = torch.load(
    "../input/airbusspeedprize/resnet18_104_0.4799_added2.pth",
    map_location="cuda:0" if torch.cuda.is_available() else "cpu"
)
# This checkpoint keeps the weights under its "state_dict" key.
model.load_state_dict(state_dict["state_dict"])
# Switch to evaluation mode for inference.
model = model.eval()

In [None]:
def predict_batch_unet(images, model):
    """Segment one batch of images and return boolean masks.

    Args:
        images: sequence of equally sized, channels-last image arrays
            (batch, height, width, channel after stacking); values are
            divided by 255. No mean/std normalisation is applied here.
        model: callable mapping the (batch, channel, height, width) float
            tensor to logits.

    Returns:
        numpy array of thresholded predictions with the batch axis first.
        A single-channel model output (batch, 1, H, W) becomes
        (batch, H, W), including for a batch holding one image.
    """
    # Ship the raw array to the device first and convert to float there.
    batch = torch.from_numpy(np.array(images)).to(DEVICE)
    batch = batch.float() / 255
    # Channels-last -> channels-first, as expected by the conv layers.
    batch = batch.permute(0, 3, 1, 2)

    probs = torch.sigmoid(model(batch))
    masks = probs > UNET_THRESHOLD

    # Drop only the channel axis. A bare squeeze() would also drop the batch
    # axis of a one-image batch, making callers that extend() a list with
    # the result add image rows instead of one mask.
    return masks.squeeze(1).detach().cpu().numpy()

In [None]:
# Segment every image the classifier flagged as non-empty. Resizing of the
# next batch runs in the worker pool while the model predicts the current
# batch. The stopwatch is paused around every load_single_image pass, so
# total_time_unet does not include the time spent reading images from disk.
unet_predictions = []

# Build the resizer from resize_image directly instead of re-wrapping the
# `resize` partial left over from the classifier cell, so this cell does not
# depend on that earlier binding.
resize = partial(resize_image, size=UNET_IMAGE_SIZE)

total_time_unet = 0
current_start = time.time()

with Pool(N_THREADS) as pool:

    # Prime the pipeline: load the first batch and start resizing it.
    batch_iterator = batch_iterate(non_empty_cases, UNET_BATCH_SIZE)
    cases_batch = next(batch_iterator)
    total_time_unet += (time.time() - current_start)
    images_batch = [load_single_image(im) for im in cases_batch]
    current_start = time.time()
    future_result = pool.map_async(resize, images_batch)

    with torch.no_grad():
        # The remaining batches are materialised into a list before being
        # handed to the progress bar.
        for n, cases_batch in enumerate(tqdm.tqdm_notebook(
            list(batch_iterator),
            desc="Predicting", ncols=70)):

            # Wait for the resized images of the previous batch.
            images = future_result.get()
            total_time_unet += (time.time() - current_start)
            # Untimed: read the next batch from disk.
            images_batch = [load_single_image(im) for im in cases_batch]
            current_start = time.time()
            # Start resizing the next batch, then predict the previous one.
            future_result = pool.map_async(resize, images_batch)

            preds = predict_batch_unet(images, model)
            unet_predictions.extend(preds)

        # process the last batch
        images = future_result.get()
        preds = predict_batch_unet(images, model)
        unet_predictions.extend(preds)

        total_time_unet += (time.time() - current_start)

In [None]:
# Report the segmentation time in minutes and add it to the running total.
# Note: re-running this cell adds the same amount to total_time again.
print(total_time_unet / 60)
total_time += total_time_unet

In [None]:
def rle_encode(img):
    """Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    The mask is traversed column by column (transpose, then flatten).
    Returns a space-separated string of "start length" pairs with 1-based
    starts; an all-background mask gives an empty string.
    """
    flat = img.T.ravel()
    # Pad with background so runs touching either end still have two edges.
    padded = np.concatenate([[0], flat, [0]])
    changed = padded[1:] != padded[:-1]
    boundaries = np.where(changed)[0] + 1

    # Boundaries alternate between run start and the position after run end.
    starts = boundaries[::2]
    ends = boundaries[1::2]

    tokens = []
    for start, end in zip(starts, ends):
        tokens.append(str(start))
        tokens.append(str(end - start))
    return " ".join(tokens)

In [None]:
def multi_rle_encode(img, min_size=60):
    """Run-length encode each connected component of a mask separately.

    Args:
        img: mask array passed to ``skimage.morphology.label``.
        min_size: minimum number of pixels a component must have to be
            kept; smaller components are dropped. Defaults to 60.

    Returns:
        List of RLE strings, one per kept component, in label order. The
        list is empty when no component reaches ``min_size``.
    """
    labels, n_labels = label(img, return_num=True)

    rles = []
    # Label 0 is the background; components are numbered from 1. Each
    # component mask is built and encoded in turn rather than all at once.
    for k in range(1, n_labels + 1):
        ship = labels == k
        if ship.sum() >= min_size:
            rles.append(rle_encode(ship))
    return rles

In [None]:
# Timed: build the submission rows. The stopwatch started here is stopped
# in the cell that writes submission.csv.
current_start = time.time()

ids, rles = [], []

# Images the classifier rejected get one row with an empty encoding.
for case in empty_cases:
    ids.append(extract_id(case) + ".jpg")
    rles.append("")

for case, mask in zip(non_empty_cases, unet_predictions):
    # Named image_id rather than `id` so the builtin is not overwritten in
    # the notebook's global namespace.
    image_id = extract_id(case) + ".jpg"
    case_rles = multi_rle_encode(mask)

    if case_rles:
        # One row per detected ship.
        rles.extend(case_rles)
        ids.extend([image_id] * len(case_rles))
    else:
        # No component survived the size filter: emit a single empty row.
        ids.append(image_id)
        rles.append("")

In [None]:
# Total number of images across the empty and non-empty groups.
len(empty_cases) + len(non_empty_cases)

In [None]:
# Assemble the rows into a table and write the submission file.
submission = pd.DataFrame({"ImageId": ids, "EncodedPixels": rles})
submission.to_csv("submission.csv", index=False)

# Stop the stopwatch started before the submission rows were built.
total_time += time.time() - current_start

In [None]:
# Number of distinct images in the submission.
submission.ImageId.nunique()

In [None]:
# (rows, columns) of the submission table.
submission.shape

In [None]:
# Accumulated time of the timed sections, converted from seconds to minutes.
print("Total time {:.2f} minutes:".format(total_time / 60))