# Submission notebook for Severstal: Steel Defect Detection
https://www.kaggle.com/c/severstal-steel-defect-detection

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader, sampler
from torchvision import transforms as T
from torchvision import models

from PIL import Image

from tqdm import tqdm

In [3]:
# Kaggle input locations: the competition test images and the trained weights.
TEST_PATH = '/kaggle/input/severstal-steel-defect-detection/test_images'
MODEL_PATH = '/kaggle/input/unetd-model-weights/unetd_D5F48E20_final.pth'

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
# `dtype` is the matching legacy tensor type for the selected device.
_use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if _use_cuda else 'cpu')
dtype = torch.cuda.FloatTensor if _use_cuda else torch.FloatTensor

## Transforms

In [4]:
# Single-channel normalisation statistics (one value each because the images
# are fed to the network as one grayscale channel).
# NOTE(review): presumably measured on the training images - confirm.
img_mean, img_std = (0.3438812517320017,), (0.13965334396720055,)

# PIL image -> float tensor -> normalised tensor.
transform = T.Compose([T.ToTensor(), T.Normalize(mean=img_mean, std=img_std)])

## Dataset

In [5]:
# Tiling geometry: every image is cut into NUM_FRAMES equally wide frames that
# overlap their neighbours by OVERLAP columns. Index 1 of each size tuple is
# the width (it is the one used to crop along the last tensor axis).
NUM_FRAMES = 7
FRAME_SIZE = (256, 256)
IMG_SIZE = (256, 1600)

# The columns by which the frames laid side by side exceed the image width are
# shared evenly between the NUM_FRAMES - 1 seams.
_FRAME_W, _IMG_W = FRAME_SIZE[1], IMG_SIZE[1]
OVERLAP = (NUM_FRAMES * _FRAME_W - _IMG_W) // (NUM_FRAMES - 1)

class FramesDataset(Dataset):
    """Severstal kaggle competition dataset, served frame by frame.

    Every image contributes NUM_FRAMES consecutive items: item ``index`` is
    frame ``index % NUM_FRAMES`` of image ``index // NUM_FRAMES``, with the
    images taken in sorted file-name order.

    Args:
        datadir: directory whose entries are used as the image list.
        imglist: accepted for signature compatibility but ignored; the list
            is always built from ``os.listdir(datadir)``.
        transform: callable applied to the grayscale PIL image. Its result is
            sliced along the last axis to cut the frame out.
    """
    def __init__(self,
                 datadir,
                 imglist=None,
                 transform=T.ToTensor()):
        self.datadir = datadir
        self.transform = transform
        self.imglist = sorted(os.listdir(datadir))

    def __len__(self):
        return len(self.imglist) * NUM_FRAMES

    def __getitem__(self, index):
        """Return ``(frame, file_name)`` for the flat frame index."""
        img_idx, frame_idx = divmod(index, NUM_FRAMES)

        # Neighbouring frames start (frame width - OVERLAP) columns apart.
        step = FRAME_SIZE[1] - OVERLAP
        start = frame_idx * step

        fname = self.imglist[img_idx]
        path = os.path.join(self.datadir, fname)
        # Per the original author's check, all input images have R == G == B.
        gray = Image.open(path).convert(mode='L')

        # The whole image is transformed first, then the frame is cropped.
        full = self.transform(gray)
        frame = full[:, :, start:start + FRAME_SIZE[1]]

        return frame, fname

# The transform must be passed by keyword: the second positional parameter of
# FramesDataset is `imglist`, so a positional `transform` is silently dropped
# and the default ToTensor-only pipeline (no normalisation) is used instead.
# NOTE(review): confirm the weights were trained on inputs normalised with
# img_mean / img_std before relying on this.
ds = FramesDataset(TEST_PATH, transform=transform)

## Loader

In [6]:
# One batch holds exactly the NUM_FRAMES frames of a single image, so the
# order must stay fixed (shuffle=False).
params_loader = dict(
    batch_size=NUM_FRAMES,
    num_workers=4,
    pin_memory=True,
)

# Data loaders
loader = DataLoader(ds, shuffle=False, **params_loader)

## Model

In [7]:
def swish(x):
    r"""Swish activation function by Google
    $Swish = x * \sigma(x)$

    The docstring is a raw string so that ``\s`` in the formula is not parsed
    as an (invalid) escape sequence.

    Args:
        x: input tensor.

    Returns:
        Tensor of the same shape as ``x`` holding ``x * sigmoid(x)``
        element-wise.
    """
    return x * torch.sigmoid(x)

class Swish(nn.Module):
    r"""Swish activation function by Google
    $Swish = x * \sigma(x)$

    Module form of the activation; it has no parameters. The docstring is a
    raw string so that ``\s`` in the formula is not parsed as an (invalid)
    escape sequence.
    """
    def forward(self, x):
        """Return ``x * sigmoid(x)`` element-wise."""
        return x * torch.sigmoid(x)


# Lookup from the activation name accepted by the model constructors to the
# functional implementation that is applied.
activations = dict(relu=F.relu, swish=swish)


class ConvBlock(nn.Module):
    """Convolutional block of U-net architecture without final activation.

    Applies conv -> (batch norm) -> activation -> conv -> (batch norm). No
    activation follows the second convolution; the original author's
    rationale is that it is optimal to apply ReLU after max pool.

    Args:
        in_channel: number of input channels.
        out_channel: number of channels produced by both convolutions.
        pad: padding passed to both 3x3 convolutions.
        bn: when true, a BatchNorm2d follows each convolution.
        activation: key into ``activations`` selecting the function applied
            between the two convolutions.
    """
    def __init__(self, in_channel, out_channel, pad=0, bn=False, activation='relu'):
        super().__init__()
        self.bn = bn
        self.activation = activations[activation]

        conv_kwargs = dict(kernel_size=(3, 3), padding=pad, bias=True)
        self.conv1 = nn.Conv2d(in_channel, out_channel, **conv_kwargs)
        self.conv2 = nn.Conv2d(out_channel, out_channel, **conv_kwargs)

        # Batch-norm layers exist only when requested.
        if bn:
            self.bn1 = nn.BatchNorm2d(out_channel)
            self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        h = self.conv1(x)
        if self.bn:
            h = self.bn1(h)

        h = self.conv2(self.activation(h))
        if self.bn:
            h = self.bn2(h)

        return h


class UpPool(nn.Module):
    """
    Up convolution on the way up.

    Accepts input x from the previous layer, upsamples it with a stride-2
    transposed convolution that halves the channel count, and concatenates
    the result with the features f from the down pass.
    """
    def __init__(self, in_channel):
        super().__init__()
        self.upconv = nn.ConvTranspose2d(
            in_channel,
            in_channel // 2,
            kernel_size=(2, 2),
            stride=2,
            bias=True,
        )

    def forward(self, x, f):
        # ReLU is applied to x before upsampling (open question kept from the
        # original author: is the ReLU on x needed here?).
        upsampled = self.upconv(F.relu(x))

        # Skip features first, upsampled features second, along the channel
        # axis; the concatenation is then passed through ReLU as a whole.
        merged = torch.cat([f, upsampled], dim=1)
        return F.relu(merged)


class UnetD(nn.Module):
    """Unet with custom depth D

    The encoder has ``depth`` ConvBlocks whose channel count starts at
    ``n_filters`` and doubles at every level; the bottom block doubles it once
    more. The decoder mirrors the encoder with an UpPool + ConvBlock pair per
    level, and a 1x1 convolution maps the final ``n_filters`` channels to 5
    output channels. The network takes a 1-channel input.

    Args:
        depth: number of down/up levels.
        n_filters: channel count of the first encoder block.
        bn: forwarded to every ConvBlock (batch norm after each convolution).
        activation: key into ``activations``; used inside the ConvBlocks, after
            every max pool and before the output convolution.
    """
    def __init__(self, depth, n_filters, bn=False, activation='relu'):
        super().__init__()
        self.depth = depth
        self.activation = activations[activation]

        def make_block(ch_from, ch_to):
            return ConvBlock(ch_from, ch_to, pad=1, bn=bn, activation=activation)

        # down: 1 -> n_filters -> 2 * n_filters -> ...
        self.dn_blks = nn.ModuleList()
        ch_in, ch_out = 1, n_filters
        for _ in range(depth):
            self.dn_blks.append(make_block(ch_in, ch_out))
            ch_in, ch_out = ch_out, ch_out * 2

        # bottom
        self.bottom = make_block(ch_in, ch_out)

        # up: each level halves the channel count again
        wide, narrow = ch_out, ch_in
        self.upconvs = nn.ModuleList()
        self.up_blks = nn.ModuleList()
        for _ in range(depth):
            self.upconvs.append(UpPool(wide))
            self.up_blks.append(make_block(wide, narrow))
            wide, narrow = narrow, narrow // 2

        # output
        self.outconv = nn.Conv2d(n_filters, 5, (1, 1), bias=True)

    def forward(self, x):
        # Encoder: keep each block's pre-pooling output as a skip connection.
        skips = []
        for encode in self.dn_blks:
            x = encode(x)
            skips.append(x)
            x = self.activation(F.max_pool2d(x, (2, 2)))

        x = self.bottom(x)

        # Decoder: the deepest skip connection is consumed first.
        for upconv, decode, skip in zip(self.upconvs, self.up_blks, reversed(skips)):
            x = decode(upconv(x, skip))

        return self.outconv(self.activation(x))

In [8]:
# Architecture hyper-parameters; they must match the checkpoint being loaded.
params_unet = {
    'depth': 5,
    'n_filters': 48,
    'bn': True,
    'activation': 'relu'
}

unet = UnetD(**params_unet)
unet = unet.to(device=device)

# Load weights
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU also loads when the CPU fallback is in use.
unet.load_state_dict(torch.load(MODEL_PATH, map_location=device))

<All keys matched successfully>

## Submission

In [9]:
def rle_encode(mask):
    """Encode image mask to run length encoding string

    Pixels are numbered from 1 in column-major order (down, then right). The
    result is a space separated sequence of ``start length`` pairs, one pair
    per run of consecutive pixels equal to 1.

    Args:
        mask: 2-D array; elements equal to 1 (or True) are encoded.

    Returns:
        The encoded string, or an empty string when no element equals 1.
    """
    # .T for Fortran order (down then right)
    pixels = np.flatnonzero(mask.T.flatten() == 1)
    if pixels.size == 0:
        return ''

    # A run ends wherever the next selected pixel is not the direct neighbour.
    gaps = np.flatnonzero(np.diff(pixels) > 1)
    first = np.concatenate(([0], gaps + 1))           # position of each run's first pixel
    last = np.concatenate((gaps, [pixels.size - 1]))  # position of each run's last pixel

    starts = pixels[first] + 1                        # 1-based pixel numbers
    lengths = pixels[last] - pixels[first] + 1

    pairs = np.column_stack((starts, lengths)).ravel()
    return ' '.join(map(str, pairs.tolist()))

def combine_frames(scores):
    """Combine scores with batch size of 7 frames to predicted image

    The frames are placed left to right with a fixed stride; columns covered
    by two neighbouring frames receive the mean of both scores.

    Args:
        scores: tensor of shape (7, 5, 256, 256) - frames, classes, height,
            frame width.

    Returns:
        CPU tensor of shape (5, 256, 1600) with the stitched scores.

    Raises:
        AssertionError: if ``scores`` does not have the expected shape.
    """
    assert scores.shape == (7, 5, 256, 256), f"Input tensor shape {scores.shape}, while (7, 5, 256, 256) expected"

    scores = scores.cpu()

    n_frames, n_classes, height, frame_w = scores.shape
    img_w = 1600
    overlap = (n_frames * frame_w - img_w) // (n_frames - 1)  # 32 columns
    stride = frame_w - overlap                                # 224 columns

    out = torch.zeros(n_classes, height, img_w)
    for ii in range(n_frames):
        left = ii * stride
        out[:, :, left:left + frame_w] += scores[ii]
        if left > 0:
            # Columns shared with the previous frame now hold the sum of two
            # scores. Average them along the WIDTH axis; slicing the first
            # axis here would address the class dimension and do nothing.
            out[:, :, left:left + overlap] /= 2

    return out

In [10]:
%%time

def gen_submission(model, loader):
    """Run the model over the loader and write ``submission.csv``.

    Each batch is expected to hold all frames of one image: the file name of
    the first item labels the whole batch, and ``combine_frames`` stitches
    the per-frame scores into one score map. The predicted label of a pixel
    is the arg-max over the class axis, and one CSV row is written for each
    of the labels 1 to 4 (label 0 gets no row).

    Args:
        model: network called on each batch of frames; put into eval mode.
        loader: iterable yielding ``(frames, file_names)`` batches.
    """
    model.eval()

    with open('submission.csv', mode='w') as submission, torch.no_grad():
        submission.write('ImageId_ClassId,EncodedPixels\n')

        for frames, fnames in tqdm(loader):
            image_name = fnames[0]

            scores = model(frames.to(device))
            stitched = combine_frames(scores)
            labels = torch.argmax(stitched, dim=0).cpu().numpy()

            for defect_type in (1, 2, 3, 4):
                im_cls = '_'.join([image_name, str(defect_type)])
                rle = rle_encode(labels == defect_type)
                submission.write(f"{im_cls},{rle}\n")

# Run inference over the whole test loader; the result is written to
# submission.csv in the current working directory.
gen_submission(unet, loader)

100%|██████████| 5506/5506 [20:30<00:00,  4.47it/s]

CPU times: user 17min 45s, sys: 2min 5s, total: 19min 50s
Wall time: 20min 30s



