In [53]:
# Mount Google Drive at /content/gdrive (Colab only); the dataset root used
# later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [101]:
# Install required libs
!pip install -U segmentation-models-pytorch
!pip install -U catalyst

Requirement already up-to-date: segmentation-models-pytorch in /usr/local/lib/python3.6/dist-packages (0.1.0)
Collecting catalyst
[?25l  Downloading https://files.pythonhosted.org/packages/1c/7d/3ba6e33182ef43e80f859947b9505dc4e47f4a512842f881ddfe6a17de78/catalyst-20.3-py2.py3-none-any.whl (362kB)
[K     |████████████████████████████████| 368kB 8.0MB/s 
Collecting GitPython>=2.1.11
[?25l  Downloading https://files.pythonhosted.org/packages/d3/2f/6a366d56c9b1355b0880be9ea66b166cb3536392638d8d91413ec66305ad/GitPython-3.1.0-py3-none-any.whl (450kB)
[K     |████████████████████████████████| 460kB 18.3MB/s 
Collecting tensorboardX
[?25l  Downloading https://files.pythonhosted.org/packages/35/f1/5843425495765c8c2dd0784a851a93ef204d314fc87bcc2bbb9f662a3ad1/tensorboardX-2.0-py2.py3-none-any.whl (195kB)
[K     |████████████████████████████████| 204kB 18.3MB/s 
Collecting tqdm>=4.33.0
[?25l  Downloading https://files.pythonhosted.org/packages/47/55/fd9170ba08a1a64a18a7f8a18f088037316f2a41

## Loading data

For this example we will use **CamVid** dataset. It is a set of:
 - **train** images + segmentation masks
 - **validation** images + segmentation masks
 - **test** images + segmentation masks
 
All images have 320 pixels height and 480 pixels width.
For more information about the dataset visit http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/.

In [0]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset

import numpy as np
import cv2
import matplotlib.pyplot as plt

from pycocotools.coco import COCO
from PIL import Image, ImageDraw

import torchvision.transforms as transforms
from torch import optim

from catalyst.contrib.nn import RAdam, Lookahead
from catalyst import utils

from sklearn.model_selection import StratifiedKFold

In [0]:
# helper function for data visualization
def visualize(**images):
    """Show the given images side by side in a single row.

    Each keyword argument becomes one panel: the keyword (underscores turned
    into spaces, then title-cased) is used as the panel title and the value is
    handed to ``plt.imshow``. Axis ticks are hidden on every panel.
    """
    total = len(images)
    plt.figure(figsize=(16, 5))
    for position, label in enumerate(images, start=1):
        plt.subplot(1, total, position)
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title())
        plt.imshow(images[label])
    plt.show()

### Dataloader

Writing a helper class for data extraction, transformation and preprocessing  
https://pytorch.org/docs/stable/data

In [0]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class imageCompressor(nn.Module):
    """Small convolutional front-end that shrinks an RGB image 4x per side.

    Four 5x5 convolutions (padding 2) are applied in sequence, each preceded by
    BatchNorm and ReLU. The second and fourth convolutions use stride 2, so an
    input of shape ``(N, 3, H, W)`` comes out as ``(N, 3, H/4, W/4)`` for H and
    W divisible by 4.

    Every convolution has its own BatchNorm layer. Sharing a single BatchNorm
    instance between stages would force activations from different layers to
    share one set of affine parameters and one set of running statistics.
    """

    def __init__(self):
        super(imageCompressor, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(8, 8, kernel_size=5, padding=2, stride=2)
        self.conv3 = nn.Conv2d(8, 8, kernel_size=5, padding=2)
        self.conv4 = nn.Conv2d(8, 3, kernel_size=5, padding=2, stride=2)
        # bnK normalises the input of convK.
        self.bn1 = nn.BatchNorm2d(3)
        self.bn2 = nn.BatchNorm2d(8)
        self.bn3 = nn.BatchNorm2d(8)
        self.bn4 = nn.BatchNorm2d(8)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Run the BN -> ReLU -> Conv stack on ``x`` and return the result.

        Args:
            x (torch.Tensor): batch of shape ``(N, 3, H, W)``.

        Returns:
            torch.Tensor: batch of shape ``(N, 3, H/4, W/4)``.
        """
        x = self.conv1(self.relu(self.bn1(x)))
        x = self.conv2(self.relu(self.bn2(x)))  # halves H and W
        x = self.conv3(self.relu(self.bn3(x)))
        x = self.conv4(self.relu(self.bn4(x)))  # halves H and W again
        return x

compress_model = imageCompressor()

In [0]:
class Dataset(BaseDataset):
    """Ship-segmentation dataset backed by a COCO annotation file.

    For every sample the image is read from disk, its polygon annotations are
    rasterised into a label mask, the optional augmentation is applied, the
    image is passed through the module-level ``compress_model`` and finally the
    optional preprocessing is applied.

    Args:
        src_dir (str): dataset root. Annotations are read from
            ``<src_dir>/custom_coco__/annotations/detection_ship.json`` and
            images from ``<src_dir>/images``.
        indexes (sequence of int, optional): positions into the annotation
            file's image list (in file order) that this dataset exposes, e.g.
            one fold of a cross-validation split. ``None`` or an empty sequence
            exposes every image.
        classes (list of str, optional): names of the classes to extract from
            the mask; each must appear in ``CLASSES`` (case-insensitive).
            ``None`` selects all of ``CLASSES``.
        augmentation (albumentations.Compose, optional): data transformation
            pipeline (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose, optional): data preprocessing
            (e.g. normalization, shape manipulation, etc.)

    """

    CLASSES = ['aircraft carrier', 'container', 'oil tanker', 'maritime vessels']

    # Side length (pixels) the annotation coordinates refer to.
    # TODO: the source images are assumed to be 3000x3000; revisit if that changes.
    SOURCE_SIZE = 3000
    # Side length of the rasterised mask; a 3000-pixel image leaves
    # ``compress_model`` at this size.
    MASK_SIZE = 750

    def __init__(
            self,
            src_dir,
            indexes=None,
            classes=None,
            augmentation=None,
            preprocessing=None,
    ):
        # Kept on the instance so __getitem__ does not depend on a global.
        self.src_dir = src_dir

        coco_file = os.path.join(src_dir, 'custom_coco__', 'annotations', 'detection_ship.json')
        self.coco = COCO(coco_file)

        # Position -> COCO image id, in annotation-file order. Samplers and
        # cross-validation splitters hand out positions 0..len-1, which are not
        # guaranteed to coincide with the image ids stored in the file.
        self.image_ids = list(self.coco.imgs)

        # A fresh list per instance (no shared mutable default argument).
        self.indexes = [] if indexes is None else indexes

        # convert str names to class values on masks
        if classes is None:
            classes = self.CLASSES
        # NOTE(review): these values are positions in CLASSES (0..3) and are
        # compared with the `category_id` painted into the mask, whose
        # background is also 0 — confirm the category ids in the annotation
        # file line up with this.
        self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    @staticmethod
    def _scaled_polygons(segmentation, divisor):
        """Return the polygons of one annotation as lists of ``(x, y)`` tuples.

        Accepts either a flat ``[x0, y0, x1, y1, ...]`` list or the nested
        ``[[x0, y0, ...], ...]`` form; every coordinate is divided by
        ``divisor``.
        """
        if len(segmentation) and isinstance(segmentation[0], (list, tuple)):
            rings = segmentation
        else:
            rings = [segmentation]
        polygons = []
        for ring in rings:
            coords = (np.asarray(ring, dtype=float) / divisor).tolist()
            polygons.append(list(zip(coords[0::2], coords[1::2])))
        return polygons

    def _build_mask(self, image_id):
        """Rasterise the annotations of ``image_id`` into a 2-D label array."""
        canvas = Image.new('L', (self.MASK_SIZE, self.MASK_SIZE), 0)
        painter = ImageDraw.Draw(canvas)
        divisor = self.SOURCE_SIZE / self.MASK_SIZE
        for annot in self.coco.imgToAnns[image_id]:
            label = int(annot['category_id'])
            for polygon in self._scaled_polygons(annot['segmentation'], divisor):
                painter.polygon(polygon, fill=label)
        return np.array(canvas)

    def __getitem__(self, index):
        """Return the ``(image, mask)`` pair at position ``index``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        if len(self.indexes):
            index = self.indexes[index]
        image_id = self.image_ids[index]

        # load the image
        file_name = self.coco.imgs[image_id]['file_name']
        image_path = os.path.join(self.src_dir, 'images', file_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError('Could not read image: %s' % image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # build the mask
        mask = self._build_mask(image_id)

        # extract certain classes from mask: one binary channel per class value
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')

        # add background if mask is not binary
        if mask.shape[-1] != 1:
            background = 1 - mask.sum(axis=-1, keepdims=True)
            mask = np.concatenate((mask, background), axis=-1)

        # apply augmentations
        # NOTE(review): the image is still at its on-disk resolution here while
        # the mask is MASK_SIZE x MASK_SIZE — confirm the installed
        # albumentations version accepts differing image/mask sizes.
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # convert the image to a PyTorch tensor with a leading batch axis
        tensor_image = transforms.ToTensor()(image).unsqueeze(0)
        # compress the image; no autograd graph is needed for data loading
        with torch.no_grad():
            image = compress_model(tensor_image).squeeze(0)
        # back to a channel-last numpy array
        image = image.permute(1, 2, 0).numpy()

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Number of exposed samples: ``len(indexes)`` if given, else all images."""
        if len(self.indexes):
            return len(self.indexes)
        return len(self.image_ids)

In [9]:
# Lets look at data we have
# NOTE: get_training_augmentation() is defined in a later cell (the
# "Augmentations" section). Running this cell before that one on a fresh kernel
# raises the NameError recorded in the output below.
src_dir = r"/content/gdrive/My Drive/findShip/"
dataset = Dataset(src_dir,augmentation=get_training_augmentation())

image, mask = dataset[843] # get some sample
# One panel per mask channel: channels 0-3 follow Dataset.class_values, and
# channel 4 is the extra channel Dataset.__getitem__ appends as "background".
visualize(
    image=image, 
    gt_mask1=mask[..., 0].squeeze(),
    gt_mask2=mask[..., 1].squeeze(),
    gt_mask3=mask[..., 2].squeeze(),                
    gt_mask4=mask[..., 3].squeeze(),
    gt_mask5=mask[..., 4].squeeze()
)

NameError: ignored

### Augmentations

Data augmentation is a powerful technique to increase the amount of your data and prevent model overfitting.  
If you are not familiar with this technique, read some of these articles:
 - [The Effectiveness of Data Augmentation in Image Classification using Deep
Learning](http://cs231n.stanford.edu/reports/2017/pdfs/300.pdf)
 - [Data Augmentation | How to use Deep Learning when you have Limited Data](https://medium.com/nanonets/how-to-use-deep-learning-when-you-have-limited-data-part-2-data-augmentation-c26971dc8ced)
 - [Data Augmentation Experimentation](https://towardsdatascience.com/data-augmentation-experimentation-3e274504f04b)

Since our dataset is very small we will apply a large number of different augmentations:
 - horizontal flip
 - affine transforms
 - perspective transforms
 - brightness/contrast/colors manipulations
 - image blurring and sharpening
 - gaussian noise
 - random crops

All these transforms can be easily applied with [**Albumentations**](https://github.com/albu/albumentations/) - a fast augmentation library.
For a detailed explanation of image transformations you can look at the [kaggle salt segmentation example](https://github.com/albu/albumentations/blob/master/notebooks/example_kaggle_salt.ipynb) provided by the [**Albumentations**](https://github.com/albu/albumentations/) authors.

In [0]:
import albumentations as albu

In [0]:
def get_training_augmentation():
    """Compose the augmentation pipeline used for training samples.

    In order, the pipeline applies: random horizontal and vertical flips, a
    shift/scale/rotate that is always applied, additive Gaussian noise
    (p=0.2), a perspective warp (p=0.5), and then one randomly chosen
    transform from each of three groups, every group firing with p=0.8:
    CLAHE / brightness / gamma, sharpen / blur / motion blur, and
    contrast / hue-saturation-value.

    Returns:
        albu.Compose: the composed pipeline.
    """
    geometry = [
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=359, shift_limit=0.1, p=1, border_mode=0),
    ]
    distortion = [
        albu.IAAAdditiveGaussianNoise(p=0.2),
        albu.IAAPerspective(p=0.5),
    ]
    tone = albu.OneOf(
        [albu.CLAHE(p=1), albu.RandomBrightness(p=1), albu.RandomGamma(p=1)],
        p=0.8,
    )
    sharpness = albu.OneOf(
        [albu.IAASharpen(p=1), albu.Blur(blur_limit=3, p=1), albu.MotionBlur(blur_limit=3, p=1)],
        p=0.8,
    )
    colour = albu.OneOf(
        [albu.RandomContrast(p=1), albu.HueSaturationValue(p=1)],
        p=0.8,
    )
    return albu.Compose(geometry + distortion + [tone, sharpness, colour])


def get_validation_augmentation():
    """Compose the augmentation pipeline used for validation samples.

    The pipeline consists of a single ``albu.RandomRotate90`` transform.

    Returns:
        albu.Compose: the composed pipeline.
    """
    return albu.Compose([albu.RandomRotate90()])

def to_tensor(x, **kwargs):
    """Reorder a channel-last array to channel-first and cast it to float32.

    Args:
        x: 3-D array laid out as (height, width, channels).
        **kwargs: accepted and ignored.

    Returns:
        float32 array laid out as (channels, height, width).
    """
    channels_first = x.transpose((2, 0, 1))
    return channels_first.astype('float32')

def get_preprocessing(preprocessing_fn):
    """Build the preprocessing pipeline applied after augmentation.

    The pipeline first runs ``preprocessing_fn`` on the image only, then runs
    ``to_tensor`` on both the image and the mask.

    Args:
        preprocessing_fn (callable): data normalization function
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose

    """
    normalize = albu.Lambda(image=preprocessing_fn)
    channels_first = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize, channels_first])

## Create model and train

In [0]:
import torch
import numpy as np
import segmentation_models_pytorch as smp

In [0]:
# Model configuration. These constants describe the model that is actually
# built below, so they can be reused elsewhere (e.g. in checkpoint names).
ENCODER = 'resnet34'
ENCODER_WEIGHTS = None  # no pretrained weights are loaded into the encoder
CLASSES = ['aircraft carrier', 'container', 'oil tanker', 'maritime vessels']
# could be None for logits or 'softmax2d' for multiclass segmentation
# NOTE(review): log-softmax outputs are <= 0, while the IoU / F-score metrics
# configured later threshold predictions at 0.5 — confirm 'logsoftmax' rather
# than 'softmax2d' is what is wanted here.
ACTIVATION = 'logsoftmax'
DEVICE = 'cuda'
BATCH_SIZE = 4

# create segmentation model
# One output channel per entry of CLASSES plus one for the extra "background"
# channel that Dataset appends to every mask.
model = smp.Unet(encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS,
             classes=len(CLASSES)+1, activation=ACTIVATION)
# Input normalisation function for the chosen encoder (library default weights setting).
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER)


In [14]:
# Training and validation datasets over the same annotation file. Neither call
# passes `indexes`, so both expose every image; they differ only in the
# augmentation pipeline.
train_dataset = Dataset(
    src_dir, 
    augmentation=get_training_augmentation(), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

valid_dataset = Dataset(
    src_dir, 
    augmentation=get_validation_augmentation(), 
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

# Training batches are shuffled; validation yields one sample at a time in index order.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=12)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=4)

loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
loading annotations into memory...
Done (t=0.26s)
creating index...
index created!


In [0]:
# Dice/F1 score - https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
# IoU/Jaccard score - https://en.wikipedia.org/wiki/Jaccard_index

# Dice loss is optimised; IoU and F-score (both thresholded at 0.5) are reported.
loss = smp.utils.losses.DiceLoss()
metrics = [smp.utils.metrics.IoU(threshold=0.5), smp.utils.metrics.Fscore(threshold=0.5)]

learning_rate = 0.01
encoder_learning_rate = 0.005
# The "encoder*" entry gives the encoder its own, lower learning rate and weight decay.
# NOTE(review): the original remark here said the encoder is pre-trained, but the
# model cell builds it with encoder_weights=None — confirm which is intended.
layerwise_params = {"encoder*": dict(lr=encoder_learning_rate, weight_decay=0.00003)}

# This function removes weight_decay for biases and applies our layerwise_params
model_params = utils.process_model_params(model, layerwise_params=layerwise_params)

# Catalyst has new SOTA optimizers out of box
base_optimizer = RAdam(model_params, lr=learning_rate, weight_decay=0.0003)
optimizer = Lookahead(base_optimizer)

# NOTE(review): ReduceLROnPlateau.step() takes the monitored metric value as its
# first argument — check that the training loops pass a validation metric to it.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2)

In [0]:
# create epoch runners 
# it is a simple loop of iterating over dataloader`s samples
# Both runners share the same model, loss and metrics; only the training runner
# is given the optimizer.
train_epoch = smp.utils.train.TrainEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    device=DEVICE,
    verbose=True,
)

In [27]:
# 3-fold split of the annotated images. The number of annotations per image is
# used as the stratification label.
# NOTE(review): the recorded output warns that the least populated label has a
# single member, so stratification cannot be exact for every label.
skf = StratifiedKFold(n_splits=3)

coco_file = os.path.join(src_dir, 'custom_coco__', 'annotations', 'detection_ship.json')
coco = COCO(coco_file)

images = np.array([coco.imgs[idx] for idx in coco.imgs])
labels = np.array([len(coco.imgToAnns[idx]) for idx in coco.imgs])

print(len(images))
print(len(labels))

# Make sure the checkpoint directory exists before the first save.
checkpoint_dir = os.path.join(src_dir, 'models')
os.makedirs(checkpoint_dir, exist_ok=True)

max_score = 0
model.train()
# Each fold is treated as one "epoch": train on the fold's training part, then
# validate on its held-out part.
# NOTE(review): the same model is carried across folds, so the validation part
# of a later fold contains images the model was already trained on — confirm
# this is acceptable for model selection.
for epoch, idxs in enumerate(skf.split(images, labels)):
    train_index, test_index = idxs
    print(len(train_index))
    print(len(test_index))

    train_dataset = Dataset(src_dir, indexes=train_index, classes=CLASSES,
                            augmentation=get_training_augmentation(),
                            preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = Dataset(src_dir, indexes=test_index, classes=CLASSES,
                            augmentation=get_validation_augmentation(),
                            preprocessing=get_preprocessing(preprocessing_fn))

    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=12)
    valid_dataloader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=4)

    # train model
    print('\nEpoch: {}'.format(epoch))
    train_logs = train_epoch.run(train_dataloader)
    valid_logs = valid_epoch.run(valid_dataloader)

    # ReduceLROnPlateau monitors a metric: feed it the validation loss, not the
    # epoch counter (an ever-increasing number would always look like a plateau).
    scheduler.step(valid_logs['dice_loss'])

    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # store the validation loss value, not the loss module
            'loss': valid_logs['dice_loss'],
            }, os.path.join(checkpoint_dir, '%s_%s_%s.h5' % (epoch, ENCODER, max_score)))
        print('Model saved!')

    for i in range(3):
        image, mask = valid_dataset[i]
        # Preprocessing returns channel-first arrays (see to_tensor): move the
        # image back to channel-last for imshow and pick mask channels on axis 0.
        image_vis = image.transpose(1, 2, 0)
        # Rescale to [0, 1] for display only; the normalised values are not in
        # an image range.
        value_range = float(image_vis.max() - image_vis.min())
        image_vis = (image_vis - image_vis.min()) / max(value_range, 1e-8)
        visualize(
            image=image_vis,
            gt_mask1=mask[0],
            gt_mask2=mask[1],
            gt_mask3=mask[2],
            gt_mask4=mask[3]
        )


loading annotations into memory...
Done (t=0.17s)
creating index...
index created!
2646
2646
1764
882
loading annotations into memory...



The least populated class in y has only 1 members, which is less than n_splits=3.



Done (t=0.33s)
creating index...
index created!
loading annotations into memory...
Done (t=0.15s)
creating index...
index created!

Epoch: 0
train:   0%|          | 0/441 [00:00<?, ?it/s](3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
(3000, 3000, 3)
train:   0%|          | 0/441 [00:24<?, ?it/s]
(3000, 3000, 3)


RuntimeError: ignored

In [77]:
# train model for 10 epochs

max_score = 0

for i in range(0, 10):
    
    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)
    
    # do something (save model, change lr, etc.)
    # Keep the model with the best validation IoU seen so far. The whole model
    # object is saved because a later cell reloads it with torch.load.
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')
        
    # ReduceLROnPlateau monitors a metric: pass the validation loss rather than
    # the epoch counter, which only ever increases.
    scheduler.step(valid_logs['dice_loss'])


Epoch: 0
train:   0%|          | 0/2646 [00:00<?, ?it/s](2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
torch.Size([1, 8, 2176, 1408])
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
torch.Size([1, 8, 2176, 1408])
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
torch.Size([1, 8, 1088, 704])
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
torch.Size([1, 3, 2176, 1408])
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
torch.Size([1, 8, 2176, 1408])
torch.Size([1, 8, 1088, 704])
torch.Size([1, 8, 1088, 704])
(2176, 1408, 3)
(2176, 1408, 3)
torch.Size([1, 3, 2176, 1408])
torch.Size([1, 3, 2176, 1408])
torch.Size([1, 3, 544, 352])
torch.Size([1, 8, 2176, 1408])
torch.Size([1, 8, 2176, 1408])
torch.Size([1, 8, 2176, 1408])
torch.Size([1, 8, 1088, 704])
torch.Size([1, 8, 2176, 1408])
torch.Size([

  return self.activation(x)


torch.Size([1, 3, 544, 352])
torch.Size([1, 3, 2176, 1408])
train:   0%|          | 1/2646 [00:16<12:13:38, 16.64s/it, dice_loss - 0.7693, iou_score - 0.03146]torch.Size([1, 8, 1088, 704])
torch.Size([1, 8, 1088, 704])
train:   0%|          | 2/2646 [00:16<8:37:52, 11.75s/it, dice_loss - 0.7712, iou_score - 0.03052] torch.Size([1, 3, 544, 352])
torch.Size([1, 3, 544, 352])
train:   0%|          | 3/2646 [00:17<6:14:28,  8.50s/it, dice_loss - 0.8051, iou_score - 0.02997]torch.Size([1, 8, 2176, 1408])
train:   0%|          | 4/2646 [00:18<4:29:51,  6.13s/it, dice_loss - 0.8124, iou_score - 0.03567](2176, 1408, 3)
train:   0%|          | 5/2646 [00:18<3:12:44,  4.38s/it, dice_loss - 0.8008, iou_score - 0.03822]torch.Size([1, 3, 2176, 1408])
train:   0%|          | 7/2646 [00:19<1:41:51,  2.32s/it, dice_loss - 0.7862, iou_score - 0.04256](2176, 1408, 3)
train:   0%|          | 8/2646 [00:19<1:16:18,  1.74s/it, dice_loss - 0.7808, iou_score - 0.04691]torch.Size([1, 8, 1088, 704])
train:   0

KeyboardInterrupt: ignored

In [0]:
# Scratch check of torch.cat shape rules.
# NOTE(review): `a` is 2x2 and `b` is 3x3, so their sizes differ along dim 0.
# torch.cat needs matching sizes in every dimension except `dim`, so this call
# is expected to raise — confirm whether this cell is still needed.
a = torch.ones(2, 2)
b = torch.zeros(3, 3)

torch.cat([a, b], dim=1)

## Test best saved model

In [0]:
# load best saved checkpoint
# This is the whole model object written by torch.save(model, './best_model.pth')
# in the training loop. torch.load unpickles the file, so only load files you trust.
best_model = torch.load('./best_model.pth')

In [0]:
# create test dataset
# Dataset takes the dataset root plus optional positional indexes, not separate
# image and mask folders.
# NOTE(review): this notebook defines no dedicated test split, so the held-out
# indexes of the most recent cross-validation fold (`test_index`) are used —
# confirm this is the intended evaluation set.
test_dataset = Dataset(
    src_dir,
    indexes=test_index,
    augmentation=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
    classes=CLASSES,
)

test_dataloader = DataLoader(test_dataset)

In [0]:
# evaluate model on test set
# Reuses the `loss` and `metrics` objects configured for training, applied to
# the reloaded `best_model`.
test_epoch = smp.utils.train.ValidEpoch(
    model=best_model,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
)

logs = test_epoch.run(test_dataloader)

## Visualize predictions

In [0]:
# test dataset without transformations for image visualization
# Same root and positional indexes as the evaluation dataset, so position n
# refers to the same image in both.
# NOTE(review): `test_index` is the held-out part of the most recent
# cross-validation fold — confirm this is the intended evaluation set.
test_dataset_vis = Dataset(
    src_dir,
    indexes=test_index,
    classes=CLASSES,
)

In [0]:
# Show five randomly chosen test samples next to their ground truth and prediction.
for i in range(5):
    n = np.random.choice(len(test_dataset))

    # NOTE(review): the un-preprocessed image is the float output of
    # compress_model; casting it to uint8 may not give a faithful picture.
    image_vis = test_dataset_vis[n][0].astype('uint8')
    # NOTE(review): test_dataset applies a random 90-degree rotation, so the
    # masks may be rotated relative to image_vis.
    image, gt_mask = test_dataset[n]

    x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
    pr_mask = best_model.predict(x_tensor)
    pr_mask = pr_mask.squeeze(0).cpu().numpy()

    # Both masks are channel-first with one channel per class, which imshow
    # cannot draw directly; collapse them to a 2-D map of the winning class.
    visualize(
        image=image_vis,
        ground_truth_mask=gt_mask.argmax(axis=0),
        predicted_mask=pr_mask.argmax(axis=0)
    )

In [84]:
# Scratch cell: load one image by COCO image id and run it through the
# compressor by hand, repeating the steps of Dataset.__getitem__.
image_path = coco.imgs[843]['file_name']
image = cv2.imread(os.path.join(src_dir, 'images', image_path))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

tensor_image = transforms.ToTensor()(image).unsqueeze(0) # shape [1, 3, H, W]; the recorded output below lists [1, 3, 3000, 3000]
# compress the image
image = compress_model(tensor_image).squeeze()        
# convert from tensor to numpy
image = image.permute(1, 2, 0).detach().numpy()

torch.Size([1, 3, 3000, 3000])
torch.Size([1, 8, 3000, 3000])
torch.Size([1, 8, 1500, 1500])
torch.Size([1, 8, 1500, 1500])
torch.Size([1, 3, 750, 750])
