Current competition metric implies segmenation task. However one valid approach could incorporate object detection. In this direcrion and borrowing stuff from Kevin's excellent kernel [https://www.kaggle.com/kmader/baseline-u-net-model-part-1](http://), we attempt to extract bounding boxes information from binary rle-encoded masks.

In [1]:
import os
import cv2
from tqdm import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
from skimage.measure import label, regionprops
from skimage.util.montage import montage2d as montage
montage_rgb = lambda x: np.stack([montage(x[:, :, :, i]) for i in range(x.shape[3])], -1)
ship_dir = '../input'
test_image_dir = os.path.join(ship_dir, 'test')

from skimage.morphology import label
def multi_rle_encode(img):
    labels = label(img[:, :, 0])
    return [rle_encode(labels==k) for k in np.unique(labels[labels>0])]

# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_encode(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction

def masks_as_image(in_mask_list, all_masks=None):
    # Take the individual ship masks and create a single mask array for all ships
    if all_masks is None:
        all_masks = np.zeros((768, 768), dtype = np.int16)
    #if isinstance(in_mask_list, list):
    for mask in in_mask_list:
        if isinstance(mask, str):
            all_masks += rle_decode(mask)
    return np.expand_dims(all_masks, -1)

Let us read the masks:

In [2]:
masks = pd.read_csv(os.path.join('../input/',
                                 'test_ship_segmentations.csv'))
print(masks.shape[0], 'masks found')
print(masks['ImageId'].value_counts().shape[0])
masks.head()

100693 masks found
88486


Unnamed: 0,ImageId,EncodedPixels
0,0001124c7.jpg,
1,000194a2d.jpg,51834 9 52602 9 53370 9 54138 9 54906 9 55674 ...
2,000194a2d.jpg,360486 1 361252 4 362019 5 362785 8 363552 10 ...
3,000194a2d.jpg,254389 9 255157 17 255925 17 256693 17 257461 ...
4,000194a2d.jpg,198320 10 199088 10 199856 10 200624 10 201392...


and keep only those that contain ships. Keep in mind that image files can be repeated many times in the csv file. So a unique operator will give us the unique filenames that contain ships.

In [3]:
images_with_ship = masks.ImageId[masks.EncodedPixels.isnull()==False]
images_with_ship = np.unique(images_with_ship.values)
print('There are ' +str(len(images_with_ship)) + ' image files with masks')
images_with_ship.shape[0]

There are 13486 image files with masks


13486

In [4]:
def cut_image(im, desired_size, x, y, w, h):
    # cut subimage has top-left = (x,y) and (width,height) = (w,h) with desired size presets
    # print(str(x) + ' ' + str(y) + ' ' + str(w) + ' ' + str(h))

    centerX = x + w/2
    centerY = y + h/2

    if w <= desired_size and h <= desired_size:
        newSize = desired_size
    else:
        if w < h:
            newSize = h
        else:
            newSize = w

    newWidth = newSize
    newHeight = newSize
    newX = centerX - newSize/2
    newY = centerY - newSize/2

    if newX < 0:
        newX = 0
    if newY < 0:
        newY = 0
    if newX + newWidth >= im.shape[1]:
        newX = im.shape[1] - newWidth
    if newY + newHeight >= im.shape[0]:
        newY = im.shape[0] - newHeight

    subimg = im[int(newY): int(newY + newHeight),int(newX):int(newX + newWidth)]

    # print(str(newX) + ' ' + str(newY) + ' ' + str(newWidth) + ' ' + str(newHeight))

    if newSize > desired_size:
        subimg = cv2.resize(subimg, (desired_size, desired_size), interpolation=cv2.INTER_CUBIC)

    return subimg, newX, newY, newWidth, newHeight

In order to extract the bounding box we:
1. Load mask as binary numpy array using Kevin's `masks_as_image`)

2. Label  connected regions of this mask using `skimage.measure.label`

3. Measure morphological properties of these connected regions and keep the bounding box (`skimage.measure.regionprops`). For each connected region a bounding box of the form  (min_row, min_col, max_row, max_col) is returned.  

(*Note: Ships masks touching each other would be considered as one. See Image 00021ddc3.jpg below. This may hurt detection performance but we can find ways to further split them !* )

Let us view some  examples:

In [18]:
for i in range(images_with_ship.shape[0]):
    image = images_with_ship[i]

#     fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (15, 5))
    img_0 = cv2.imread(test_image_dir+'/' + image)
    rle_0 = masks.query('ImageId=="'+image+'"')['EncodedPixels']
    mask_0 = masks_as_image(rle_0)
    #
    # 
    lbl_0 = label(mask_0) 
    props = regionprops(lbl_0)
    img_1 = img_0.copy()
    print ('Image', image)
    j = 0
    for prop in props:
        print('Found bbox', prop.bbox)
        cv2.rectangle(img_1, (prop.bbox[1], prop.bbox[0]), (prop.bbox[3], prop.bbox[2]), (255, 0, 0), 2)
#         roi = img_0[prop.bbox[0]:prop.bbox[2], prop.bbox[1]:prop.bbox[3]]
#         roi = cv2.resize(roi, (32, 32)) 
        roi, newX, newY, newWidth, newHeight = cut_image(img_0, 80, prop.bbox[1], prop.bbox[0], prop.bbox[3]-prop.bbox[1], prop.bbox[2]-prop.bbox[0])
        cv2.imwrite(os.path.join('./', 'ship80/')+image[:9]+'_'+str(j)+'.jpg', roi)
        j = j + 1

#     ax1.imshow(img_0)
#     ax1.set_title('Image')
#     ax2.set_title('Mask')
#     ax3.set_title('Image with derived bounding box')
#     ax2.imshow(mask_0[...,0], cmap='gray')
#     ax3.imshow(img_1)
#     plt.show()

Image 000194a2d.jpg
Found bbox (377, 67, 387, 85)
Found bbox (174, 258, 185, 306)
Found bbox (178, 331, 197, 370)
Found bbox (287, 469, 307, 492)
Image 000532683.jpg
Found bbox (460, 597, 474, 604)
Found bbox (756, 738, 763, 747)
Image 0005d6d95.jpg
Found bbox (171, 345, 190, 367)
Image 001234638.jpg
Found bbox (401, 170, 530, 327)
Image 0017c19d6.jpg
Found bbox (514, 428, 527, 442)
Found bbox (449, 528, 469, 548)
Image 0018e5d6c.jpg
Found bbox (127, 72, 205, 355)
Image 001bcf222.jpg
Found bbox (345, 285, 361, 307)
Image 001ed8b9b.jpg
Found bbox (183, 8, 276, 338)
Image 0022f64e5.jpg
Found bbox (627, 135, 662, 204)
Image 0027854cc.jpg
Found bbox (316, 0, 381, 84)
Image 002868a5c.jpg
Found bbox (622, 226, 630, 232)
Image 00286e8b9.jpg
Found bbox (471, 445, 590, 569)
Image 002943412.jpg
Found bbox (594, 186, 719, 330)
Found bbox (140, 268, 193, 342)
Found bbox (215, 391, 236, 416)
Image 003520305.jpg
Found bbox (553, 1, 576, 68)
Found bbox (711, 57, 736, 109)
Found bbox (147, 110, 176, 1

Image 020a2bace.jpg
Found bbox (692, 455, 703, 466)
Image 020f1afe8.jpg
Found bbox (302, 174, 482, 461)
Image 0213b298f.jpg
Found bbox (752, 491, 765, 506)
Image 021726b60.jpg
Found bbox (600, 554, 613, 564)
Image 022c0e756.jpg
Found bbox (47, 665, 60, 678)
Image 023240c31.jpg
Found bbox (607, 146, 707, 274)
Image 0235db857.jpg
Found bbox (249, 165, 278, 330)
Found bbox (372, 175, 400, 340)
Found bbox (51, 191, 85, 415)
Found bbox (7, 225, 33, 399)
Found bbox (240, 404, 263, 573)
Found bbox (746, 406, 768, 591)
Found bbox (0, 447, 22, 616)
Image 023881a49.jpg
Found bbox (564, 225, 604, 381)
Image 023a8c6b4.jpg
Found bbox (435, 381, 464, 493)
Image 023aec5ee.jpg
Found bbox (89, 457, 99, 466)
Image 0240fc4b8.jpg
Found bbox (69, 499, 192, 685)
Image 02421d07b.jpg
Found bbox (90, 61, 99, 71)
Image 024248678.jpg
Found bbox (211, 134, 220, 150)
Found bbox (262, 142, 268, 156)
Found bbox (349, 210, 356, 219)
Found bbox (167, 498, 174, 512)
Image 025085ccc.jpg
Found bbox (625, 218, 649, 242)
I

Image 048d10581.jpg
Found bbox (74, 0, 230, 148)
Found bbox (262, 351, 492, 607)
Image 0490b090a.jpg
Found bbox (537, 87, 571, 115)
Found bbox (672, 399, 721, 423)
Found bbox (267, 479, 307, 514)
Image 049a04990.jpg
Found bbox (259, 463, 298, 500)
Image 04a2c262a.jpg
Found bbox (516, 218, 594, 290)
Found bbox (708, 256, 742, 280)
Found bbox (640, 261, 683, 287)
Found bbox (307, 444, 352, 488)
Found bbox (299, 459, 311, 473)
Found bbox (293, 474, 301, 485)
Found bbox (204, 638, 244, 684)
Image 04a3438da.jpg
Found bbox (45, 510, 53, 527)
Found bbox (90, 513, 101, 527)
Image 04a48c914.jpg
Found bbox (349, 719, 371, 761)
Found bbox (273, 741, 281, 751)
Image 04c3aee37.jpg
Found bbox (564, 68, 573, 97)
Found bbox (241, 333, 261, 381)
Found bbox (717, 512, 724, 534)
Image 04cb74d0a.jpg
Found bbox (225, 728, 232, 739)
Image 04cc0a4cc.jpg
Found bbox (118, 21, 218, 175)
Image 04ce44ea6.jpg
Found bbox (398, 325, 413, 346)
Image 04cefef36.jpg
Found bbox (731, 344, 742, 356)
Found bbox (436, 532, 

Image 069413cf8.jpg
Found bbox (672, 60, 683, 71)
Found bbox (25, 71, 31, 80)
Found bbox (488, 104, 493, 112)
Found bbox (544, 123, 555, 129)
Found bbox (461, 229, 474, 238)
Image 0697e0958.jpg
Found bbox (48, 521, 93, 611)
Image 069fc657b.jpg
Found bbox (442, 649, 465, 670)
Image 06a1b0ec4.jpg
Found bbox (134, 606, 159, 714)
Image 06b152f8c.jpg
Found bbox (519, 560, 565, 644)
Image 06b5f1274.jpg
Found bbox (87, 453, 117, 514)
Image 06b6b6570.jpg
Found bbox (232, 59, 316, 244)
Found bbox (47, 560, 144, 762)
Image 06b8c3648.jpg
Found bbox (616, 0, 636, 12)
Image 06bb4c8f2.jpg
Found bbox (728, 132, 737, 140)
Image 06c880a4c.jpg
Found bbox (40, 216, 136, 303)
Image 06d90df7d.jpg
Found bbox (473, 99, 497, 194)
Found bbox (147, 267, 188, 367)
Image 06db2136f.jpg
Found bbox (580, 372, 740, 419)
Image 06db6422a.jpg
Found bbox (656, 218, 676, 239)
Image 06dd3e135.jpg
Found bbox (114, 7, 187, 72)
Found bbox (475, 543, 768, 768)
Image 06e3a2388.jpg
Found bbox (425, 131, 491, 249)
Image 06e598746

AttributeError: 'NoneType' object has no attribute 'copy'

Here we calculate the bounding boxes for all `29070` images and save then into a dictionary. 

In [None]:
import gc 
bboxes_dict = {}
i = 0
count_ships = 0
for image in tqdm(images_with_ship):
    img_0 = cv2.imread(test_image_dir+'/' + image)
    rle_0 = masks.query('ImageId=="'+image+'"')['EncodedPixels']
    mask_0 = masks_as_image(rle_0)
    

    #
    # 
    lbl_0 = label(mask_0) 
    props = regionprops(lbl_0)
    bboxes = []
    count_ships = count_ships + len(props)
    for prop in props:
        bboxes.append(prop.bbox)
        
        
    i = i + 1
    if i % 500 == 0:
        gc.collect()    

    bboxes_dict[image] = bboxes.copy()

Let us plot some bounding boxes right from the dictionary we just created. 

In [None]:
dict_images = list(bboxes_dict.keys())
for i in range(5):
    image = dict_images[10+i]
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (15, 5))
    img_0 = cv2.imread(train_image_dir+'/' + image)
    rle_0 = masks.query('ImageId=="'+image+'"')['EncodedPixels']
    mask_0 = masks_as_image(rle_0)
    img_1 = img_0.copy()
    bboxs = bboxes_dict[image]
    for bbox in bboxs:
        cv2.rectangle(img_1, (bbox[1], bbox[0]), (bbox[3], bbox[2]), (255, 0, 0), 2)


    ax1.imshow(img_0)
    ax2.imshow(mask_0[...,0], cmap='gray')
    ax3.imshow(img_1)
    plt.show()

The final touch.. I export these bounding boxes for everyone to use in a Pandas dataframe form.

In [None]:
bboxes_df = pd.DataFrame([bboxes_dict])
bboxes_df = bboxes_df.transpose()
bboxes_df.columns = ['bbox_list']
bboxes_df.head()

In [None]:
bboxes_df.to_csv('bbox_dictionary.csv')