In [1]:
import json
import numpy as np
import cv2
from matplotlib import pyplot as plt
from pycocotools import mask as mutils
from tqdm import tqdm
import pandas as pd
import os

In [2]:
# Location of the JSON annotation file, relative to the notebook's working directory.
# It is opened and parsed with json.load in the following cell.
filepath = '../sample.json'

In [3]:
# Parse the annotation file into `json_data`.
# The encoding is given explicitly: without it, open() falls back to the platform's
# locale encoding, so the same file could decode differently from machine to machine.
with open(filepath, encoding='utf-8') as json_file:
    json_data = json.load(json_file)

In [4]:
def rle_encode(mask):
    """Run-length encode the foreground of a mask.

    The mask is traversed in column-major order (the transpose is flattened), and
    every non-zero pixel counts as foreground.

    Args:
        mask: array-like mask, typically 2-D (height, width). Any non-zero value is
            treated as foreground.

    Returns:
        A 1-D integer numpy array laid out as
        ``[start_1, length_1, start_2, length_2, ...]`` where each start is the
        1-indexed position of a run in the flattened mask. The array is empty when
        the mask has no foreground pixels or no pixels at all.
    """
    # Binarize first: comparing raw values would treat a 1 -> 2 step as a run
    # boundary and could produce an odd-length (malformed) run list.
    pixels = np.asarray(mask).T.flatten() != 0

    # Always pad with background on both sides. This makes runs touching either end
    # of the sequence produce a boundary, guarantees an even number of boundaries,
    # and keeps zero-size masks from failing on an out-of-range index.
    padded = np.concatenate([[False], pixels, [False]])

    # Index i of a difference in `padded` corresponds to 1-indexed pixel position i + 1.
    rle = np.where(padded[1:] != padded[:-1])[0] + 1

    # Boundaries alternate run-start / run-end; turn each end into a run length.
    rle[1::2] = rle[1::2] - rle[::2]
    return rle


def rle_to_string(runs):
    """Render a sequence of run values as one space-separated string."""
    return ' '.join(map(str, runs))


# Used only for testing.
# This is copied from https://www.kaggle.com/paulorzp/run-length-encode-and-decode.
# Thanks to Paulo Pinto.
def rle_decode(rle_str, mask_shape, mask_dtype):
    """Rebuild a mask from a space-separated run-length string.

    Args:
        rle_str: alternating ``start length`` tokens; starts are 1-indexed
            positions in the column-major flattened mask.
        mask_shape: shape of the mask to return, e.g. ``(height, width)``.
        mask_dtype: dtype of the returned array.

    Returns:
        An array of shape ``mask_shape`` with 1 inside every run and 0 elsewhere.
    """
    tokens = rle_str.split()
    # Even-positioned tokens are starts (shifted to 0-indexed), odd ones are lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    flat = np.zeros(np.prod(mask_shape), dtype=mask_dtype)
    for begin, stop in zip(starts, ends):
        flat[begin:stop] = 1

    # The runs index a column-major flattening, so reshape with the axes reversed
    # and transpose back to the requested shape.
    return flat.reshape(mask_shape[::-1]).T

In [5]:
# Parallel per-annotation lists; they become the DataFrame columns further down.
encoded_pixels, img_ids, height, width, category_ids = [], [], [], [], []

# Walk the annotation records once: decode each record's segmentation with
# pycocotools, re-encode it with rle_encode, and collect the ids and mask size.
for annotation in tqdm(json_data):
    segmentation = annotation['segmentation']
    encoded_pixels.append(rle_to_string(rle_encode(mutils.decode(segmentation))))
    img_ids.append(annotation['image_id'])
    category_ids.append(annotation['category_id'])
    size = segmentation['size']
    height.append(size[0])
    width.append(size[1])

100%|██████████| 100/100 [00:00<00:00, 778.32it/s]


In [6]:
# Column name -> per-annotation values, listed in the order the columns should appear.
data = dict([
    ('ImageId', img_ids),
    ('EncodedPixels', encoded_pixels),
    ('Height', height),
    ('Width', width),
    ('CategoryId', category_ids),
])

In [8]:
# One row per annotation.
submission = pd.DataFrame(data)

# Draw up to 50 rows as a dummy answer set.
# - random_state pins the draw so Restart & Run All writes the same rows every time
#   (an unseeded sample changes the exported file on each run).
# - n is clamped because DataFrame.sample raises ValueError when asked for more rows
#   than the frame holds (without replacement).
answer_dummy = submission.sample(n=min(50, len(submission)), random_state=42)

In [9]:
# Preview the first rows of the full submission table.
submission.head()

Unnamed: 0,ImageId,EncodedPixels,Height,Width,CategoryId
0,94535,184366 19 185161 30 185957 36 186752 43 187549...,800,800,1
1,94535,185970 7 186767 18 187564 28 188360 34 189156 ...,800,800,2
2,94535,185978 6 186764 22 187560 29 188356 35 189153 ...,800,800,3
3,86297,222868 47 223649 85 224411 135 225200 155 2259...,800,800,10
4,86297,180070 12 180865 27 181661 39 182456 47 183252...,800,800,20


In [None]:
# Preview the first rows of the sampled subset.
answer_dummy.head()

In [None]:
# Write both tables to CSV in the working directory, leaving out the index column.
for frame, csv_name in ((submission, 'submission.csv'), (answer_dummy, 'answer_dummy.csv')):
    frame.to_csv(csv_name, index=False)