In [33]:
import json
import numpy as np
import cv2
from matplotlib import pyplot as plt
from pycocotools import mask as mutils
from tqdm import tqdm
import pandas as pd
import os

In [34]:
# Path to the segmentation results JSON that the rest of the notebook converts.
filepath = "./results.segm.json"

In [35]:
# Read the whole results file into memory. The encoding is passed explicitly
# because JSON text is UTF-8, whereas open() otherwise falls back to the
# platform's locale encoding.
with open(filepath, encoding='utf-8') as json_file:
    json_data = json.load(json_file)

In [36]:
def rle_encode(mask):
    """Run-length encode a mask in column-major order.

    The mask is transposed and flattened, so pixels are numbered down each
    column first, starting at 1. Any non-zero pixel counts as foreground.

    Args:
        mask: array-like mask; non-zero entries are foreground.

    Returns:
        1-D integer numpy array ``[start_1, length_1, start_2, length_2, ...]``
        with 1-based starts. Empty when the mask has no foreground pixels,
        including a zero-size mask.
    """
    # Binarise first so masks stored as 0/255, or holding several distinct
    # non-zero values, do not produce spurious transitions inside a run.
    pixels = np.asarray(mask).T.flatten() != 0
    # Always pad with background on both sides: runs touching either end of
    # the sequence then get a proper boundary, and a zero-size mask needs no
    # special case.
    padded = np.concatenate(([False], pixels, [False]))
    # A change at diff index i lies between padded[i] and padded[i + 1];
    # padded[i + 1] is original pixel i, whose 1-based position is i + 1.
    rle = np.where(padded[1:] != padded[:-1])[0] + 1
    # Entries alternate run start / one-past-run-end; turn the ends into lengths.
    rle[1::2] -= rle[::2]
    return rle


def rle_to_string(runs):
    """Render an iterable of run-length values as one space-separated string."""
    return ' '.join(map(str, runs))


# Test-only helper: the inverse of rle_encode.
# Copied from https://www.kaggle.com/paulorzp/run-length-encode-and-decode.
# Thanks to Paulo Pinto.
def rle_decode(rle_str, mask_shape, mask_dtype):
    """Rebuild a mask from a space-separated run-length string.

    Args:
        rle_str: alternating ``start length`` tokens, starts being 1-based
            positions in the column-major flattening of the mask.
        mask_shape: shape of the mask to produce.
        mask_dtype: numpy dtype of the returned array.

    Returns:
        Array of ``mask_shape`` holding 1 inside every run and 0 elsewhere.
    """
    tokens = rle_str.split()
    # Even-positioned tokens are starts, odd-positioned ones are lengths;
    # starts are shifted to 0-based indices.
    first = np.asarray(tokens[0::2], dtype=int) - 1
    counts = np.asarray(tokens[1::2], dtype=int)
    last = first + counts
    flat = np.zeros(np.prod(mask_shape), dtype=mask_dtype)
    for begin, stop in zip(first, last):
        flat[begin:stop] = 1
    # The runs index a column-major flattening, so fill the reversed shape
    # and transpose back.
    return flat.reshape(mask_shape[::-1]).T


In [37]:
# One accumulator list per output column; re-created on every run of the cell.
encoded_pixels, img_ids, height, width, category_ids = [], [], [], [], []

for detection in tqdm(json_data):
    segm = detection['segmentation']
    # Decode the stored segmentation with pycocotools, then re-encode it as a
    # space-separated run-length string.
    encoded_pixels.append(rle_to_string(rle_encode(mutils.decode(segm))))
    img_ids.append(detection['image_id'])
    category_ids.append(detection['category_id'])
    # 'size' entry 0 is recorded as the height, entry 1 as the width.
    mask_size = segm['size']
    height.append(mask_size[0])
    width.append(mask_size[1])


100%|██████████| 16228/16228 [00:32<00:00, 504.29it/s]


In [38]:
# Column name -> list of per-entry values, in the column order of the table.
data = dict(
    ImageId=img_ids,
    EncodedPixels=encoded_pixels,
    Height=height,
    Width=width,
    CategoryId=category_ids,
)

In [39]:
# Full table built from the column lists in `data`.
submission = pd.DataFrame(data)

# Small random subset kept as a dummy answer file. The fixed seed makes a
# fresh Restart & Run All pick the same rows every time, and the size is
# capped so a table with fewer than 50 rows does not raise ValueError.
answer_dummy = submission.sample(n=min(50, len(submission)), random_state=42)


In [40]:
# Preview the first rows of the sampled subset.
answer_dummy.head()

Unnamed: 0,ImageId,EncodedPixels,Height,Width,CategoryId
5072,62100,125335 5 126131 18 126927 30 127722 37 128518 ...,800,800,20
3472,235887,163059 1 163853 11 164648 18 165446 23 166243 ...,800,800,9
13880,152277,134287 21 135084 29 135882 32 136679 37 137476...,800,800,8
4255,188648,247424 6 248219 16 248908 55 249014 26 249685 ...,800,800,2
13508,96032,173892 7 174690 11 175488 15 176286 19 177084 ...,800,800,20


In [41]:
# Write the full table and the sampled subset to CSV files; index=False keeps
# the DataFrame index out of the output.
for frame, csv_name in ((submission, 'last_text.csv'),
                        (answer_dummy, 'answer_dummy.csv')):
    frame.to_csv(csv_name, index=False)

In [42]:
# IPython shell escape: print the kernel's working directory, which is where
# the relative CSV paths written above resolve.
! pwd

/home/ailab/code/kt/mmdetection
