In [1]:
## Library Imports
import glob
import os
import albumentations as albu
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
from tqdm import tqdm
import json

In [3]:
# COCO category table: every annotation in this dataset uses the single class below
cell_category = {
    "supercategory": "cell_st",
    "id": 1,
    "name": "cell",
}
categories = [cell_category]

In [4]:
# Define the data paths (all three end with '/' because later cells build
# file paths by plain string concatenation)
img_root = "../TCIA_SegPC_dataset/validation/x/"
mask_root = "../TCIA_SegPC_dataset/validation/y/"
dest_root = "../TCIA_SegPC_dataset/coco_val/"

# os.listdir returns entries in arbitrary order and also lists sub-directories
# (for example Jupyter's ".ipynb_checkpoints"). Keep regular files only and
# sort them so every run processes the images in the same order.
names = sorted(
    entry for entry in os.listdir(img_root)
    if os.path.isfile(os.path.join(img_root, entry))
)

In [5]:
# Make sure the output tree exists: the root itself ('' suffix) plus one
# sub-folder each for resized images, per-instance masks and merged masks
for subfolder in ('', 'x', 'instance_y', 'semantic_y'):
    os.makedirs(dest_root+subfolder, exist_ok=True)

# Accumulators for the COCO "images" and "annotations" sections
images, annos = [], []

# Common output resolution, stored as (height, width)
res_size = (1080, 1440)

In [6]:
# Convert every image listed in `names` and its per-instance masks into COCO records.
#
# Reads from earlier cells: names, img_root, mask_root, dest_root, res_size.
# Writes: resized images to dest_root/x, one mask file per instance to
# dest_root/instance_y, one merged binary mask per image to dest_root/semantic_y,
# and fills the `images` / `annos` lists used when the COCO JSON is written.


def build_annotation(mask, mask_id, image_id):
    """Build one COCO annotation from a single-instance mask.

    Parameters
    ----------
    mask : numpy array as returned by ``cv2.imread(path, 0)``; non-zero pixels
        are treated as the instance by ``cv2.findContours``.
    mask_id : value stored under ``ann['id']``.
    image_id : value stored under ``ann['image_id']``.

    Returns
    -------
    dict or None
        Annotation holding the polygon, area and bounding box of the external
        contour with the most points, or ``None`` when the mask yields no
        contour with at least three points.
    """
    found = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # OpenCV 2/4 return (contours, hierarchy), OpenCV 3 returns
    # (image, contours, hierarchy): the contour list is second from the end in both.
    contours = found[-2]
    if len(contours) == 0:
        return None

    # Keep the contour with the most points (the first one wins on ties)
    contour = max(contours, key=len)
    # reshape keeps a (N, 2) layout even for a one-point contour, where
    # squeeze() would collapse the array to shape (2,)
    points = contour.reshape(-1, 2)
    if len(points) < 3:
        # fewer than three vertices cannot describe a polygon
        return None

    min_x, min_y = points.min(axis=0)
    max_x, max_y = points.max(axis=0)

    return {
        # NOTE(review): COCO tooling generally expects integer annotation ids;
        # the mask file stem is kept so existing consumers of COCO.json are
        # unaffected - confirm before changing.
        'id': mask_id,
        'image_id': image_id,
        # flat [x1, y1, x2, y2, ...] polygon
        'segmentation': [points.ravel().astype('float64').tolist()],
        'area': cv2.contourArea(contour),
        # [x, y, width, height]; the origin is shifted by half a pixel
        'bbox': [float(min_x-0.5), float(min_y-0.5),
                 float(max_x-min_x+1), float(max_y-min_y+1)],
        'iscrowd': 0,
        'category_id': 1,
    }


# Start from empty lists so that re-running this cell does not append duplicates
images = []
annos = []

# cv2.resize takes (width, height) while res_size is stored as (height, width)
dsize = res_size[::-1]

# Iterate over the images
for name in tqdm(names):

    # Read the image and bring it to the common resolution
    image = np.array(Image.open(img_root+name))
    image = cv2.resize(image, dsize, interpolation=cv2.INTER_NEAREST)

    # Convert numpy array to image and save
    Image.fromarray(image).save(dest_root+'x/'+name)

    # Only the first two axes are needed, so single-channel images work too
    h, w = image.shape[:2]
    # File stem without extension; it must be numeric because it becomes the image id
    index = os.path.splitext(name)[0]
    image_id = int(index)

    # Create the image info
    images.append({
        'file_name': name,
        'height': h,
        'width': w,
        'id': image_id,
    })

    # Union of all instance masks of this image
    semantic_mask = np.zeros(res_size, dtype=np.uint8)

    # Masks are matched by the "<image stem>_" prefix; sorted for a stable order
    for mask_name in sorted(glob.glob(mask_root+index+"_*")):
        # Read the mask as a single-channel image
        mask = cv2.imread(mask_name, 0)
        if mask is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError("could not read mask file: " + mask_name)

        # Nearest-neighbour keeps the original mask values when resizing
        mask = cv2.resize(mask, dsize, interpolation=cv2.INTER_NEAREST)
        semantic_mask = np.maximum(semantic_mask, mask)

        # Mask file stem, independent of the platform's path separator
        mask_id = os.path.splitext(os.path.basename(mask_name))[0]

        # Save the raw mask values; matplotlib's imsave would push a 2-D array
        # through a colormap and write a colour image instead
        cv2.imwrite(dest_root+'instance_y/'+mask_id+'.bmp', mask)

        ann = build_annotation(mask, mask_id, image_id)
        if ann is None:
            tqdm.write("skipping " + mask_name + ": no usable contour")
            continue
        annos.append(ann)

    # Save the semantic mask as an 8-bit image with values 0 / 255
    semantic_mask = ((semantic_mask > 0) * 255).astype(np.uint8)
    cv2.imwrite(dest_root+'semantic_y/'+name, semantic_mask)

100%|██████████| 200/200 [05:19<00:00,  1.60s/it]


In [7]:
# Assemble the top-level COCO structure from the records collected above
dataset = {}
dataset["licenses"] = []
dataset["images"] = images
dataset["annotations"] = annos
dataset["categories"] = categories

# Serialize the whole dataset into a single JSON file next to the output folders
coco_json_path = dest_root+'COCO.json'
with open(coco_json_path, 'w') as fp:
    fp.write(json.dumps(dataset))

In [8]:
# Sanity check: count the files that ended up in the image and instance-mask folders
n_images_saved = len(os.listdir(dest_root+'x'))
n_instances_saved = len(os.listdir(dest_root+'instance_y'))
print("number of images saved: ", n_images_saved)
print("number of instances saved: ", n_instances_saved)

number of images saved:  200
number of instances saved:  990
