In [75]:
import os
import cv2
import numpy as np
import json
import shutil
from tqdm import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split

In [76]:
# Root folder of the training data; the names of its immediate sub-folders
# are used as the image ids throughout the notebook.
FOLDER_PATH = "./train/"

# os.walk yields (dirpath, dirnames, filenames) tuples; the first one describes
# FOLDER_PATH itself, so index 1 is the list of its direct sub-folders.
image_ids = next(os.walk(top=FOLDER_PATH))[1]
image_ids[:5]

['00071198d059ba7f5914a526d124d28e6d010c92466da21d4a04cd5413362552',
 '003cee89357d9fe13516167fd67b609a164651b21934585648c740d2c3d86dc1',
 '00ae65c1c6631ae6f2be1a449902976e6eb8483bf6b0740d00530220832c6d3e',
 '0121d6759c5adb290c8e828fc882f37dfaf3663ec885c663859948c154a443ed',
 '01d44a26f6680c42ba94c9bc6339228579a95d0e2695b149b7cc0c9592b21baf']

In [77]:
def get_image_mask_pairs(data_dir):
    """Collect image and mask file paths for every sample folder under ``data_dir``.

    The expected layout is ``<data_dir>/<id>/images/<id>.png`` for the image and
    ``<data_dir>/<id>/masks/<mask files>`` for its masks. The image path is
    built from that pattern without checking that the file exists.

    Args:
        data_dir: Folder whose immediate sub-folders are named by sample id.
            A trailing path separator is optional.

    Returns:
        A tuple ``(image_paths, mask_paths, mask_ids)``:
        ``image_paths`` has one entry per sub-folder of ``data_dir``;
        ``mask_paths`` has one entry per file found in any ``masks`` folder;
        ``mask_ids`` is parallel to ``mask_paths`` and holds each mask's file
        name without its extension.

    Raises:
        FileNotFoundError: If ``data_dir`` is not an existing directory.
    """
    if not os.path.isdir(data_dir):
        raise FileNotFoundError("Data directory not found: {!r}".format(data_dir))

    # Discover the sample ids from the argument itself rather than from a
    # notebook-level variable, so the function works for any folder and on a
    # fresh kernel. The listing is intentionally left in os.walk order.
    sample_ids = next(os.walk(data_dir))[1]

    image_paths = []
    mask_paths = []
    mask_ids = []

    for id_ in tqdm(sample_ids, total=len(sample_ids)):
        sample_dir = os.path.join(data_dir, id_)
        image_paths.append(os.path.join(sample_dir, 'images', id_ + '.png'))

        masks_dir = os.path.join(sample_dir, 'masks')
        # Index 2 of the first os.walk tuple is the list of plain files.
        for mask_file in next(os.walk(masks_dir))[2]:
            mask_paths.append(os.path.join(masks_dir, mask_file))
            # splitext handles extensions of any length (e.g. ".tiff").
            mask_ids.append(os.path.splitext(mask_file)[0])

    return image_paths, mask_paths, mask_ids

# Gather the image paths, mask paths and mask ids for the training folder,
# then preview the first few mask ids.
image_paths, mask_paths, mask_ids = get_image_mask_pairs(data_dir=FOLDER_PATH)
mask_ids[:5]

100%|██████████████████████████████████████████████████████████████████████████████| 670/670 [00:00<00:00, 4496.65it/s]


['07a9bf1d7594af2763c86e93f05d22c4d5181353c6d3ab30a345b908ffe5aadc',
 '0e548d0af63ab451616f082eb56bde13eb71f73dfda92a03fbe88ad42ebb4881',
 '0ea1f9e30124e4aef1407af239ff42fd6f5753c09b4c5cac5d08023c328d7f05',
 '0f5a3252d05ecdf453bdd5e6ad5322c454d8ec2d13ef0f0bf45a6f6db45b5639',
 '2c47735510ef91a11fde42b317829cee5fc04d05a797b90008803d7151951d58']

In [78]:
def mask_to_polygons(mask, epsilon=1.0):
    """Convert a mask into a list of flat outer-boundary polygons.

    Args:
        mask: Array-like mask; every non-zero pixel is treated as foreground.
            Assumed to be a single-channel 2-D image, as required by
            ``cv2.findContours`` — TODO confirm against callers.
        epsilon: Accepted for interface compatibility but not used; no polygon
            simplification is applied.

    Returns:
        A list of polygons, each a flat list of the contour's point
        coordinates in the order OpenCV returns them. Contours with fewer
        than three points are dropped.
    """
    # findContours needs an 8-bit single-channel image; binarising here lets
    # boolean or wider-integer masks through without changing uint8 results.
    binary = (np.asarray(mask) != 0).astype(np.uint8)

    # RETR_EXTERNAL keeps only outermost boundaries. RETR_TREE would also
    # return the boundaries of holes, which then show up as extra polygons.
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # A contour with at least three points always flattens to six or more
    # values, so one length check is sufficient.
    return [contour.reshape(-1).tolist() for contour in contours if len(contour) > 2]

In [79]:
# Empty COCO skeleton: no images or annotations yet, and a single
# "Nuclei" category with id 1.
coco_output = dict(
    images=[],
    annotations=[],
    categories=[dict(id=1, name="Nuclei")],
)

In [80]:
# Accumulators for the COCO "annotations" and "images" records.
annotations, images = [], []

In [82]:
# Build the COCO "images" and "annotations" records for every training image.
#
# Both lists are re-created here so that re-running this cell rebuilds the
# records from scratch instead of appending duplicates to an earlier run.
images = []
annotations = []

# COCO annotation ids must be unique integers. A single mask file can yield
# several contours, so ids are assigned sequentially rather than taken from
# the mask file name.
next_annotation_id = 1

for img_path, image_id in zip(image_paths, image_ids):
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be read.
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    height, width = img.shape[:2]
    # NOTE(review): the image id stays the folder-name string so records remain
    # traceable to the dataset; the COCO spec uses integer ids — confirm that
    # downstream tools accept strings.
    images.append({
        "id": image_id,
        "file_name": os.path.basename(img_path),
        "height": height,
        "width": width,
    })

    masks_folder = os.path.join(FOLDER_PATH, image_id, 'masks')
    # Sorted so the sequential annotation ids are reproducible between runs.
    for mask_filename in sorted(os.listdir(masks_folder)):
        mask_path = os.path.join(masks_folder, mask_filename)
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(mask_path) as mask:
            mask_data = np.array(mask)

        for label in np.unique(mask_data):
            if label == 0:
                continue  # label 0 is skipped (treated as background)

            label_mask = (mask_data == label).astype(np.uint8)
            # RETR_EXTERNAL: only outer boundaries. With RETR_TREE every hole
            # inside a mask would become its own spurious annotation.
            contours, _ = cv2.findContours(
                label_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )

            for contour in contours:
                if len(contour) < 3:
                    continue  # fewer than three points is not a polygon

                points = contour.reshape(-1, 2)
                annotations.append({
                    "id": next_annotation_id,
                    "image_id": image_id,
                    "category_id": 1,
                    "segmentation": [points.reshape(-1).tolist()],
                    "area": float(cv2.contourArea(points)),
                    "bbox": list(cv2.boundingRect(points)),
                    "iscrowd": 0,
                })
                next_annotation_id += 1

coco_output = {
    "images": images,
    "annotations": annotations,
    "categories": [{"id": 1, "name": "Nuclei"}],
}
    

In [87]:
# Persist the COCO dictionary as pretty-printed JSON (two-space indent).
with open('coco_annotations.json', mode='w') as json_file:
    json.dump(coco_output, fp=json_file, indent=2)

In [89]:
output_dir = "./just_images/"
# shutil.copy does not create missing folders, so make sure the target exists
# before copying; exist_ok keeps the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

# Copy every source image into the flat output folder, keeping its file name.
for img_path in image_paths:
    shutil.copy(img_path, os.path.join(output_dir, os.path.basename(img_path)))