In [1]:
from PIL import Image
import os
from uuid import uuid4
import numpy as np
from tqdm import tqdm

In [2]:
# Every non-hidden sub-directory of "masks" is treated as one label.
# Plain files are skipped: os.listdir returns them too, and the per-label
# directory listing done later would fail on a non-directory entry.
labels = [
    entry
    for entry in os.listdir("masks")
    if not entry.startswith(".") and os.path.isdir(os.path.join("masks", entry))
]

In [3]:
# Show the label names discovered under "masks".
labels

['others', 'scar', 'drusen', 'hemorrhage', 'exudate']

In [4]:
# Build {image stem: {label: path to that label's mask file}} by walking
# masks/<label>/ for every label.
image_masks = {}
for label in labels:
    label_dir = os.path.join("masks", label)
    for file in os.listdir(label_dir):
        if file.startswith("."):
            continue
        # splitext drops the extension whatever its length; slicing off the
        # last four characters only worked for three-letter extensions.
        image_id = os.path.splitext(file)[0]
        image_masks.setdefault(image_id, {})[label] = os.path.join(label_dir, file)

In [16]:
def get_mask(mask_arr):
    """Extract one 8-connected region of zero-valued pixels from ``mask_arr``.

    The region containing the first zero pixel reported by ``np.where`` is
    flood-filled. Every pixel of that region is overwritten with 255 in
    ``mask_arr`` itself, so calling this function repeatedly on the same
    array yields one region per call until none are left.

    Args:
        mask_arr: 2-D array, modified in place. Pixels equal to 0 are the
            ones that belong to regions.

    Returns:
        A new array with the same shape as ``mask_arr``, filled with 255
        except for the extracted region, which is 0. ``None`` when
        ``mask_arr`` contains no zero pixel.

    Raises:
        ValueError: if ``mask_arr`` contains a zero but is not 2-D (the shape
            cannot be unpacked into two values).
    """
    mask_indices = np.where(mask_arr == 0)
    if len(mask_indices[0]) == 0:
        return None
    max_y, max_x = mask_arr.shape

    # (dy, dx) steps to the eight surrounding pixels.
    neighbour_offsets = (
        (-1, -1), (-1, 0), (-1, 1),
        (0, -1), (0, 1),
        (1, -1), (1, 0), (1, 1),
    )

    new_mask = np.full(mask_arr.shape, 255)
    seed_y, seed_x = int(mask_indices[0][0]), int(mask_indices[1][0])
    # Mark the seed as consumed right away. Without this an isolated single
    # pixel is never erased from mask_arr and the next call finds it again.
    mask_arr[seed_y, seed_x] = 255

    # A list used as a stack gives O(1) push/pop; visiting order does not
    # affect which pixels end up in the region.
    pending = [(seed_y, seed_x)]
    while pending:
        y, x = pending.pop()
        new_mask[y, x] = 0
        for dy, dx in neighbour_offsets:
            ny, nx = y + dy, x + dx
            if 0 <= ny < max_y and 0 <= nx < max_x and mask_arr[ny, nx] == 0:
                # Erase on enqueue so each pixel is queued at most once.
                mask_arr[ny, nx] = 255
                pending.append((ny, nx))
    return new_mask

def split_instance_from_mask(image_path, output_folder):
    """Write each connected zero-valued region of a mask image to its own file.

    The image at ``image_path`` is loaded into an array and ``get_mask`` is
    called on it repeatedly; every region it returns is saved into
    ``output_folder`` under a random UUID file name with a ``.jpg`` suffix.

    Args:
        image_path: path of the mask image to split.
        output_folder: directory that receives the per-region images. It is
            not created here.
    """
    # Load inside a context manager so the file handle is released as soon as
    # the pixels have been copied into the array.
    with Image.open(image_path) as source_image:
        mask = np.array(source_image)

    # get_mask erases each region from `mask` as it extracts it, so the loop
    # ends once no zero pixels remain.
    new_mask = get_mask(mask)
    while new_mask is not None:
        # NOTE(review): JPEG is lossy, so saved pixels may not be exactly
        # 0/255 — confirm downstream readers tolerate that.
        target_path = os.path.join(output_folder, "{}.jpg".format(uuid4()))
        Image.fromarray(new_mask.astype(np.uint8)).save(target_path)
        new_mask = get_mask(mask)

In [17]:
import pickle
import shutil

# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load an images.pkl that you produced yourself.
# This rebinds `image_masks`, replacing any value built earlier in the notebook.
with open("images.pkl", "rb") as f:
    image_masks = pickle.load(f)

# Only the first 25 entries are exported.
image_keys = list(image_masks.keys())[:25]
for image in image_keys:
    image_folder = os.path.join("data", image, "images")
    masks_folder = os.path.join("data", image, "masks")

    # Pure-Python directory creation and copy: no shell quoting problems with
    # unusual names and no dependency on a POSIX shell.
    os.makedirs(image_folder, exist_ok=True)
    source_image = os.path.join("images", image + ".jpg")
    if os.path.isfile(source_image):
        shutil.copy(source_image, image_folder)
    else:
        # Keep going, as the shell `cp` did: report and continue with the masks.
        print("missing source image:", source_image)

    masks = image_masks[image]
    for mask in masks:
        print(image, mask)
        mask_folder = os.path.join(masks_folder, mask)
        os.makedirs(mask_folder, exist_ok=True)
        split_instance_from_mask(str(masks[mask]), str(mask_folder))

N0042 others
N0042 exudate
N0043 others
A0029 others
N0132 others
N0130 others
N0131 others
N0181 others
N0208 others
N0180 others
N0169 others
A0073 others
N0168 others
N0201 others
N0200 others
N0200 drusen
A0080 others
N0167 others
A0030 others
A0013 scar
A0002 scar
A0002 hemorrhage
A0002 exudate
A0016 scar
A0016 hemorrhage
A0071 scar
A0071 hemorrhage
A0076 scar
A0075 scar
A0082 scar
A0069 scar
A0069 drusen
A0069 exudate
