In [1]:
import os
import xml
import shutil
import numpy as np
from os.path import join

from PIL import Image
from skimage import draw
from skimage.draw import polygon_perimeter

# MoNuSeg

In [10]:
# Original size of the raw images; annotation polygons are rasterized at this size.
def_im_size = (1000, 1000)
# Size that saved images and masks are resized to. Currently the original size;
# (500, 500) is the alternative noted for downsampling.
im_size = def_im_size

In [3]:
# NEW DATA FOLDERS
def mkdir(path):
    """Create the directory `path` if it does not exist yet.

    Missing parent directories are created as well, and calling this on an
    already existing directory is a no-op, so the cell can be re-run safely.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # `if not exists: mkdir` and does not fail when the parent is missing.
    os.makedirs(path, exist_ok=True)


# Root of the processed dataset; it must exist before its sub-folders are created.
data_dir = '../data/MoNuSeg_data'
mkdir(data_dir)

# Resized images, split by subset
train_imgs_dir = join(data_dir, 'train_images')
mkdir(train_imgs_dir)
test_imgs_dir = join(data_dir, 'test_images')
mkdir(test_imgs_dir)

# Annotation rasters: filled masks and outline-only contours
all_masks_dir = join(data_dir, 'all_masks')
mkdir(all_masks_dir)
all_countuors_dir = join(data_dir, 'all_contours')
mkdir(all_countuors_dir)


In [4]:
# RAW DATA FOLDERS
raw_data_dir = '../data/MoNuSeg'

# One sub-folder per subset of the raw data
raw_train_dir = join(raw_data_dir, 'MoNuSegTrainingData')
raw_test_dir = join(raw_data_dir, 'MoNuSegTestData')

# Tissue images of each subset
raw_train_imgs_dir = join(raw_train_dir, 'TissueImages')
raw_test_imgs_dir = join(raw_test_dir, 'TissueImages')

# Annotation files of each subset
raw_train_anno_dir = join(raw_train_dir, 'Annotations')
raw_test_anno_dir = join(raw_test_dir, 'Annotations')


In [5]:
# Progress checklist kept as a bare string, so the cell just echoes it.
# NOTE(review): presumably '+' marks finished steps and '-' pending ones - confirm.
'''
+ Copy Images to new folder
    + Train
    + Test
- Copy Masks to new folder
    + Make Masks train
    - Make Masks test
- Copy Countuors to new folder
    + Make Countuors train
    - Make Countuors test
'''

'\n+ Copy Images to new folder\n    + Train\n    + Test\n- Copy Masks to new folder\n    + Make Masks train\n    - Make Masks test\n- Copy Countuors to new folder\n    + Make Countuors train\n    - Make Countuors test\n'

In [11]:
# Copy Images to new folder
def copy_images(src_img_dir, dst_img_dir, im_size):
    """Convert every image in `src_img_dir` to a resized PNG in `dst_img_dir`.

    Directory entries whose name contains 'ipynb' (e.g. notebook checkpoint
    folders) are skipped. Each output keeps the source file name with its
    extension replaced by '.png'.

    Args:
        src_img_dir: Folder holding the source images.
        dst_img_dir: Existing folder the PNG files are written to.
        im_size: Target size handed to `Image.resize`.
    """
    imgs_fn = sorted(p for p in os.listdir(src_img_dir) if 'ipynb' not in p)

    for filename in imgs_fn:
        src = join(src_img_dir, filename)
        # splitext handles extensions of any length ('.tif', '.tiff', ...),
        # unlike slicing off a fixed four characters.
        stem = os.path.splitext(filename)[0]
        dst = join(dst_img_dir, stem + '.png')

        # Context manager closes the source file handle once the copy is saved.
        with Image.open(src) as img:
            img.resize(im_size).save(dst, 'PNG')
        
    
# Train: raw training tissue images -> PNGs in train_imgs_dir, resized to im_size
copy_images(raw_train_imgs_dir, train_imgs_dir, im_size)

# Test: raw test tissue images -> PNGs in test_imgs_dir, resized to im_size
copy_images(raw_test_imgs_dir, test_imgs_dir, im_size)
    

In [7]:
# Make masks and contours and save them to the new folders
def get_coordinates(regions):
    """Collect the vertex coordinates of every region.

    Args:
        regions: Iterable of DOM elements; each one is searched for its
            'Vertex' descendants, whose 'X' and 'Y' attributes are read.

    Returns:
        A list with one entry per region, in input order. Each entry is the
        list of that region's vertices as ``[x, y]`` float pairs, in document
        order (empty when the region has no vertex).
    """
    def _vertex_xy(vertex):
        # Attributes are stored as text; convert both to float.
        x = float(vertex.getAttribute('X'))
        y = float(vertex.getAttribute('Y'))
        return [x, y]

    return [
        [_vertex_xy(vertex) for vertex in region.getElementsByTagName('Vertex')]
        for region in regions
    ]

def poly2mask(vertex_row_coords, vertex_col_coords, shape):
    """Rasterize one polygon into a boolean mask.

    Based on
    https://github.com/scikit-image/scikit-image/issues/1103#issuecomment-52378754

    Args:
        vertex_row_coords: Row coordinate of each polygon vertex.
        vertex_col_coords: Column coordinate of each polygon vertex.
        shape: Shape of the output mask; it is also passed to `draw.polygon`.

    Returns:
        Boolean array of the given shape, True at the pixels returned by
        `draw.polygon` for this polygon.
    """
    fill_row_coords, fill_col_coords = draw.polygon(
        vertex_row_coords, vertex_col_coords, shape)
    # Builtin `bool` instead of the `np.bool` alias, which newer NumPy
    # releases no longer provide.
    mask = np.zeros(shape, dtype=bool)
    mask[fill_row_coords, fill_col_coords] = True
    return mask


def coords_to_mask(coords, im_size):
    """Build one binary mask that is the union of all polygons in `coords`.

    Args:
        coords: List of polygons, each a list of ``[x, y]`` vertices.
        im_size: Shape of the array the polygons are rasterized into.

    Returns:
        Boolean array, True wherever at least one polygon covers the pixel.
        Polygons are drawn with x as the row coordinate and the result is
        transposed before returning, so the returned shape is `im_size`
        reversed (identical for square sizes).
    """
    # Builtin `bool` instead of the `np.bool` alias, which newer NumPy
    # releases no longer provide.
    binary_mask = np.zeros(im_size, dtype=bool)

    for coord in coords:
        # A polygon needs at least 3 vertices (same rule as in
        # coords_to_contours); an empty region would otherwise fail on the
        # column slicing below.
        if len(coord) < 3:
            continue

        points = np.array(coord)
        # Overlapping polygons are merged with OR, so overlaps are not counted.
        binary_mask |= poly2mask(points[:, 0], points[:, 1], im_size)

    return binary_mask.T

def coords_to_contours(coords, im_size):
    """Draw the outline of every polygon in `coords` into one image.

    Args:
        coords: List of polygons, each a list of ``[x, y]`` vertices.
        im_size: Shape of the output array.

    Returns:
        uint8 array of shape `im_size`, 255 on polygon outlines and 0
        elsewhere. Polygons with fewer than 3 vertices are ignored.
    """
    contour_map = np.zeros(im_size, dtype=np.uint8)

    for vertices in coords:
        pts = np.array(vertices)
        # A polygon has to have at least 3 coordinates
        if len(pts) <= 2:
            continue

        # y is the row coordinate and x the column coordinate
        rows, cols = polygon_perimeter(
            pts[:, 1], pts[:, 0], shape=contour_map.shape, clip=True)
        contour_map[rows, cols] = 255

    return contour_map

def copy_masks(ann_dir, im_size):
    """Rasterize every annotation file in `ann_dir` into mask and contour PNGs.

    For each file the 'Region' elements are parsed, a filled binary mask and a
    contour image are computed at the original resolution (`def_im_size`),
    and both are resized to `im_size` and saved under the annotation's base
    name in `all_masks_dir` and `all_countuors_dir` respectively.

    Directory entries whose name contains 'ipynb' are skipped. Progress is
    printed, one line per file.

    Args:
        ann_dir: Folder holding the XML annotation files. This argument is
            honoured; it is no longer overwritten with the training folder.
        im_size: Target size handed to `Image.resize` for both outputs.
    """
    # `import xml` alone does not load the `xml.dom.minidom` submodule, so
    # import it explicitly where it is used.
    import xml.dom.minidom

    anns_fn = sorted(p for p in os.listdir(ann_dir) if 'ipynb' not in p)

    for i, filename in enumerate(anns_fn):
        print('[{:2d}/{:3d}] {}'.format(i+1, len(anns_fn), filename))
        # Get annotation file
        file_path = join(ann_dir, filename)
        doc = xml.dom.minidom.parse(file_path)

        # Get Regions info
        regions = doc.getElementsByTagName('Region')
        coords = get_coordinates(regions)

        # Rasterize at the original resolution; resizing happens on save.
        binary_mask = coords_to_mask(coords, def_im_size)
        countuors_mask = coords_to_contours(coords, def_im_size)

        # Save masks under the annotation's base name, whatever its extension
        stem = os.path.splitext(filename)[0]
        dst_bi = join(all_masks_dir, stem + '.png')
        dst_co = join(all_countuors_dir, stem + '.png')

        Image.fromarray(binary_mask).resize(im_size).save(dst_bi, 'PNG')
        Image.fromarray(countuors_mask).resize(im_size).save(dst_co, 'PNG')


In [9]:
# NOTE(review): this rebinds the notebook-wide `im_size` that an earlier cell also
# defines; keep both values in sync or images and masks are saved at different sizes.
im_size = (1000, 1000)

# First pass: called with the training annotation folder
print('Train masks generation...')
copy_masks(raw_train_anno_dir, im_size)
# Second pass: called with the test annotation folder
print('Test masks generation...')
copy_masks(raw_test_anno_dir, im_size)

Train masks generation...
[ 1/ 30] TCGA-18-5592-01Z-00-DX1.xml
[ 2/ 30] TCGA-21-5784-01Z-00-DX1.xml
[ 3/ 30] TCGA-21-5786-01Z-00-DX1.xml
[ 4/ 30] TCGA-38-6178-01Z-00-DX1.xml
[ 5/ 30] TCGA-49-4488-01Z-00-DX1.xml
[ 6/ 30] TCGA-50-5931-01Z-00-DX1.xml
[ 7/ 30] TCGA-A7-A13E-01Z-00-DX1.xml
[ 8/ 30] TCGA-A7-A13F-01Z-00-DX1.xml
[ 9/ 30] TCGA-AR-A1AK-01Z-00-DX1.xml
[10/ 30] TCGA-AR-A1AS-01Z-00-DX1.xml
[11/ 30] TCGA-AY-A8YK-01A-01-TS1.xml
[12/ 30] TCGA-B0-5698-01Z-00-DX1.xml
[13/ 30] TCGA-B0-5710-01Z-00-DX1.xml
[14/ 30] TCGA-B0-5711-01Z-00-DX1.xml
[15/ 30] TCGA-CH-5767-01Z-00-DX1.xml
[16/ 30] TCGA-DK-A2I6-01A-01-TS1.xml
[17/ 30] TCGA-E2-A14V-01Z-00-DX1.xml
[18/ 30] TCGA-E2-A1B5-01Z-00-DX1.xml
[19/ 30] TCGA-G2-A2EK-01A-02-TSB.xml
[20/ 30] TCGA-G9-6336-01Z-00-DX1.xml
[21/ 30] TCGA-G9-6348-01Z-00-DX1.xml
[22/ 30] TCGA-G9-6356-01Z-00-DX1.xml
[23/ 30] TCGA-G9-6362-01Z-00-DX1.xml
[24/ 30] TCGA-G9-6363-01Z-00-DX1.xml
[25/ 30] TCGA-HE-7128-01Z-00-DX1.xml
[26/ 30] TCGA-HE-7129-01Z-00-DX1.xml
[27/ 30] TCG

In [41]:
import torch

# Scratch cell: 10x10 tensor of random integers drawn from [0, 100)
a = torch.randint(low=0, high=100, size=(10, 10))


tensor(5075)

In [53]:
# Scratch cell: folder part of a model file path, rebuilt from its '/'-separated pieces
a = '../../model/model-0.hdf5'
b = a.split('/')
b.pop()  # drop the file name, keeping only the folder components
join(*b)

'../../model'