In [10]:
import os
import shutil
import xml
import xml.dom.minidom
from os.path import join

import cv2
import numpy as np
import scipy.io
import scipy.spatial.distance as distance
from PIL import Image
from scipy import ndimage
from skimage import draw
from skimage.draw import polygon_perimeter

In [2]:
def mkdir(path):
    '''Create directory `path` if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is accepted silently, so the call can be repeated safely.

    Raises:
        FileExistsError: if `path` exists but is not a directory.
    '''
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # "if not exists: mkdir" and also works when the parent is missing.
    os.makedirs(path, exist_ok=True)

# MoNuSeg

In [3]:
# Size at which the MoNuSeg annotations are rasterized (original image size).
def_im_size = (1000, 1000)
# Size the exported images and masks are resized to; (500, 500) is the
# alternative noted in the original cell.
im_size = (1000, 1000)

In [4]:
# NEW DATA FOLDERS
# Output layout of the converted MoNuSeg dataset.
data_dir = '../data/MoNuSeg_data'

(train_imgs_dir,
 test_imgs_dir,
 all_masks_dir,
 all_countuors_dir,
 all_centers_dir) = (
    join(data_dir, sub_dir)
    for sub_dir in ('train_images', 'test_images', 'all_masks',
                    'all_contours', 'all_centers')
)

# The root folder is created first, then each of its sub-folders.
mkdir(data_dir)
mkdir(train_imgs_dir)
mkdir(test_imgs_dir)
mkdir(all_masks_dir)
mkdir(all_countuors_dir)
mkdir(all_centers_dir)


In [5]:
# RAW DATA FOLDERS
# Layout of the original MoNuSeg download: every split has a folder with the
# tissue images and a folder with their annotations.
raw_data_dir = '../data/MoNuSeg'

raw_train_dir, raw_test_dir = (
    join(raw_data_dir, split)
    for split in ('MoNuSegTrainingData', 'MoNuSegTestData')
)
raw_train_imgs_dir, raw_train_anno_dir = (
    join(raw_train_dir, kind) for kind in ('TissueImages', 'Annotations')
)
raw_test_imgs_dir, raw_test_anno_dir = (
    join(raw_test_dir, kind) for kind in ('TissueImages', 'Annotations')
)


In [6]:
# Copy Images to new folder    
def copy_images(src_img_dir, dst_img_dir, im_size=None, train=True):
    '''Convert every image of `src_img_dir` to a PNG file in `dst_img_dir`.

    Note: a later cell of this notebook redefines `copy_images` with a
    different signature, so this cell has to be re-run before it is used again.

    Args:
        src_img_dir: folder with the source images; entries whose name
            contains 'ipynb' (e.g. `.ipynb_checkpoints`) are skipped.
        dst_img_dir: existing folder that receives `<basename>.png`.
        im_size: optional size passed to `Image.resize`; images keep their
            original size when it is falsy.
        train: unused; kept so that existing calls keep working.
    '''
    imgs_fn = sorted(p for p in os.listdir(src_img_dir) if 'ipynb' not in p)

    for filename in imgs_fn:
        src = join(src_img_dir, filename)
        # splitext copes with extensions of any length (.tif, .tiff, .jpeg, ...),
        # unlike slicing off the last four characters.
        dst = join(dst_img_dir, os.path.splitext(filename)[0] + '.png')

        # The context manager closes the source file once the PNG is written.
        with Image.open(src) as src_img:
            out_img = src_img.resize(im_size) if im_size else src_img
            out_img.save(dst, 'PNG')
    
# Train split
copy_images(src_img_dir=raw_train_imgs_dir, dst_img_dir=train_imgs_dir, im_size=im_size)

# Test split
copy_images(src_img_dir=raw_test_imgs_dir, dst_img_dir=test_imgs_dir, im_size=im_size)
    

In [42]:
# Make masks and save them to a new folder
def get_coordinates(regions):
    '''Collect the polygon vertices of every region element.

    Returns one list per region, in the given order; each list holds the
    `[x, y]` float pairs read from the `X` / `Y` attributes of the region's
    `Vertex` elements, in document order.
    '''
    return [
        [
            [float(node.getAttribute('X')), float(node.getAttribute('Y'))]
            for node in region.getElementsByTagName('Vertex')
        ]
        for region in regions
    ]

def poly2mask(vertex_row_coords, vertex_col_coords, shape):
    '''Rasterize one filled polygon into a boolean mask.

    Args:
        vertex_row_coords: row coordinate of every polygon vertex.
        vertex_col_coords: column coordinate of every polygon vertex.
        shape: shape of the returned mask; it is also handed to
            `draw.polygon` as its `shape` argument.

    Returns:
        Boolean array of `shape` that is True at the pixels returned by
        `draw.polygon`.
    '''
    # https://github.com/scikit-image/scikit-image/issues/1103#issuecomment-52378754
    fill_row_coords, fill_col_coords = draw.polygon(
        vertex_row_coords, vertex_col_coords, shape)
    # Builtin `bool` instead of the `np.bool` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    mask = np.zeros(shape, dtype=bool)
    mask[fill_row_coords, fill_col_coords] = True
    return mask


def coords_to_mask(coords, im_size):
    '''Generate one binary mask from the coordinates of many polygons.

    Args:
        coords: list of polygons, each a list of `[x, y]` vertices.
        im_size: shape used to rasterize every polygon.

    Returns:
        Boolean array that is True wherever at least one polygon covers the
        pixel. The x values are used as row coordinates while rasterizing, so
        the result is transposed before it is returned.
    '''
    binary_mask = np.zeros(im_size, dtype=bool)

    for region in coords:
        # A region without vertices cannot be indexed as an (N, 2) array.
        if len(region) == 0:
            continue
        points = np.array(region)

        polygon = poly2mask(points[:, 0], points[:, 1], im_size)

        # Overlapping polygons are merged with a logical OR.
        binary_mask |= polygon

    return binary_mask.T

def coords_to_contours(coords, im_size):
    '''Draw the outline of every polygon into one uint8 image.

    `coords` is a list of polygons given as `[x, y]` vertices. The returned
    array has shape `im_size`, is 255 on the polygon perimeters and 0
    elsewhere.
    '''
    contour_img = np.zeros(im_size, dtype=np.uint8)

    for region in coords:
        vertices = np.array(region)
        # A polygon needs at least three vertices.
        if len(vertices) <= 2:
            continue
        # y values are the rows and x values the columns of the image.
        rows, cols = polygon_perimeter(vertices[:,1], vertices[:,0],
                                       shape=contour_img.shape, clip=True)
        contour_img[rows, cols] = 255

    return contour_img

def binary_poly_to_center_gradiant(polygon):
    '''Turn a 2-D polygon mask into a gradient that peaks at its center.

    Every non-zero pixel of `polygon` gets its Euclidean distance to the
    center of mass of `polygon`. The distances are min-max normalized over
    the whole map and inverted inside the polygon, so the pixel closest to
    the center is the brightest and the farthest one is 0.

    Args:
        polygon: 2-D array; non-zero entries belong to the polygon.

    Returns:
        Float array with the shape of `polygon`. An empty mask gives an
        all-zero map, and a mask whose distances are all equal (e.g. a single
        pixel) gives 1.0 on the mask, instead of the NaNs a 0/0 normalization
        would produce.
    '''
    dist_map = np.zeros_like(polygon, dtype=float)

    # coordinates where mask is true
    rows, cols = np.nonzero(polygon)
    if rows.size == 0:
        # Nothing to measure; also avoids the division-by-zero warning that
        # center_of_mass raises for an empty input.
        return dist_map

    center_row, center_col = ndimage.center_of_mass(polygon)
    # Vectorized distance of every mask pixel to the center.
    dist_map[rows, cols] = np.hypot(rows - center_row, cols - center_col)

    # normalization
    lowest, highest = dist_map.min(), dist_map.max()
    if highest == lowest:
        dist_map[rows, cols] = 1.0
        return dist_map

    dist_map = (dist_map - lowest) / (highest - lowest)
    dist_map[rows, cols] = 1 - dist_map[rows, cols]
    return dist_map


def coords_to_centers(coords, im_size):
    '''Build the center-gradient map of all polygons.

    Args:
        coords: list of polygons, each a list of `[x, y]` vertices.
        im_size: shape used to rasterize every polygon.

    Returns:
        Float array holding, per pixel, the largest center gradient of all
        polygons covering it (0 where there is none). The x values are used
        as row coordinates while rasterizing, so the result is transposed
        before it is returned.
    '''
    # Builtin `float`: the `np.float` alias was removed in NumPy 1.24.
    centers = np.zeros(im_size, dtype=float)

    for region in coords:
        # A region without vertices cannot be indexed as an (N, 2) array.
        if len(region) == 0:
            continue
        points = np.array(region)

        polygon = poly2mask(points[:, 0], points[:, 1], im_size)
        grad_poly = binary_poly_to_center_gradiant(polygon)

        # Keep the stronger response where polygons overlap. The comparison
        # is False for NaN, so an invalid gradient never overwrites a value.
        cond = grad_poly > centers
        centers[cond] = grad_poly[cond]

    return centers.T
        

def copy_masks(ann_dir, im_size):
    '''Render the XML annotations of `ann_dir` into center-gradient masks.

    For every annotation file the `Region` polygons are rasterized at
    `def_im_size`, scaled to 0..255, resized to `im_size` and saved as
    `<basename>.png` in the global `all_centers_dir`. Only the center masks
    are written; the binary-mask and contour export (`coords_to_mask`,
    `coords_to_contours`) is currently not used here.

    Note: a later cell of this notebook redefines `copy_masks` with a
    different signature, so this cell has to be re-run before it is used again.

    Args:
        ann_dir: folder with the annotation files; entries whose name
            contains 'ipynb' are skipped.
        im_size: size passed to `Image.resize` for the saved mask.
    '''
    anns_fn = sorted(p for p in os.listdir(ann_dir) if 'ipynb' not in p)

    for i, filename in enumerate(anns_fn):
        print('[{:2d}/{:3d}] {}'.format(i+1, len(anns_fn), filename))

        # Parse the annotation file and read the polygon of every region
        doc = xml.dom.minidom.parse(join(ann_dir, filename))
        coords = get_coordinates(doc.getElementsByTagName('Region'))

        # Compute centers mask and scale it to the 8-bit range
        centers_mask = coords_to_centers(coords, def_im_size)
        centers_mask = (centers_mask*255).astype(np.uint8)

        # Save mask; splitext copes with extensions of any length
        dst_ce = join(all_centers_dir, os.path.splitext(filename)[0] + '.png')
        Image.fromarray(centers_mask).resize(im_size).save(dst_ce, 'PNG')


In [43]:
# Center masks for the training annotations
print('Train masks generation...')
copy_masks(ann_dir=raw_train_anno_dir, im_size=im_size)

# Center masks for the test annotations
print('Test masks generation...')
copy_masks(ann_dir=raw_test_anno_dir, im_size=im_size)

Train masks generation...
[ 1/ 30] TCGA-18-5592-01Z-00-DX1.xml
[ 2/ 30] TCGA-21-5784-01Z-00-DX1.xml
[ 3/ 30] TCGA-21-5786-01Z-00-DX1.xml
[ 4/ 30] TCGA-38-6178-01Z-00-DX1.xml
[ 5/ 30] TCGA-49-4488-01Z-00-DX1.xml
[ 6/ 30] TCGA-50-5931-01Z-00-DX1.xml


  for dir in range(input.ndim)]


[ 7/ 30] TCGA-A7-A13E-01Z-00-DX1.xml
[ 8/ 30] TCGA-A7-A13F-01Z-00-DX1.xml
[ 9/ 30] TCGA-AR-A1AK-01Z-00-DX1.xml
[10/ 30] TCGA-AR-A1AS-01Z-00-DX1.xml
[11/ 30] TCGA-AY-A8YK-01A-01-TS1.xml
[12/ 30] TCGA-B0-5698-01Z-00-DX1.xml
[13/ 30] TCGA-B0-5710-01Z-00-DX1.xml
[14/ 30] TCGA-B0-5711-01Z-00-DX1.xml
[15/ 30] TCGA-CH-5767-01Z-00-DX1.xml
[16/ 30] TCGA-DK-A2I6-01A-01-TS1.xml
[17/ 30] TCGA-E2-A14V-01Z-00-DX1.xml
[18/ 30] TCGA-E2-A1B5-01Z-00-DX1.xml
[19/ 30] TCGA-G2-A2EK-01A-02-TSB.xml
[20/ 30] TCGA-G9-6336-01Z-00-DX1.xml
[21/ 30] TCGA-G9-6348-01Z-00-DX1.xml
[22/ 30] TCGA-G9-6356-01Z-00-DX1.xml
[23/ 30] TCGA-G9-6362-01Z-00-DX1.xml
[24/ 30] TCGA-G9-6363-01Z-00-DX1.xml
[25/ 30] TCGA-HE-7128-01Z-00-DX1.xml
[26/ 30] TCGA-HE-7129-01Z-00-DX1.xml
[27/ 30] TCGA-HE-7130-01Z-00-DX1.xml
[28/ 30] TCGA-KB-A93J-01A-01-TS1.xml
[29/ 30] TCGA-NH-A8F7-01A-01-TS1.xml
[30/ 30] TCGA-RD-A8N9-01A-01-TS1.xml
Test masks generation...
[ 1/ 14] TCGA-2Z-A9J9-01A-01-TS1.xml
[ 2/ 14] TCGA-44-2665-01B-06-BS6.xml
[ 3/ 14] TCGA

# CPM17

In [66]:
# Checklist of the CPM17 conversion steps, kept as a bare string that the
# cell only displays. Presumably '+' marks finished and '-' pending steps
# -- TODO confirm.
'''
+ Copy Images to new folder
    + Train
    + Test
- Copy Masks to new folder
    - Make Masks train
    - Make Masks test
- Copy Countuors to new folder
    - Make Countuors train
    - Make Countuors test
'''

'\n+ Copy Images to new folder\n    + Train\n    + Test\n- Copy Masks to new folder\n    - Make Masks train\n    - Make Masks test\n- Copy Countuors to new folder\n    - Make Countuors train\n    - Make Countuors test\n'

In [67]:
# Size the CPM17 images, masks and contours are resized to.
im_size = (600, 600)

In [68]:
# NEW DATAFOLDERS
# Output layout of the converted CPM17 dataset.
data_dir = '../data/CPM17_data'

(train_imgs_dir,
 test_imgs_dir,
 all_masks_dir,
 all_countuors_dir) = (
    join(data_dir, sub_dir)
    for sub_dir in ('train_images', 'test_images', 'all_masks', 'all_contours')
)

# The root folder is created first, then each of its sub-folders.
mkdir(data_dir)
mkdir(train_imgs_dir)
mkdir(test_imgs_dir)
mkdir(all_masks_dir)
mkdir(all_countuors_dir)


In [69]:
# RAW DATA FOLDERS
# Layout of the original CPM17 download: every split has an image folder and
# a label folder.
raw_data_dir = '../data/CPM-17/cpm17'

raw_train_dir, raw_test_dir = (
    join(raw_data_dir, split) for split in ('train', 'test')
)
raw_train_imgs_dir, raw_train_anno_dir = (
    join(raw_train_dir, kind) for kind in ('Images', 'Labels')
)
raw_test_imgs_dir, raw_test_anno_dir = (
    join(raw_test_dir, kind) for kind in ('Images', 'Labels')
)


In [86]:
def copy_images(src_img_dir, dst_img_dir, prefix=None, im_size=None):
    '''Convert every image of `src_img_dir` to an RGB PNG in `dst_img_dir`.

    Args:
        src_img_dir: folder with the source images; entries whose name
            contains 'ipynb' (e.g. `.ipynb_checkpoints`) are skipped.
        dst_img_dir: existing folder that receives the PNG files.
        prefix: optional text inserted between the base name and '.png'
            (despite its name it is appended after the base name).
        im_size: optional size passed to `Image.resize`; images keep their
            original size when it is falsy.
    '''
    imgs_fn = sorted(p for p in os.listdir(src_img_dir) if 'ipynb' not in p)

    for filename in imgs_fn:
        src = join(src_img_dir, filename)
        # splitext copes with extensions of any length, unlike slicing off
        # the last four characters.
        stem = os.path.splitext(filename)[0]
        dst = join(dst_img_dir, stem + (prefix or '') + '.png')

        # The context manager closes the source file once the PNG is written.
        with Image.open(src) as src_img:
            out_img = src_img.convert('RGB')
            if im_size:
                out_img = out_img.resize(im_size)
            out_img.save(dst, 'PNG')

In [71]:
# Copy images
# Train split
copy_images(src_img_dir=raw_train_imgs_dir, dst_img_dir=train_imgs_dir,
            prefix='_train', im_size=im_size)

# Test split
copy_images(src_img_dir=raw_test_imgs_dir, dst_img_dir=test_imgs_dir,
            prefix='_test', im_size=im_size)


In [85]:
def copy_masks(labels_path, prefix=None, im_size=None):
    '''Export a binary foreground mask for every label file in `labels_path`.

    Each file is loaded with `scipy.io.loadmat`; every pixel of its
    `inst_map` array that is greater than 0 becomes 255, all others 0. The
    mask is saved as `<basename><prefix>.png` in the global `all_masks_dir`.

    Args:
        labels_path: folder with the label files; entries whose name
            contains 'ipynb' are skipped.
        prefix: optional text inserted between the base name and '.png'
            (despite its name it is appended after the base name).
        im_size: optional size passed to `Image.resize`; masks keep their
            original size when it is falsy.
    '''
    labels_filenames = sorted(p for p in os.listdir(labels_path) if 'ipynb' not in p)

    for filename in labels_filenames:
        mat = scipy.io.loadmat(join(labels_path, filename))

        # Make binary mask
        binary_mask = (mat['inst_map'] > 0).astype(np.uint8) * 255

        # Build the destination from the file name itself instead of
        # splitting the joined path on '/', which breaks with other separators.
        stem = os.path.splitext(filename)[0]
        dst_bi = join(all_masks_dir, stem + (prefix or '') + '.png')

        img = Image.fromarray(binary_mask)
        if im_size:
            # Nearest-neighbour keeps the mask strictly 0/255; an
            # interpolating filter would introduce gray values.
            img = img.resize(im_size, Image.NEAREST)
        img.save(dst_bi, 'PNG')


In [73]:
# Binary masks of the train split
copy_masks(labels_path=raw_train_anno_dir, prefix='_train', im_size=im_size)

# Binary masks of the test split
copy_masks(labels_path=raw_test_anno_dir, prefix='_test', im_size=im_size)


In [84]:
def copy_countur(labels_path, prefix=None, im_size=None):
    '''Export a contour image for every label file in `labels_path`.

    Each file is loaded with `scipy.io.loadmat`; its `inst_map` array is
    treated as an instance map in which every positive value labels one
    instance. The outer boundary pixels of every instance are set to 255 in a
    uint8 image with the shape of `inst_map`, which is saved as
    `<basename><prefix>.png` in the global `all_countuors_dir`.

    Args:
        labels_path: folder with the label files; entries whose name
            contains 'ipynb' are skipped.
        prefix: optional text inserted between the base name and '.png'
            (despite its name it is appended after the base name).
        im_size: optional size passed to `Image.resize`; images keep their
            original size when it is falsy.
    '''
    labels_filenames = sorted(p for p in os.listdir(labels_path) if 'ipynb' not in p)

    for filename in labels_filenames:
        # Load instance mask
        mat = scipy.io.loadmat(join(labels_path, filename))
        inst_map = mat['inst_map'].astype(int)

        # Create output counturs array with the shape of the mask
        counturs = np.zeros(inst_map.shape, dtype=np.uint8)

        # Iterate over the labels that are really present, so a gap in the
        # numbering cannot produce an empty contour list.
        for label in np.unique(inst_map):
            if label <= 0:
                continue
            instance = (inst_map == label).astype(np.uint8) * 255

            # findContours returns (image, contours, hierarchy) in OpenCV 3
            # and (contours, hierarchy) in OpenCV 4; [-2] is the contour
            # list in both.
            found = cv2.findContours(instance, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_NONE)[-2]

            # Draw every outer contour so instances made of several
            # components are outlined completely.
            for contour in found:
                # OpenCV points are (x, y); the array is indexed [row, col].
                xs, ys = contour[:, 0, 0], contour[:, 0, 1]
                counturs[ys, xs] = 255

        # Save counturs
        stem = os.path.splitext(filename)[0]
        dst_co = join(all_countuors_dir, stem + (prefix or '') + '.png')

        img = Image.fromarray(counturs)
        if im_size:
            # Nearest-neighbour keeps the contour image strictly 0/255.
            img = img.resize(im_size, Image.NEAREST)
        # The array is already in image orientation, so no rotate/flip is
        # needed and non-square instance maps work as well.
        img.save(dst_co, 'PNG')


In [75]:
# Contours of the train split
copy_countur(labels_path=raw_train_anno_dir, prefix='_train', im_size=im_size)

# Contours of the test split
copy_countur(labels_path=raw_test_anno_dir, prefix='_test', im_size=im_size)

# CoNSeP

In [87]:
# NEW DATAFOLDERS
# Output layout of the converted CoNSeP dataset.
data_dir = '../data/CoNSeP_data'

(train_imgs_dir,
 test_imgs_dir,
 all_masks_dir,
 all_countuors_dir) = (
    join(data_dir, sub_dir)
    for sub_dir in ('train_images', 'test_images', 'all_masks', 'all_contours')
)

# The root folder is created first, then each of its sub-folders.
mkdir(data_dir)
mkdir(train_imgs_dir)
mkdir(test_imgs_dir)
mkdir(all_masks_dir)
mkdir(all_countuors_dir)


In [88]:
# RAW DATA FOLDERS
# Layout of the original CoNSeP download: every split has an image folder and
# a label folder.
raw_data_dir = '../data/CoNSeP'

raw_train_dir, raw_test_dir = (
    join(raw_data_dir, split) for split in ('Train', 'Test')
)
raw_train_imgs_dir, raw_train_anno_dir = (
    join(raw_train_dir, kind) for kind in ('Images', 'Labels')
)
raw_test_imgs_dir, raw_test_anno_dir = (
    join(raw_test_dir, kind) for kind in ('Images', 'Labels')
)


In [89]:
# Copy images (no resize, no file-name suffix)
# Train split
copy_images(src_img_dir=raw_train_imgs_dir, dst_img_dir=train_imgs_dir)

# Test split
copy_images(src_img_dir=raw_test_imgs_dir, dst_img_dir=test_imgs_dir)


In [90]:
# Binary masks of the train split
copy_masks(labels_path=raw_train_anno_dir)

# Binary masks of the test split
copy_masks(labels_path=raw_test_anno_dir)


In [91]:
# Contours of the train split
copy_countur(labels_path=raw_train_anno_dir)

# Contours of the test split
copy_countur(labels_path=raw_test_anno_dir)

# Generate new centroids data

In [126]:
# Settings for the centroid-data generation.
def_im_size = (1000, 1000) # original image size
im_size = (500, 500)
# `prefix` and `postfix` are plain module-level names; a `global` statement
# has no effect outside a function, so none is needed here.
prefix = 2007
postfix = 0
max_classes = 300

'0042'