# The nucleus challenge

## Read the data

In [None]:
import pathlib
import imageio
import numpy as np

In [None]:
# Gather the training image paths matching */images/*.png and sort them so
# that positional indexing below is reproducible.
training_paths = pathlib.Path(
    'data/data-science-bowl-2018/stage1_train'
).glob('*/images/*.png')
training_sorted = sorted(training_paths)
# Print the first two paths, one per line, as a quick sanity check.
print('\n'.join(str(path) for path in training_sorted[:2]))

## Plot the data

In [None]:
import matplotlib.pyplot as plt


def show_images(ims, cmap=None, labels=None):
    """Display the given images side by side in a single row.

    Every image is drawn in its own subplot with ``plt.imshow``; ``cmap`` is
    forwarded unchanged. When ``labels`` is given it must contain exactly one
    entry per image and supplies the subplot titles; otherwise each subplot is
    titled 'Image <index>'.
    """
    n_images = len(ims)
    plt.figure(figsize=(3 * n_images, 10))
    if labels is not None:
        assert len(labels) == n_images, 'provide exactly one label per image'

    for idx, image in enumerate(ims):
        plt.subplot(1, n_images, idx + 1)
        plt.imshow(image, cmap=cmap)
        # Fall back to a positional title when no labels were supplied.
        plt.title('Image ' + str(idx) if labels is None else labels[idx])

    plt.tight_layout()
    plt.show()

In [None]:
# Load the first four training images and show them in a row.
ims = [imageio.imread(str(path)) for path in training_sorted[:4]]
show_images(ims)

## Clean the data

### Coerce to grayscale

In [None]:
from skimage.color import rgb2gray

# Two sample images kept around for testing.
im = imageio.imread(str(training_sorted[0]))
im2 = imageio.imread(str(training_sorted[2]))

# Compare the array shape before and after the grayscale conversion.
print('Original image shape: {}'.format(im.shape))
print('New image shape: {}'.format(rgb2gray(im).shape))

# Convert the first four training images to grayscale and display them.
ims = [rgb2gray(imageio.imread(str(path))) for path in training_sorted[:4]]
show_images(ims, cmap='gray')

### Remove background

#### Analyze distribution of pixel intensity

In [None]:
def show_pixel_values(ims):
    """Plot a histogram of pixel values for each image, one subplot per image.

    Each image is flattened and binned into 50 bins. The x-axis is limited to
    [0, 1] and the y-axis is logarithmic. Subplots are titled 'Image <index>'.
    """
    n_images = len(ims)
    plt.figure(figsize=(2 * n_images, 2))
    for idx, im in enumerate(ims):
        plt.subplot(1, n_images, idx + 1)
        plt.hist(im.flatten(), bins=50)
        plt.xlim([0, 1])
        # Matplotlib renamed the per-axis ``nonposy`` keyword to
        # ``nonpositive`` and later removed the old spelling, so the current
        # name is required on up-to-date installations.
        plt.yscale('log', nonpositive='clip')
        plt.xlabel('grayscale')
        plt.title('Image ' + str(idx))

    plt.tight_layout()
    plt.show()
    
# Histogram the grayscale pixel values of the first four training images.
ims = [rgb2gray(imageio.imread(str(path))) for path in training_sorted[:4]]
show_pixel_values(ims)

#### Mask all values above/below a certain threshold

In [None]:
from skimage.filters import threshold_otsu


def get_image_mask(im, strategy='mean'):
    """Return a 0/1 foreground mask for a grayscale image.

    The image is split at a threshold and the side covering fewer pixels is
    marked as foreground (1); the larger side is background (0).

    Args:
        im: Image array with at most two dimensions.
        strategy: How the threshold is chosen. 'mean' (default) uses the mean
            pixel value, 'otsu' uses ``threshold_otsu``, and 'fixed' uses the
            constant 0.3.

    Returns:
        Integer array with the same shape as ``im`` containing only 0 and 1.

    Raises:
        ValueError: If ``im`` has more than two dimensions or ``strategy`` is
            not one of the supported names.
    """
    if len(im.shape) > 2:
        raise ValueError('need 2-dimensional image data, you provided ' + str(im.shape))

    if strategy == 'mean':
        thresh_val = np.mean(im)
    elif strategy == 'otsu':
        thresh_val = threshold_otsu(im)
    elif strategy == 'fixed':
        thresh_val = 0.3
    else:
        raise ValueError(
            "unknown strategy " + repr(strategy) + ", expected 'mean', 'otsu' or 'fixed'"
        )
    mask = np.where(im > thresh_val, 1, 0)

    # Make sure the larger portion of the mask is considered background
    if np.sum(mask == 0) < np.sum(mask == 1):
        mask = np.where(mask, 0, 1)

    return mask


def mask_image(im):
    """Return ``im`` with every pixel outside its foreground mask set to 0."""
    foreground = get_image_mask(im)
    return np.where(foreground, im, 0)

In [None]:
# Show the first four training images after grayscale conversion and masking.
ims = [mask_image(rgb2gray(imageio.imread(str(path)))) for path in training_sorted[:4]]
show_images(ims, cmap='gray')

In [None]:
# Pixel-value histograms of the first four images before masking ...
print('Unmasked:')
ims = [rgb2gray(imageio.imread(str(path))) for path in training_sorted[:4]]
show_pixel_values(ims)
# ... and after masking, for comparison.
print('Masked:')
ims = [mask_image(rgb2gray(imageio.imread(str(path)))) for path in training_sorted[:4]]
show_pixel_values(ims)

### Create separate mask for each blob

In [None]:
from scipy import ndimage

# Build the 0/1 mask of the sample image, then label its connected regions.
mask = get_image_mask(rgb2gray(im))
# ndimage.label returns the labelled array and the number of regions found.
labels, nlabels = ndimage.label(mask)

show_images([mask])
# show_images(labels[:3])
# Print row 200 of the labelled array as a spot check.
print(labels[200])

In [None]:
def get_separate_masks(im):
    """Split an image's foreground into one 0/1 mask per connected blob.

    Args:
        im: Image array. Input with more than two dimensions is converted
            with ``rgb2gray``; 2-D input is used as it is.

    Returns:
        List of integer arrays (1 inside the blob, 0 elsewhere), one per
        region found by ``ndimage.label``, in label order.
    """
    # Convert only when needed: callers may pass an image that is already
    # grayscale, and recent scikit-image releases reject 2-D input to
    # rgb2gray instead of passing it through.
    gray = rgb2gray(im) if im.ndim > 2 else im
    labels, nlabels = ndimage.label(get_image_mask(gray))
    return [np.where(labels == label_num, 1, 0) for label_num in range(1, nlabels + 1)]

# get_separate_masks performs the grayscale conversion itself, so pass the
# raw image instead of converting twice (recent scikit-image releases reject
# an already-2-D array passed to rgb2gray).
masks = get_separate_masks(im)
# Show blob 18 in full next to a zoomed-in crop of the same mask.
show_images([masks[18], masks[18][240:, 20:50]], labels=['Full', 'Zoom'])

### Create string representation of mask

In [None]:
def create_output_string(mask):
    """Run-length encode a 2-D 0/1 mask, scanning it column by column.

    Pixels are numbered from 0, top to bottom within a column and then column
    after column from left to right. Every run of consecutive 1-pixels is
    written as '<start index> c<run length> '. Runs are detected on the
    flattened sequence, so a run may continue from the bottom of one column
    into the top of the next.

    NOTE(review): the 0-based start index and the 'c' prefix are kept as they
    were; if this string is meant for a submission file, confirm both against
    the required format.

    Args:
        mask: 2-D array whose foreground pixels equal 1 and background
            pixels equal 0.

    Returns:
        The encoded runs concatenated into one string (each pair followed by
        a space), or '' when the mask contains no 1-pixels.

    Raises:
        ValueError: If ``mask`` is not 2-dimensional.
    """
    if mask.ndim != 2:
        raise ValueError('need 2-dimensional mask, you provided ' + str(mask.shape))

    pieces = []
    run_length = 0
    # Transposing before flattening yields the pixels in column-major order.
    for idx, value in enumerate(mask.T.ravel()):
        if value == 1:
            if run_length == 0:
                pieces.append(str(idx) + ' ')
            run_length += 1
        elif value == 0 and run_length != 0:
            pieces.append('c' + str(run_length) + ' ')
            run_length = 0

    # A run that reaches the last pixel has no trailing 0 to close it, so
    # emit its length here; otherwise the start index would be left dangling.
    if run_length != 0:
        pieces.append('c' + str(run_length) + ' ')

    return ''.join(pieces)
    
# Print the encoding of blob 18, the same mask displayed in the previous cell.
print(create_output_string(masks[18]))