# Segmentation

In [3]:
import numpy as np

import numba
import pandas

import skimage
import skimage.io
import skimage.filters
import skimage.segmentation
import skimage.measure
import skimage.morphology
import skimage.feature
import bi1x

import imageio

import os
import colorcet

import osgeo
import osgeo.ogr

import bokeh.io
import bokeh.plotting
import bokeh.palettes as bp
import bokeh.models

bokeh.io.output_notebook()

notebook_url = 'localhost:8888'

import max_int_projection

### Notes for myself

Will need to sift through dictionary to ensure that only images in correct channel are segmented

Thresholding for fluorescence done on each image from only values in that image

Using area and shape to segment cells will be based on entire dataset

I'm using a lot of information and code from one of our Bi1x lessons, image processing 2

I need to go through all images, compute the minimum area of cells, max area of cells, and eccentricity. Then, run stats on them to determine a cutoff for both. Finally, go through all of the images again using those values and applying them to segment

In [11]:
directory = 'Data/TX_Spen_KO_Parent/TX_Spen_KO_WT_Xist_T4_Kdm_T1_Atrx_T6-26.czi'
im = czifile.imread(directory)
im.shape

NameError: name 'czifile' is not defined

# Uses the max int projection functions to produce a dictionary of image arrays
im_dct = max_int_projection.loop_through_files(input_path='Data/TX_Spen_KO_Parent/TX_Spen_KO_WT_Xist_T4_Kdm_T1_Atrx_T6-02.czi', 
                            output_path='Data/out', path_type='image')

## Actual Functions For Everything

### Median Filtering

In [12]:
def med_filter(im):
    '''Takes in an input of a numpy array of an image and returns a median
    filtered numpy array.'''
    return skimage.filters.median(im)

In [14]:
image = skimage.io.imread('Data/out/max_c3_TX_Spen_KO_WT_Xist_T4_Kdm_T1_Atrx_T6-02.tiff')
seg_im = med_filter(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

'bokeh.io.show(bi1x.viz.imshow(seg_im))'

### Otsu Thresholding

In [15]:
def threshold(im):
    '''Takes in an input of an image numpy array and returns a thresholded numpy array'''
    im_med_filt = med_filter(im)
    otsu_thresh = skimage.filters.threshold_otsu(im_med_filt)
    return im > otsu_thresh

In [16]:
seg_im = threshold(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

'bokeh.io.show(bi1x.viz.imshow(seg_im))'

### Clearing Partial Cells

In [17]:
def clear_partial_cells(im, buffer_size=10):
    '''Takes an input of a numpy array of an image and a buffer size
    for how many pixels around the edges. Default is buffer_size=10.
    Returns a numpy array with partial cells on the perimeter of the 
    image removed.'''
    im_thresh = threshold(im)
    return skimage.segmentation.clear_border(im_thresh, buffer_size=buffer_size)

## Watershed Segmentation

In [18]:
def watershed_segmentation(im, 
                           indices=False, 
                           footprint=np.ones((3,3)), 
                          buffer_size=10):
    '''Takes an input of a numpy array of an image,
    indices to put into the peak local maxes, default is indices=False,
    a footprint, default is footprint=np.ones((3,3)), 
    and a buffer size for the clear edges function, default is 10.
    
    Returns a numpy array of a segmented image.
    '''
    image = clear_partial_cells(im, buffer_size=buffer_size)
    distance = ndimage.distance_transform_edt(image)
    local_maxi = skimage.feature.peak_local_max(distance, indices=indices, footprint=footprint, labels=image)
    markers = skimage.morphology.label(local_maxi)
    return skimage.morphology.watershed(-distance, markers, mask=image)

In [20]:
image = skimage.io.imread('Data/out/max_c3_TX_Spen_KO_WT_Xist_T4_Kdm_T1_Atrx_T6-02.tiff')
seg_im = watershed_segmentation(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

NameError: name 'ndimage' is not defined

## Random Walker Segmentation

In [21]:
def random_walker_segmentation(im, indices=False, 
                               footprint=np.ones((3,3)),
                               buffer_size=10):
    '''Takes an input of a numpy array of an image,
    the indices, default is False,
    the footprint, default is np.ones((3,3)), and
    a buffer size for the clear edges function. 
    
    Returns a segmented numpy array.'''
    image = clear_partial_cells(im, buffer_size=buffer_size)
    im_to_invert = image.astype(np.int)
    distance = ndimage.distance_transform_edt(image)
    local_maxi = skimage.feature.peak_local_max(distance, indices=indices, footprint=footprint, labels=image)
    markers = skimage.morphology.label(local_maxi)
    markers[~im_to_invert] = -1
    return skimage.segmentation.random_walker(im_to_invert, markers)

In [22]:
seg_im = random_walker_segmentation(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

NameError: name 'ndimage' is not defined

## Image Props Method of Segmentation

In [23]:
def image_props_segmentation(im, min_area, min_eccentricity, buffer_size=10):
    '''Takes in a numpy array of an image,
    the minimum area cells need to be,
    the minimum eccentricity of cells, and 
    a buffer size for the clear partial cells function, default is 10.
    
    Returns a segmented array of the image.
    
    This method uses code from Justin Bois. Taught during Bi1X.'''
    image = clear_partial_cells(im, buffer_size=buffer_size)
    
    # Produce binary image with cells=1 and background=0 and gives each cell a unique identifier
    im_labeled, n_labels = skimage.measure.label(image, background=0, return_num=True)
    
    # Extract the cells from background
    im_props = skimage.measure.regionprops(im_labeled)
    
    # Loop through image properties and delete small objects and objects that aren't circular enough
    n_regions = 0
    for prop in im_props:
        area = prop.area
        if prop.area < min_area:
            image[im_labeled == prop.label] = False
        else:
            n_regions += 1
    return image, n_regions
'''    elif prop.eccentricity > max_eccentricity:
        im_cleared_edges[im_labeled == prop.label] = False'''

'    elif prop.eccentricity > max_eccentricity:\n        im_cleared_edges[im_labeled == prop.label] = False'

In [6]:
seg_im = image_props_segmentation(image, 2000, .5)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

NameError: name 'image' is not defined

## Chan Vese Segmentation

In [24]:
def chan_vese_segmentation(im, mu=.75, buffer_size=10):
    '''Takes in an input of a numpy array of an image,
    a mu value for segmentation, default is .75, and
    a buffer size, default is 10.'''''
    image = clear_partial_cells(im, buffer_size=buffer_size).astype(int)
    return skimage.segmentation.chan_vese(image, mu=mu)

In [296]:
seg_im = chan_vese_segmentation(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

'bokeh.io.show(bi1x.viz.imshow(seg_im))'

## Feltzenszwalb Segmentation

In [25]:
def feltzenszwalb_segmentation(im, scale=2.0, multichannel=False, buffer_size=10):
    '''Takes an input of a numpy array of an image,
    a scale to perform segmentation in, default is 2.0,
    whether the image is multichannel, default is False,
    and the buffer size, default is 10
    
    Returns a numpy array of a segmented image'''
    image = clear_partial_cells(im, buffer_size=buffer_size)
    return skimage.segmentation.felzenszwalb(image, scale=scale, multichannel=multichannel)

In [298]:
seg_im = feltzenszwalb_segmentation(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

'bokeh.io.show(bi1x.viz.imshow(seg_im))'

## Find Boundaries Method

In [26]:
def find_boundaries(im, connectivity=10, mode='inner', buffer_size=10):
    '''Takes an input of a numpy array of an image, 
    the connectivity of the boundaries, default is 10,
    the mode, default is "inner", and 
    the buffer size, default is 10.
    
    Returns a numpy array of boundaries of cells.'''
    image = clear_partial_cells(im, buffer_size=buffer_size)
    return skimage.segmentation.find_boundaries(image, connectivity=10, mode='inner')

In [27]:
seg_im = find_boundaries(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

'bokeh.io.show(bi1x.viz.imshow(seg_im))'

### Mark Boundaries

In [28]:
def mark_boundaries(im, connectivity=10, mode='inner', buffer_size=10):
    '''Takes an input of a numpy array of an image, 
    the connectivity of the boundaries, default is 10,
    the mode, default is "inner", and 
    the buffer size, default is 10.
    
    Returns a numpy array of boundaries of cells bound to the 
    original image.'''
    image = clear_partial_cells(im, buffer_size=buffer_size)
    boundaries = find_boundaries(im, connectivity=10, mode='inner', buffer_size=10) 
    return skimage.segmentation.mark_boundaries(image, boundaries)

In [29]:
seg_im = mark_boundaries(image)

'''bokeh.io.show(bi1x.viz.imshow(seg_im))'''

'bokeh.io.show(bi1x.viz.imshow(seg_im))'

## Segmentation Call

In [30]:
def im_arrays(directory):
    '''Iterates through an input directory (if folder) and produces a dictionary 
    of image names as keys and numpy arrays of the images as values.'''
    im_dct = {}
    for im in os.listdir(directory):
        im_name = im.split('.')
        if len(im_name) > 1:
            if im_name[1] == 'tiff' or im_name[1] == 'tif':
                value = skimage.io.imread(directory + '/' + im)
                im_dct[im] = value
    return im_dct

In [31]:
def save_images(im_path, im_array):
    '''Saves an image for a given path and image array as a tiff. 
    
    Note that this creates a user warning about a low contrast image, 
    but this is fine since this is simply a binary image'''
    
    int_array = 1 * im_array
    return skimage.io.imsave(im_path, int_array)

In [32]:
def save_as_csv(data, input_path, segmentation, output_path):
    '''Takes an input of of cell data as a tuple of two dictionaries
    and saves it in a csv file for a specified output path.
    
    Returns a pandas dataframe of the input data.'''
    
    cell_type_lst = []
    
    # Separating the tuple of dictionaries and then merging them and converting to dataframe
    auto_cells_dct = data[0]
    auto_cells_dct.update({'automated or manual segmentation': 'automated'})
    manual_cells_dct = data[1]
    manual_cells_dct.update({'automated or manual segmentation': 'manual'})

    auto_df = pandas.DataFrame(auto_cells_dct, index = [0])
    manual_df = pandas.DataFrame(manual_cells_dct, index = [0])
    df = pandas.concat([auto_df, manual_df], join='inner', ignore_index=True)
    
    
    '''# Producing indexing for auto vs manually segmented cells
    for i in range(len(auto_cells_dct)):
        cell_type_lst.append('# auto segmented cells')
    for i in range(len(manual_cells_dct)):
        cell_type_lst.append('# cells to manually segment')
    auto_vs_man_dct = {'# auto or man cells': cell_type_lst}
    auto_vs_man_df = pandas.DataFrame(auto_vs_man_dct, index = [0])
    df.merge(auto_vs_man_df)'''
    
    # Producing directory and name for the csv file for specified output path
    input_lst = input_path.split('/')
    csv_file_name = input_lst[len(input_lst)-1] + '.csv'
    directory = output_path + '/' + segmentation + '_' + csv_file_name
    
    # Saving as a csv file
    df.to_csv(directory)
    return df

In [33]:
def make_empty_array(im):
    '''Takes in the shape of an image and returns an empty rgb numpy array 
    based on the shape of the original image.'''
    shape = im.shape
    shape_lst = []
    if shape[2] != 3:
        shape_lst.append(shape[0]).append(shape[1]).append(3)
        shape = tuple(shape_lst)
    return np.empty(shape, dtype=np.int8)

# Important

Need to save data about each object. Center and major and minor axis would be good.

In [1]:
def check_area_and_count_cells(im_segmented, im_name, min_area, max_area):
    '''Takes in a segmented image, ensures each segmented cell
    meets the minimum and maximum areas of a cell. Asks for user input for cells measured to be too large to be a cell.
    Counts the number of labeled regions within the image that were automatically counted.
    Changes RGB values to distinguish cells that need to be manually segmented from those that were automatically segmented.
    
    Manually segment=blue and automatically segment is yellow.'''
    n_cells = 0
    manual_cells = 0
    labeled_im = skimage.measure.label(im_segmented)
    
    # Initialize an empty image array to store automatically segmented cells.
    shape = im_segmented.shape
    auto_seg_cells_array = np.empty(shape, dtype=np.int8)
    for x in range(shape[0]):
        for y in range(shape[1]):
            auto_seg_cells_array[x][y][0] = 0
            auto_seg_cells_array[x][y][1] = 0
            auto_seg_cells_array[x][y][2] = 0
    
    for region in skimage.measure.regionprops(labeled_im):
        i = 0
        for coord in region.coords:
            (x,y,z) = coord
            if region.area < min_area:
                labeled_im[x][y][z] = 0
            elif region.area > max_area:
                labeled_im[x][y][0] = 1
                labeled_im[x][y][1] = 0
                labeled_im[x][y][2] = 0
                if i < 1:
                    manual_cells += 1
            else:
                labeled_im[x][y][0] = 0
                labeled_im[x][y][1] = 0
                labeled_im[x][y][2] = 1
                auto_seg_cells_array[x][y][0] = 1
                if i < 1:
                    n_cells += 1
            i += 1
                
    return (labeled_im, n_cells, manual_cells, auto_seg_cells)
    

In [420]:
im = skimage.io.imread('Data/seg_test_ims/altering_man_cells/boundaries_max_c3_TX_Spen_KO_WT_Xist_T4_Kdm_T1_Atrx_T6-26.tiff')
im_name = 'boundaries_max_c3_TX_Spen_KO_WT_Xist_T4_Kdm_T1_Atrx_T6-09.tiff'
min_area = 2000
max_area = 10000

im_checked = check_area_and_count_cells3(im, im_name, min_area, max_area)[0]

In [421]:
bokeh.io.show(bi1x.viz.imshow(im.astype(np.int8)))

In [422]:
bokeh.io.show(bi1x.viz.imshow(im_checked.astype(np.int8)))

  im_rgb[:,:,i] /= im_rgb[:,:,i].max()
  if (im < 0).any() or (im > 1).any():
  if (im < 0).any() or (im > 1).any():


In [47]:
def segmentation_arrays(input_path, 
                        output_path,
                        channel, 
                        segmentation,
                        save_ims=True,
                        save_csv=True,
                        connectivity=10,
                        mode='inner',
                        buffer_size=0,
                        min_area=2000,
                        max_area = 10000,
                        min_eccentricity=0.5,
                        indices=False,
                        footprint=np.ones((3,3)),
                        mu=.75,
                        scale=2.0,
                        multichannel=False
                       ):
    '''Input channel as a string in the form c# that you would like to segment in
    
    Segmentations: 
    "image_props" = image prop segementation
    "watershed" = watershed segmentation
    "random_walker" = random walker segmentation
    "chan_vese" = Chan Vese Segmentation
    "feltzenszwalb" = Feltzenszwalb Segmentation
    "boundaries" = find and mark boundaries segmentation
    
    
    Returns a dictionary of image arrays in that channel that have been segmented.'''
    # Initialize dictionary for segmentation arrays
    segmented_dct = {}
    n_cells_dct = {}
    manual_cells_dct = {}
    auto_cells_dct = {}
    
    # Produce a dictionary from input of max intensity files
    # This will need some additional tests
    im_dct = im_arrays(input_path)
    for key in im_dct:
        
        # Ensure that only segmenting in the DAPI channel or specified channel
        split_key = key.split('/')
        im_name = split_key[len(split_key)-1]
        if len(im_name) > 1:
            key_channel = im_name.split('_')[1]


            if key_channel == channel:
                im = im_dct[key]
                seg_im_name = output_path + '/' + segmentation + '_' + key

                # Median filtering, thresholding, and clearing boundaries are all built into segmentation
                # If segmentation is find boundaries
                if segmentation == 'boundaries':
                    im_segmented = mark_boundaries(im, 
                                                   mode=mode,
                                                   connectivity=connectivity,
                                                   buffer_size=buffer_size,
                                                  )
                    checked_tuple = check_area_and_count_cells(im_segmented, 
                                                               key,
                                                               min_area=min_area,
                                                               max_area=max_area)
                    # Calls function to save segmented cells as images
                    if save_ims:
                        save_images(seg_im_name, checked_tuple[0])
                        
                # Need to integrate to check area with other methods
                elif segmentation == 'image_props':
                    im_props_tuple = image_props_segmentation(im, 
                                                            min_area, 
                                                            min_eccentricity, 
                                                            buffer_size=buffer_size)
                    checked_tuple = im_props_tuple
                    if save_ims:
                        save_images(seg_im_name, checked_tuple[0])
                
                # Might not be working properly
                elif segmentation == 'watershed':
                    im_segmented = watershed_segmentation(im, 
                                                          indices=indices, 
                                                          footprint=footprint, 
                                                          buffer_size=buffer_size)
                    checked_tuple = check_area_and_count_cells(im_segmented, 
                                                               key,
                                                               min_area=min_area,
                                                               max_area=max_area)
                    if save_ims:
                        save_images(seg_im_name, checked_tuple[0])
                elif segmentation == 'random_walker':
                    im_segmented = random_walker_segmentation(im, indices=indices, 
                                                              footprint=footprint,
                                                              buffer_size=buffer_size)
                    checked_tuple = check_area_and_count_cells(im_segmented, 
                                                               key,
                                                               min_area=min_area,
                                                               max_area=max_area)
                    if save_ims:
                        save_images(seg_im_name, checked_tuple[0])
                elif segmentation == 'chan_vese':
                    im_segmented = chan_vese_segmentation(im, 
                                                          mu=mu, 
                                                          buffer_size=buffer_size)
                    checked_tuple = check_area_and_count_cells(im_segmented, 
                                                               key,
                                                               min_area=min_area,
                                                               max_area=max_area)
                    if save_ims:
                        save_images(seg_im_name, checked_tuple[0])
                elif segmentation == 'feltzenszwalb':
                    im_segmented = feltzenszwalb_segmentation(im, 
                                                              scale=scale, 
                                                              multichannel=multichannel, 
                                                              buffer_size=buffer_size)
                    checked_tuple = check_area_and_count_cells(im_segmented, 
                                                               key,
                                                               min_area=min_area,
                                                               max_area=max_area)
                    if save_ims:
                        save_images(seg_im_name, checked_tuple[0])
                else:
                    print('Please input a valid segmentation type listed in the docstring.')
                segmented_dct[key] = checked_tuple[0]
                n_cells_dct[key] = checked_tuple[1]
                manual_cells_dct[key] = checked_tuple[2]
                auto_cells_dct[key] = checked_tuple[3]
    if save_csv:
        df = save_as_csv((n_cells_dct, manual_cells_dct), 
                        input_path=input_path,
                        segmentation=segmentation,
                        output_path=output_path)
    return (segmented_dct, n_cells_dct, auto_cells_dct)

In [48]:
segmented_cells = segmentation_arrays(input_path='Data/test_ims',
                                    output_path='Data/test_ims/outputs/seg_outputs/seg_rerun2',
                                    channel='c3',
                                    segmentation='boundaries',
                                    save_ims=True,
                                     save_csv=True)

segmented_ims = segmented_cells[0]
n_cells = segmented_cells[1]

0 1
2 1


  
  


0 1
0 1


  
  
  


0 1
0 1


  
  


0 1
0 1


  
  


0 1
12 0


  


4 3


  
