This program takes as input a 3-color confocal image (TIFF format) and a binary mask generated by ImageJ (TIFF format). The binary mask represents the notochord region.

In [None]:
import os
import numpy as np
import scipy.ndimage as ndi
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.feature import peak_local_max
from os.path import join
from skimage.io import imread
from skimage.filters import rank
from pandas import Series, DataFrame
import pandas as pd
from skimage.filters.thresholding import threshold_otsu
import skimage.measure as skmeas
import re
from matplotlib.backends.backend_pdf import PdfPages
pd.set_option("display.max_rows", 500)

%matplotlib inline

In [None]:
# This function returns the names of the image files and mask files found in the selected directory

def get_file_names(dirpath):
    """Collect the image and mask file names contained in *dirpath*.

    Files whose name ends in ``_Out.tif`` are treated as images and files
    whose name ends in ``_Mask.tif`` as masks; every other entry is ignored.

    ``os.listdir`` returns entries in arbitrary order, while the callers pair
    the two lists positionally with ``zip``.  Both lists are therefore sorted
    by the part of the name that precedes the suffix, so an image and a mask
    sharing that prefix end up at the same index (provided every image has a
    matching mask).

    Args:
        dirpath: Directory to scan.

    Returns:
        A tuple ``(img_files, mask_files)`` of two lists of file names
        (names only, not full paths).
    """
    img_suffix = "_Out.tif"
    mask_suffix = "_Mask.tif"
    names = os.listdir(dirpath)

    # Sort on the prefix only: sorting the full names could order the two
    # lists differently because the suffixes themselves take part in the
    # comparison.
    img_files = sorted((name for name in names if name.endswith(img_suffix)),
                       key=lambda name: name[:-len(img_suffix)])
    mask_files = sorted((name for name in names if name.endswith(mask_suffix)),
                        key=lambda name: name[:-len(mask_suffix)])

    return (img_files, mask_files)
      

In [None]:
# Function 'normalize' divides an image into a 10x10 grid of tiles and measures the lowest mean value among them.
# This value is defined as the background and subtracted from the image.
# Then, the image is rescaled so that its maximum maps to 1.

def normalize(img):
    """Subtract a tile-based background estimate and rescale the image.

    The image is cut into a 10x10 grid of equally sized tiles (tile size is
    the image size integer-divided by 10, so leftover rows/columns at the
    bottom/right edge are not part of any tile).  The smallest tile mean is
    taken as the background level.

    Args:
        img: Array with at least two dimensions; tiling is done over the
            first two axes.

    Returns:
        A float64 array ``(img - background) / (max(img) - background)``.
        The maximum maps to 1; pixels darker than the background become
        negative.
    """
    # At least one pixel per tile, so images smaller than 10 px along an
    # axis still yield non-empty tiles instead of NaN means.
    tile_h = max(img.shape[0] // 10, 1)
    tile_w = max(img.shape[1] // 10, 1)

    # Start from +inf rather than a fixed large number so that images whose
    # values all exceed that number still get a correct background.
    background = np.inf
    for row in range(10):
        for col in range(10):
            tile = img[row*tile_h:(row+1)*tile_h, col*tile_w:(col+1)*tile_w]
            if tile.size == 0:
                # Only happens for images smaller than 10 px along an axis.
                continue
            tile_mean = np.mean(tile)
            if tile_mean < background:
                background = tile_mean

    img_norm = (img.astype('float64') - background)/(np.max(img) - background)

    return img_norm

# This function generates a combined adaptive and fixed threshold segmentation
def segment(gfp_norm, mask, sigma = 3, i = 120):
    """Segment bright regions of a normalized GFP image.

    Two criteria are combined with a logical AND:

    * adaptive threshold: the image, scaled by 1023, cast to uint16 and
      Gaussian-smoothed, must exceed its local mean computed by
      ``rank.mean`` over a disk-shaped neighbourhood;
    * fixed threshold: the Gaussian-smoothed image must exceed 1.5 times the
      median of the smoothed pixels that are not masked out by ``mask``.

    The combined binary image is then dilated for 5 iterations and eroded
    for 9 iterations.

    Args:
        gfp_norm: 2-D image.  The 0-1023 rescaling assumes values in
            [0, 1] -- TODO confirm against callers.
        mask: Boolean array of the same shape; True marks pixels that are
            ignored when computing the median (numpy masked-array
            convention).
        sigma: Standard deviation of the Gaussian smoothing.
        i: Side length, in pixels, of the square grid holding the disk
            footprint used for the local mean (the disk radius is ``i // 2``).

    Returns:
        Boolean array, True inside the segmented objects.
    """
    gfp_norm_smooth = ndi.gaussian_filter(gfp_norm, sigma)

    # Disk-shaped structuring element; open grids broadcast to an (i, i) array.
    radius = i // 2
    rows, cols = np.ogrid[:i, :i]
    SE = (rows - radius)**2 + (cols - radius)**2 <= radius**2

    # rank filters operate on integer images, hence the 10-bit rescaling.
    gfp_norm_int = (gfp_norm*1023).astype('uint16')
    gfp_norm_int_smooth = ndi.gaussian_filter(gfp_norm_int, sigma)
    # The footprint is passed positionally because its keyword name differs
    # between scikit-image releases (`selem` vs `footprint`).
    bg = rank.mean(gfp_norm_int_smooth, SE)
    adaptive_segmentation = gfp_norm_int_smooth > bg

    threshold = np.ma.median(np.ma.array(gfp_norm_smooth, mask = mask))*1.5
    fix_segmentation = gfp_norm_smooth > threshold

    gfp_binary = adaptive_segmentation & fix_segmentation
    gfp_binary_holefilled = ndi.binary_erosion(ndi.binary_dilation(gfp_binary, iterations = 5), iterations = 9)

    return gfp_binary_holefilled

# This function uses a regular expression to extract all the information from the name of the file.
def extract_info(filename):
    """Parse fish number, region number and z index from a file name.

    The name must contain ``fish<F>.<R>_`` optionally followed by ``z<Z>_``
    and then at least one more character, e.g. ``fish3.2_z5_Out.tif``.

    Args:
        filename: File name to parse.

    Returns:
        A tuple ``(fish, region, z)`` of ints.  ``z`` is 1 when the name has
        no ``z<Z>_`` part.

    Raises:
        ValueError: If ``filename`` does not follow the expected pattern.
    """
    match = re.search(r'(fish)([0-9]+)\.([0-9]+)_(z([0-9]+)_)?(.+)', filename)
    if match is None:
        raise ValueError(
            "File name %r does not match 'fish<F>.<R>_[z<Z>_]<rest>'" % (filename,))

    fish = int(match.group(2))
    region = int(match.group(3))

    # Group 5 is None when the optional z part is absent.
    z_text = match.group(5)
    z = int(z_text) if z_text else 1

    return (fish, region, z)

# This function generates a summary pdf with all the analyzed information
def save_table_pdf(df, path):
    """Render a DataFrame as a table and save it as a one-page PDF.

    Args:
        df: DataFrame to render; its values become the table cells and its
            column names the header row.
        path: Destination path of the PDF file.
    """
    fig, ax = plt.subplots(figsize=(12,4))
    # Hide the axes so that only the table is drawn.
    ax.axis('tight')
    ax.axis('off')
    ax.table(cellText=df.values, colLabels=df.columns, loc='center')

    # The context manager closes the PDF even if saving the figure fails.
    with PdfPages(path) as pp:
        pp.savefig(fig, bbox_inches='tight')

In [None]:
def quantify(dirpath, filename, maskname, pdf = None):
    """Segment the cells of one image and quantify their relative intensities.

    The image is read from ``dirpath/filename`` and its channels 0, 1 and 2
    are used as GFP, mRNA 1 and mRNA 2.  The mask read from
    ``dirpath/maskname`` is inverted, so zero-valued mask pixels are the ones
    excluded from the analysis.  GFP is segmented with ``segment``; connected
    objects larger than 3500 pixels are kept as cells and numbered from 1.

    A three-panel figure (normalized GFP, GFP with cell outlines and cell
    numbers, mRNA channels with outlines) is appended to ``pdf`` and shown.

    Side effects: sets ``plt.rcParams["axes.grid"]`` to False.

    Args:
        dirpath: Directory containing the image and the mask.
        filename: Image file name; also parsed by ``extract_info``.
        maskname: Mask file name.
        pdf: Open ``PdfPages`` object receiving the figure.  When omitted, a
            module-level variable named ``pdf`` is used instead.

    Returns:
        DataFrame with one row per kept cell plus one row with ``cell_num`` 0
        for the remaining unmasked pixels.  The ``* int`` columns hold the
        mean normalized intensity of the row's pixels divided by the mean
        over all unmasked pixels.

    Raises:
        NameError: If no ``pdf`` is passed and no module-level ``pdf`` exists.
    """
    # Resolve the output PDF first so a missing one fails before any heavy work.
    if pdf is None:
        pdf = globals().get('pdf')
    if pdf is None:
        raise NameError("name 'pdf' is not defined: pass an open PdfPages as `pdf` "
                        "or create a module-level `pdf` before calling quantify")

    fish, region, z = extract_info(filename)

    filepath = join(dirpath, filename)
    maskpath = join(dirpath, maskname)

    img = imread(filepath)
    # Inverted so that True marks pixels to exclude (masked-array convention).
    mask = np.logical_not(imread(maskpath))

    gfp = img[:,:,0]
    mrna1 = img[:,:,1]
    mrna2 = img[:,:,2]

    gfp_norm = (gfp - np.min(gfp))/(np.max(gfp) - np.min(gfp)) #Try function normalize
    mrna1_norm = normalize(mrna1)
    mrna2_norm = normalize(mrna2)

    gfp_norm_noto = np.ma.array(gfp_norm, mask = mask)
    mrna1_norm_noto = np.ma.array(mrna1_norm, mask = mask)
    mrna2_norm_noto = np.ma.array(mrna2_norm, mask = mask)

    gfp_binary_holefilled = segment(gfp_norm, mask)

    gfp_binary_holefilled_noto = np.where(mask == 1, 0, gfp_binary_holefilled) # I had to do the mask this way because the usual way it didn't work well.

    object_labels = skmeas.label(gfp_binary_holefilled_noto)

    # Keep only large objects and renumber them consecutively from 1.
    # The label dtype is used so cell numbers cannot overflow the image dtype.
    final_mask = np.zeros(gfp.shape, dtype = object_labels.dtype)
    new_number = 1
    for cell_ID in range(1, np.max(object_labels)+1):
        cell_pixels = object_labels == cell_ID
        if np.count_nonzero(cell_pixels) > 3500:
            final_mask[cell_pixels] = new_number
            new_number += 1

    final_mask_binary = final_mask != 0

    fishs = []
    regions = []
    zs = []
    mrna1_int_rels = []
    mrna2_int_rels = []
    gfp_int_rels = []
    areas = []
    cell_nums = []
    cell_or_bgs = []
    # Reference values: mean over every unmasked pixel.
    mrna1_mean_noto = np.mean(mrna1_norm_noto)
    mrna2_mean_noto = np.mean(mrna2_norm_noto)
    gfp_mean_noto = np.mean(gfp_norm_noto)

    # cell_ID 0 is the part of the image not assigned to any cell.
    for cell_ID in range(0, np.max(final_mask)+1):
        cell_mask = (final_mask != cell_ID)
        area = np.count_nonzero(final_mask == cell_ID)

        # The new mask is combined with the one already carried by the
        # masked arrays, so only unmasked pixels of this cell are averaged.
        mrna1_int_rel = np.mean(np.ma.array(mrna1_norm_noto, mask = cell_mask))/mrna1_mean_noto
        mrna2_int_rel = np.mean(np.ma.array(mrna2_norm_noto, mask = cell_mask))/mrna2_mean_noto
        gfp_int_rel = np.mean(np.ma.array(gfp_norm_noto, mask = cell_mask))/gfp_mean_noto

        fishs.append(fish)
        regions.append(region)
        zs.append(z)
        mrna1_int_rels.append(mrna1_int_rel)
        mrna2_int_rels.append(mrna2_int_rel)
        gfp_int_rels.append(gfp_int_rel)
        areas.append(area)
        cell_nums.append(cell_ID)
        cell_or_bgs.append(cell_ID != 0)

    df_to_return = DataFrame({'fish':fishs, 'region':regions, 'z':zs,
                              'mrna1 int' : mrna1_int_rels, 'mrna2 int' : mrna2_int_rels, 'gfp int' : gfp_int_rels,
                              'area': areas, 'cell_num': cell_nums, 'Is a cell': cell_or_bgs,})

    # Two-pixel-wide outlines of the cells and of the excluded region.
    gfp_membranes = np.logical_xor(ndi.binary_erosion(final_mask_binary, iterations = 2), final_mask_binary)
    noto_borders = np.logical_xor(ndi.binary_erosion(mask, iterations = 2), mask)

    plt.rcParams["axes.grid"] = False #This avoids the generation of grids after running the sns code.
    fig, ax = plt.subplots(3,1, figsize = (15,15))
    fig.suptitle(('Fish ' + str(fish)+', Region '+ str(region) +', z '+ str(z)), fontsize=18, y = 0.91)
    ax[0].imshow(gfp_norm, interpolation = 'none', cmap = 'gray')

    # Panel 2: GFP in green with white outlines.
    image_rgb = np.zeros((gfp.shape[0], gfp.shape[1], 3))
    image_rgb[:,:,1] = gfp_norm
    image_rgb[gfp_membranes] = 1
    image_rgb[noto_borders] = 1
    ax[1].imshow(image_rgb, vmin = 0, vmax = 1)

    # Write each cell number at the centroid of its own region, taken
    # directly from final_mask so the number always matches the table.
    for props in skmeas.regionprops(final_mask):
        text_y, text_x = props.centroid
        ax[1].text(text_x, text_y, str(props.label), fontsize = 16, color='white', ha='center', va='center')

    # Clip the mRNA signal at 0.2 and stretch it to the display range.
    mrna1_to_display = np.minimum(mrna1_norm, 0.2)*5
    mrna2_to_display = np.minimum(mrna2_norm, 0.2)*5

    # Panel 3: mRNA 2 in magenta, mRNA 1 in green; outlines are drawn on the
    # red and green channels only.
    image_rgb = np.zeros((gfp.shape[0], gfp.shape[1], 3))
    image_rgb[:,:,0] = mrna2_to_display
    image_rgb[:,:,1] = mrna1_to_display
    image_rgb[:,:,2] = mrna2_to_display
    image_rgb[gfp_membranes, :2] = 1
    image_rgb[noto_borders, :2] = 1
    ax[2].imshow(image_rgb, vmin = 0, vmax = 1)

    pdf.savefig(fig, bbox_inches='tight')
    plt.show()
    # Release the figure so processing many files does not accumulate them.
    plt.close(fig)

    return df_to_return

In [None]:
# This is the main function. It takes a directory as an input and performs the analysis for the files in the directory that follow the specified pattern.
def analyze_directory(dirpath, max_files = 2):
    """Quantify the images of a directory and write the summary files.

    Writes ``summary.pdf`` (one figure per analysed image), ``table.pdf`` and
    ``table.csv`` into ``dirpath``.

    Args:
        dirpath: Directory holding the image and mask files.
        max_files: Maximum number of image/mask pairs to analyse.  The
            default of 2 keeps the historical behaviour; pass ``None`` to
            analyse every pair.

    Returns:
        DataFrame with the concatenated results of all analysed images.
    """
    # quantify() resolves `pdf` from the module namespace, so the PdfPages
    # object has to be published there instead of being kept local.
    global pdf

    img_files, mask_files = get_file_names(dirpath)

    columns = ['fish', 'region', 'z',
               'mrna1 int', 'mrna2 int', 'gfp int',
               'area', 'cell_num', 'Is a cell']

    pdfpath = join(dirpath, 'summary.pdf')
    pdf = PdfPages(pdfpath)

    tables = []
    try:
        for (filename, maskname) in zip(img_files[:max_files], mask_files[:max_files]):
            tables.append(quantify(dirpath, filename, maskname))
    finally:
        # Close the PDF even when one of the images fails to process.
        pdf.close()

    # A single concat avoids repeatedly growing a frame that starts empty.
    if tables:
        final_table = pd.concat(tables)
    else:
        final_table = DataFrame(columns = columns)

    tablepdfpath = join(dirpath, 'table.pdf')
    save_table_pdf(final_table, tablepdfpath)

    csvpath = join(dirpath, 'table.csv')
    final_table.to_csv(csvpath)

    return final_table

In [None]:
dirpath = ## Write here the filepath
img_files, mask_files = get_file_names(dirpath)

final_table = DataFrame(columns = ['fish', 'region', 'z', 
                                   'mrna1 int', 'mrna2 int', 'gfp int',                              
                                   'area', 'cell_num', 'Is a cell'])

pdfpath = join(dirpath, 'summary.pdf')
pdf = PdfPages(pdfpath)

i = 0
for (filename, maskname) in zip(img_files, mask_files):
    table = quantify(dirpath, filename, maskname)
    final_table = pd.concat([final_table, table])
    if i == 14:
        pdf.close()
        pdfpath = join(dirpath, 'summary2.pdf')
        pdf = PdfPages(pdfpath)
    i += 1
pdf.close()    

tablepdfpath = join(dirpath, 'table.pdf')
save_table_pdf(final_table, tablepdfpath)

picklepath = join(dirpath, 'table.pkl')
final_table.to_pickle(picklepath)
