# Import required modules

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
from skimage import data
import skimage
from skimage.filters.thresholding import threshold_li
from skimage.morphology import erosion, dilation, opening, closing, white_tophat, remove_small_objects
from skimage.morphology import disk
from scipy import ndimage as ndi
import sys,os, glob

#import skimage.segmentation as seg
#import skimage.filters as filters
#import skimage.draw as draw
#import skimage.color as color

#from skimage.filters.thresholding import _cross_entropy
#from skimage.morphology import black_tophat, skeletonize, convex_hull_image


  from .collection import imread_collection_wrapper


# Define parameters for image loading here:

In [2]:
# Common parameters for loading the image files of interest

# for 4 channels metamorph images: channel index order is 1-2-3-4, which is the order as ch405-ch488-ch560-ch647

num_of_ch = 4

# Replace protein/dna label name in the "xxx" below for the indicated channel index (make sure the order is correct)
# For example: ch_dict = {"dna":1,"rpa/rpa2":2,"mdc1":3,"pol2S5":4} if rpa/rpa2 was used for ch488
# Make sure the label name is the same as the factor/dna key used in the line below
ch_dict = {"dna":1,"53BP1NB":2,"mdc1":3,"pol2S5":4}

# If use mdc1 as the factor for region segementation
# If use dna/dapi as the key channel for nucleus segmentation
factor_key =  "mdc1"
dna_key = "dna"

pol2_key = "pol2S5"

# Replace the data directory in the ""; * is the final path level where images are located
# For example: data_save_folder = r"F:\XXX\AAA\BBB\*"
data_save_folder = r"E:\Data_Stam\210228IF_210225exp_Mdc1_Fixed\cov2_53BP1NB\*"


#  Load all image files

In [4]:

# read data files from the hard drive
data_folder = data_save_folder
data_files = [file for file in glob.glob(data_folder) if file[-3:]=="TIF" and "thumb" not in file]

# group images for the same fov
sorted_data_files = []
num_of_ch = num_of_ch
num_of_img = len(data_files)
if num_of_img%num_of_ch == 0: #check if all images contain the same number of channels
    for i in range (int(num_of_img/num_of_ch)):
        data_for_each_cell = data_files[i*num_of_ch:i*num_of_ch+4]
        sorted_data_files.append(data_for_each_cell)
        
num_of_fov =len(sorted_data_files)
print (f"There are {num_of_fov} fovs for this {num_of_ch}-channel image dataset.")

There are 25 fovs for this 4-channel image dataset.


# Load fov of interest to check images and peform nucleus segmentation

In [17]:

# The pixel size for excluding small 53BP_foci;
# Replace *300* with other number desired or *0* if do not want to perform 53BP1 foci exclusion
#small_53BP_size = 300

small_53BP_size = 0

In [18]:

# Generate an empty list to store all measurements
measurement_for_all_fov = []

#To analyze a subset of fovs, use the code below where *2* means fov_0-fov_1; to analyze all fovs, replace *2* with num_of_fov;
# the number needs to be <= num_of_fov

for _fov_id in range (num_of_fov):
        
    # load images for 4 channels for each fov
    img_1 = io.imread(sorted_data_files[_fov_id][0])  #ch405 
    img_2 = io.imread(sorted_data_files[_fov_id][1])  #ch488
    img_3 = io.imread(sorted_data_files[_fov_id][2])  #ch560
    img_4 = io.imread(sorted_data_files[_fov_id][3])  #ch642
    # Store images into an img_dict based on their channel index
    ch_img_dict={'1':img_1,'2':img_2,'3':img_3,'4':img_4}
    # Find the best focal plane using the mdc1 channel
    image_fl = []
    image_std = []
    for i in range(len(ch_img_dict[str(ch_dict[factor_key])])):
        _fl = np.array(ch_img_dict[str(ch_dict[factor_key])][i,:,:].flatten())
        image_fl.append(_fl)
        image_std.append(np.std(_fl))
    best_plane_index = np.argmax(np.array(image_std))
    print(f"Analyzing the plane {best_plane_index} for the image {_fov_id} in this dataset")   
    
    # load the best focal plane
    img_1_bf=img_1[best_plane_index,:,:]
    img_2_bf=img_2[best_plane_index,:,:]
    img_3_bf=img_3[best_plane_index,:,:]
    img_4_bf=img_4[best_plane_index,:,:]
    # Update the img_dict
    ch_img_dict={'1':img_1_bf,'2':img_2_bf,'3':img_3_bf,'4':img_4_bf}
    
    
    # Use Li_global_th and binary operations on the dna channel to generate nuclei masks
    li_value =  threshold_li (ch_img_dict[str(ch_dict[dna_key])])
    nuclei_mask = ch_img_dict[str(ch_dict[dna_key])]>li_value
    erosion_factor_dna = 5
    eroded_nuclei_mask = erosion(nuclei_mask, disk(erosion_factor_dna))
    eroded_nuclei_mask = ndi.binary_fill_holes(eroded_nuclei_mask)
    eroded_nuclei_mask = remove_small_objects(eroded_nuclei_mask, 10000,connectivity=1)

    # erosion and dilation to remove micronuclei or small blebs
    eroded_nuclei_mask = erosion(eroded_nuclei_mask, disk(25))
    eroded_nuclei_mask = dilation(eroded_nuclei_mask, disk(25))
    # Nuclei segmentation to get all valid nuclei of interest
    labeled_nuclei, num_of_nuclei = ndi.label(eroded_nuclei_mask)

    # Add all valid nuclei to the list for downstream analysis
    kept_nuclei = []
    for i in range(num_of_nuclei):
        cand_nucleus = labeled_nuclei == i+1
        cand_nucleus[cand_nucleus>0]=1
        cand_nucleus = np.array(cand_nucleus)
        kept_nuclei.append(cand_nucleus)
       
    # Analyze each cell from this fov for all channels
    for cell_id in range(len(kept_nuclei)):
        # Generate masks for the MDC-labeled chromosome ("the reincorporated micronuclei") on the mdc1 channel
        # Q3 + 3* IQR (mdc1 intensity for the analyzed nucleus) used as the threshold for its segmentation
        nuclei_to_measure = kept_nuclei[cell_id]
        mdc_intensity = ch_img_dict[str(ch_dict[factor_key])]*nuclei_to_measure
        mdc_intensity_filtered = np.array([i for i in mdc_intensity.flatten() if i >0])
        mdc_iqr = (np.percentile(mdc_intensity_filtered,75)-np.percentile(mdc_intensity_filtered,25))
        mdc_positive_th = np.percentile(mdc_intensity_filtered,75) + mdc_iqr*3
        mdc_chr_mask = (ch_img_dict[str(ch_dict[factor_key])]>mdc_positive_th)*nuclei_to_measure
        
        # Removing small potential 53BP foci (change pixel size in the box above)
        mdc_chr_mask = remove_small_objects(mdc_chr_mask, small_53BP_size, connectivity=1)
        
        # Remove tiny unsure foci if no 53BP foci size exclusion were applied
        if small_53BP_size == 0:
            small_53BP_size = 100
        else:
            pass
        
        # Analyze valid mdc-labeled nuclear regions only
        if sum(mdc_chr_mask.flatten())>small_53BP_size:
            
            # Generate mask for the MDC-negative nucleous
            # lower 10% of the mdc1 intensity for the analyzed nucleus used to define the nucleolus 
            dna_intensity = ch_img_dict[str(ch_dict[dna_key])]*nuclei_to_measure
            dna_intensity_filtered = np.array([i for i in dna_intensity.flatten() if i >0])
            dna_negative_th = np.percentile(dna_intensity_filtered,10)
            mdc_negative_th = np.percentile(mdc_intensity_filtered,10)
            
            pol2_intensity_filtered = ch_img_dict[str(ch_dict[dna_key])]*nuclei_to_measure
            pol2_negative_th = np.percentile(pol2_intensity_filtered,10)
            
            
            dna_negative_mask = (ch_img_dict[str(ch_dict[dna_key])]<dna_negative_th)*nuclei_to_measure
            mdc_negative_mask = (ch_img_dict[str(ch_dict[factor_key])]<mdc_negative_th)*nuclei_to_measure         
            nucleolus_mask = dilation(mdc_negative_mask, disk(2))
            
            
            # Generate mask for the rest chr in nuclei by removing the mdc-labeled and nucleolus regions
            ctrl_chr_mask1 = nuclei_to_measure *  (mdc_chr_mask == 0)
            ctrl_chr_mask2 = nuclei_to_measure * (nucleolus_mask ==0)
            ctrl_chr_mask = ctrl_chr_mask1*ctrl_chr_mask2
            # Generate mask for the non-cell/non-nuclei background
            noncell_background = erosion(nuclei_mask==0,disk(10))
            
            
            # Save mask images in the source directory
            mask_save_path = data_save_folder[:-2] + os.sep + f'segmentations_{small_53BP_size}' + os.sep + f'fov_{_fov_id}'
            if not os.path.exists(mask_save_path):
                os.makedirs(mask_save_path)
            mdc_chr_mask_savename = f'cell_{cell_id}_incoporated_chr.tif'
            ctrl_chr_mask_savename = f'cell_{cell_id}_control_chr_cell.tif'
            io.imsave(mask_save_path+os.sep+mdc_chr_mask_savename,(mdc_chr_mask))
            io.imsave(mask_save_path+os.sep+ctrl_chr_mask_savename,(ctrl_chr_mask))
            
            
            # Generate an empty list to store measurement for each cells
            measurement_for_each_cell = []
            # Add fov_id and cell_id 
            measurement_for_each_cell.append (int(_fov_id))
            measurement_for_each_cell.append (int(cell_id))
            # Measure and add pixel area for MDC-labeled chromosome, control chromosomes, and nucleolus
            measurement_for_each_cell.append (int(sum(mdc_chr_mask.flatten())))
            measurement_for_each_cell.append (int(sum(ctrl_chr_mask.flatten())))
            measurement_for_each_cell.append (int(sum(nucleolus_mask.flatten())))
            
            # Measure and add intensity for MDC-labeled chromosome, control chromosomes, and nucleolus for each channel
            for ch in range(num_of_ch):
                ch_to_measure = ch_img_dict[str(ch+1)]
                # Measure ave intensity
                mdc_chr_ave_intensity = (ch_to_measure * mdc_chr_mask)[(ch_to_measure * mdc_chr_mask)!=0].mean()
                ctrl_chr_ave_intensity = (ch_to_measure * ctrl_chr_mask)[(ch_to_measure * ctrl_chr_mask)!=0].mean()
                nucleolus_ave_intensity = (ch_to_measure * nucleolus_mask)[(ch_to_measure * nucleolus_mask)!=0].mean()
                noncell_background_ave_intensity = (ch_to_measure*noncell_background)[(ch_to_measure*noncell_background)!=0].mean()
                # Background subtraction
                mdc_chr_ave_intensity = mdc_chr_ave_intensity - noncell_background_ave_intensity
                ctrl_chr_ave_intensity = ctrl_chr_ave_intensity - noncell_background_ave_intensity
                nucleolus_ave_intensity = nucleolus_ave_intensity - noncell_background_ave_intensity
                # Add measurements
                measurement_for_each_cell.append(mdc_chr_ave_intensity)
                measurement_for_each_cell.append(ctrl_chr_ave_intensity)
                measurement_for_each_cell.append(nucleolus_ave_intensity)
                        
            # Add measurements for each cell to the measurements for all fovs
            measurement_for_each_cell = np.array(measurement_for_each_cell)
            measurement_for_all_fov.append(measurement_for_each_cell)
               

Analyzing the plane 4 for the image 0 in this dataset




Analyzing the plane 1 for the image 1 in this dataset




Analyzing the plane 3 for the image 2 in this dataset




Analyzing the plane 4 for the image 3 in this dataset




Analyzing the plane 2 for the image 4 in this dataset




Analyzing the plane 1 for the image 5 in this dataset




Analyzing the plane 3 for the image 6 in this dataset




Analyzing the plane 2 for the image 7 in this dataset




Analyzing the plane 3 for the image 8 in this dataset




Analyzing the plane 7 for the image 9 in this dataset
Analyzing the plane 7 for the image 10 in this dataset




Analyzing the plane 3 for the image 11 in this dataset




Analyzing the plane 5 for the image 12 in this dataset




Analyzing the plane 2 for the image 13 in this dataset




Analyzing the plane 7 for the image 14 in this dataset




Analyzing the plane 2 for the image 15 in this dataset




Analyzing the plane 4 for the image 16 in this dataset




Analyzing the plane 2 for the image 17 in this dataset




Analyzing the plane 1 for the image 18 in this dataset




Analyzing the plane 3 for the image 19 in this dataset




Analyzing the plane 4 for the image 20 in this dataset




Analyzing the plane 5 for the image 21 in this dataset




Analyzing the plane 1 for the image 22 in this dataset




Analyzing the plane 2 for the image 23 in this dataset




Analyzing the plane 3 for the image 24 in this dataset




# Define channel information to save measurements as excel here:

In [19]:
# Replace the protein/dna name below as how they are ordered for the channel (ch405, ch488, ch560, ch647)
# For example, ch_save_list = ["dna","rpa2","mdc1","pol2S5"] if rpa2 in ch488
ch_save_list = ["dna","53BP1NB","mdc1","pol2S5"]

ch1 = ch_save_list[0]
ch2 = ch_save_list[1]
ch3 = ch_save_list[2]
ch4 = ch_save_list[3]

In [20]:
import pandas as pd


df = pd.DataFrame(measurement_for_all_fov)  
df.columns = ['fov_id','cell_id','area_incorporated_chr','area_control_chr','area_nucleolus',
             f'ave_intensity_incorporated_chr_{ch1}',f'ave_intensity_control_chr_{ch1}',f'ave_intensity_nucleolus_{ch1}',
             f'ave_intensity_incorporated_chr_{ch2}',f'ave_intensity_control_chr_{ch2}',f'ave_intensity_nucleolus_{ch2}',
             f'ave_intensity_incorporated_chr_{ch3}',f'ave_intensity_control_chr_{ch3}',f'ave_intensity_nucleolus_{ch3}',
             f'ave_intensity_incorporated_chr_{ch4}',f'ave_intensity_control_chr_{ch4}',f'ave_intensity_nucleolus_{ch4}']

analysis_savename = f'measurement_for_all_fov_background_subtracted_{small_53BP_size}.xlsx'
analysis_save_path = data_save_folder[:-2] + os.sep + 'analysis'
if not os.path.exists(analysis_save_path):
    os.makedirs(analysis_save_path)
df.to_excel(analysis_save_path+os.sep+analysis_savename)


In [21]:
df

Unnamed: 0,fov_id,cell_id,area_incorporated_chr,area_control_chr,area_nucleolus,ave_intensity_incorporated_chr_dna,ave_intensity_control_chr_dna,ave_intensity_nucleolus_dna,ave_intensity_incorporated_chr_53BP1NB,ave_intensity_control_chr_53BP1NB,ave_intensity_nucleolus_53BP1NB,ave_intensity_incorporated_chr_mdc1,ave_intensity_control_chr_mdc1,ave_intensity_nucleolus_mdc1,ave_intensity_incorporated_chr_pol2S5,ave_intensity_control_chr_pol2S5,ave_intensity_nucleolus_pol2S5
0,0.0,2.0,968.0,17152.0,3326.0,200.772177,175.639266,153.895317,382.100749,29.050962,13.59845,357.787416,117.335111,58.198902,171.361483,244.305456,103.064294
1,1.0,1.0,448.0,30489.0,5423.0,56.484429,84.159319,73.437719,291.849691,21.778841,15.865183,151.886202,59.261723,36.516434,36.819521,81.673977,44.916382
2,1.0,2.0,353.0,26644.0,4911.0,177.320173,142.635867,79.251886,601.480667,68.319203,28.92361,579.93032,141.503614,52.60829,220.779481,245.533004,71.152067
3,2.0,1.0,1095.0,23824.0,4450.0,101.143147,102.102241,61.453316,350.270021,39.442221,20.178128,519.640586,159.660324,62.565457,216.024471,347.928268,111.805958
4,3.0,2.0,949.0,16981.0,3479.0,207.116626,183.242613,158.307829,387.003058,30.5811,14.655197,267.823199,89.924923,44.723399,166.753393,244.883053,103.44402
5,4.0,0.0,603.0,13599.0,2768.0,203.248098,183.930845,135.987864,418.801436,47.627301,28.977722,327.140258,82.880965,41.623249,202.583531,308.169062,116.079952
6,5.0,0.0,465.0,24440.0,4557.0,57.559111,32.366393,23.735368,45.917032,15.306228,9.587605,80.10053,21.859502,10.675074,33.832623,60.44942,39.089854
7,5.0,2.0,1183.0,19504.0,3672.0,67.007974,57.902559,36.024264,153.043579,21.490407,8.422029,284.544884,68.866438,29.154189,89.740779,127.362102,50.037181
8,5.0,3.0,832.0,26982.0,5201.0,67.1176,55.139702,46.949584,596.679806,59.238818,28.078532,511.24756,167.860758,78.066917,165.032926,270.936942,75.983678
9,6.0,0.0,2129.0,20065.0,3776.0,139.004234,132.925681,121.140571,446.245802,75.290458,46.118155,606.655065,68.696516,35.487871,397.888505,373.31573,143.21677
