# Compute cell centers from cell masks

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import os
import sys
import dask
import dask.array as da
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from skimage.io import imread
import tifffile as tf

# Directories and inputs

In [2]:
# # folder paths for PKL files with RNA positions
# dotPaths = []
# dotPaths.append(r"Y:\coskun-lab\Zhou\4_HCR\20210614_hchCulture\00_registered_images\detected_dots")
# dotPaths.append(r"Y:\coskun-lab\Zhou\4_HCR\20210507_cytokines\uc\00_registered_images\detected_dot_positions")
# dotPaths.append(r"Y:\coskun-lab\Zhou\4_HCR\20210324_11gene\BM\registered images\detected_dots_positions_v2")
# dotPaths.append(r"Y:\coskun-lab\Zhou\4_HCR\20210324_11gene\UC\00_registered_images\detected_dots_positions")

# Folders holding the registered cell masks, one entry per tissue source.
# Built with os.path.join so the separators match the host OS; on Windows the
# resulting strings are exactly "..\images\<tissue>\registered".
maskPaths = [
    os.path.join('..', 'images', tissue, 'registered')
    for tissue in ('HBM', 'HUC', 'HCH')
]

# Working directory at the time this cell runs; the relative paths in this
# cell are written relative to it.
cwd = os.getcwd()

# Folder that receives the exported cell-center table.
exportPath = os.path.join('..', 'results', 'spatial_statistics')

# Column order of the cell-center dataframe.
idCols = ['CellY', 'CellX', 'FOV', 'CellLabel', 'TissueSource']

# For each FOV, read cell masks and compute cell centers

In [4]:
# Build dfCenters: one row per cell-mask TIF, holding the mask's center
# (mean row and mean column of its non-zero pixels) together with the FOV
# number, a running per-tissue cell label and the tissue source.
#
# Reads maskPaths, cwd and idCols from the inputs cell.  The kernel's working
# directory is never changed here: every folder and file is addressed by a
# full path, so each relative entry of maskPaths is resolved against the same
# starting directory (cwd).

cellRecords = [] # one dict per cell; converted to a dataframe once at the end
lastLabel = {}   # tissue source -> highest CellLabel assigned so far

for tissuePath in maskPaths: # each tissue source

    # resolve against the starting directory (absolute entries pass through
    # os.path.join unchanged); backslashes are mapped to the host separator so
    # Windows-style literals also work elsewhere (no-op on Windows)
    tissueDir = os.path.normpath(os.path.join(cwd, tissuePath.replace('\\', os.sep)))

    # tissue source = name of the folder that contains the mask folder,
    # e.g. <...>/images/HBM/registered -> HBM
    tissueSource = os.path.basename(os.path.dirname(tissueDir))
    tissueSource = tissueSource.split('_')[-1]        # drop any 'prefix_' part
    tissueSource = tissueSource.replace('Culture', '') # e.g. 'hchCulture' -> 'hch'
    tissueSource = tissueSource.upper() # all caps

    # find all FOVs: only sub-folders count, stray files are ignored
    fovs = [f for f in os.listdir(tissueDir)
            if os.path.isdir(os.path.join(tissueDir, f))]

    # preserve cell count across FOVs.  Continue from the last label already
    # used for this tissue source (0 if none has been recorded yet)
    cellCount = lastLabel.get(tissueSource, 0)

    for fov in tqdm(fovs): # each FOV

        fovDir = os.path.join(tissueDir, fov)

        # find all cell masks for this FOV
        cellFiles = [f for f in os.listdir(fovDir)
                     if f.endswith('.tif') and 'cell' in f and 'Mask' in f]
        if len(cellFiles) == 0: # empty, no cells
            continue # next iter

        # FOV folders are expected to have numeric names; int() raises
        # ValueError otherwise
        fovNumber = int(fov)

        for cellFile in cellFiles: # each cell mask TIF

            # labels are a running count in os.listdir order
            cellCount += 1

            # read cell mask (once) and locate its foreground pixels
            mask = imread(os.path.join(fovDir, cellFile))
            y, x = np.nonzero(mask > 0)

            if y.size == 0:
                # empty mask: record NaN centers and report it
                cellY = cellX = np.nan
                print()
                print('Mask empty:')
                print(tissuePath)
                print(fov)
                print(cellFile)
                print(cellCount)
            else:
                # cell center = mean pixel coordinate of the mask
                cellY = y.mean()
                cellX = x.mean()

            cellRecords.append({
                'TissueSource': tissueSource,
                'FOV': fovNumber,
                'CellLabel': cellCount,
                'CellY': cellY,
                'CellX': cellX,
            })

    lastLabel[tissueSource] = cellCount

# single construction; columns follow idCols even when no cell was found
dfCenters = pd.DataFrame(cellRecords, columns = idCols)

100%|█████████████████████████████████████████████████████████████████████████████████| 65/65 [00:47<00:00,  1.37it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 25/25 [00:07<00:00,  3.47it/s]
 73%|███████████████████████████████████████████████████████████▍                     | 66/90 [00:26<00:11,  2.16it/s]


Mask empty:
Y:\coskun-lab\Zhou\4_HCR\20210324_11gene\BM\registered images\v3
071
cell2_Mask.tif
168


100%|█████████████████████████████████████████████████████████████████████████████████| 90/90 [00:37<00:00,  2.39it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 58/58 [00:13<00:00,  4.17it/s]


In [5]:
# Show the assembled cell-center table (one row per cell mask file).
dfCenters

Unnamed: 0,CellY,CellX,FOV,CellLabel,TissueSource
0,1044.813513,598.608449,1,1,HCH
1,676.179703,1475.879289,1,2,HCH
2,493.974960,1179.543497,1,3,HCH
3,1167.364571,1379.676510,1,4,HCH
4,1106.085162,1666.713592,1,5,HCH
...,...,...,...,...,...
613,942.923173,853.551543,80,118,UC
614,1066.630734,805.953528,83,119,UC
615,518.830334,678.370679,88,120,UC
616,893.811265,1088.040846,88,121,UC


In [6]:
# Column-wise maxima per tissue source; the CellLabel column shows the
# highest running cell label reached for each tissue.
dfCenters.groupby('TissueSource').max()

Unnamed: 0_level_0,CellY,CellX,FOV,CellLabel
TissueSource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BM,1934.928867,1909.086431,95,240
HCH,2005.711229,1977.883438,34,256
UC,1919.259839,1881.641431,91,122


# Export cell centers dataframe

In [7]:
# Save the cell-center table as a pickle inside exportPath.
# exportPath is resolved against cwd (the directory captured in the inputs
# cell) rather than the kernel's current directory, so the destination does
# not depend on any os.chdir performed by earlier cells.  Backslashes are
# mapped to the host separator (no-op on Windows).
exportDir = os.path.normpath(os.path.join(cwd, exportPath.replace('\\', os.sep)))

# create the results folder on first use
os.makedirs(exportDir, exist_ok = True)

dfCenters.to_pickle(os.path.join(exportDir, '02_cell_centers.pkl'))