In [1]:
import numpy as np
import tifffile
import os
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
from tqdm import tqdm

# define cell types

In [8]:
from skimage.measure import regionprops, regionprops_table
from skimage.util import img_as_uint, img_as_float, img_as_ubyte
from skimage.filters import threshold_otsu
from skimage.exposure import rescale_intensity
from skimage.segmentation import expand_labels
import pandas as pd

## test

In [3]:
mask_dir = r'..\..\coculture_diagonal\primed_pbmc\00_masks\all_cells'
mask_l = os.listdir(mask_dir)
mask_l.sort()

cd4_dir = r'..\..\coculture_diagonal\primed_pbmc\00_registered\CD4'
cd4_l = os.listdir(cd4_dir)
cd4_l.sort()

In [4]:
fov = '000'
mask = tifffile.imread(os.path.join(mask_dir,mask_l[0]))
cd4_im = img_as_float(tifffile.imread(os.path.join(cd4_dir,cd4_l[0],'C4_'+cd4_l[0]+'_CD4.tif')))
cd4_norm = (cd4_im - np.min(cd4_im))/ (np.max(cd4_im) - np.min(cd4_im))

In [11]:
properties = ['area','centroid','mean_intensity']
mask = expand_labels(mask,20)
props = regionprops(mask, cd4_norm, cache=True)
regions_table = regionprops_table(mask, intensity_image=cd4_im, cache=True, properties=properties)

In [12]:
expanded_mask = expand_labels(mask,20)
tifffile.imwrite(r'..\..\coculture_diagonal\primed_pbmc\00_masks\expansion_test\expanded_mask.tif',img_as_uint(expanded_mask))

## Cell phenotyping

In [15]:
marker_dir = r'..\..\coculture_diagonal\primed_pbmc\00_registered\CD4'
marker_l = os.listdir(marker_dir)
marker_l.sort()

mask_dir = r'..\..\coculture_diagonal\primed_pbmc\00_masks\all_cells'
mask_l = os.listdir(mask_dir)
mask_l.sort()

out_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types\raw'
os.makedirs(out_dir,exist_ok=True)

In [17]:
properties = ['label','area','centroid','mean_intensity']
for fov in tqdm(marker_l):
    im = img_as_float(tifffile.imread(os.path.join(marker_dir,fov,'C4_'+fov+'_CD4.tif')))
    im_norm = (im - np.min(im))/ (np.max(im) - np.min(im))
    cd4_mask = im_norm > threshold_otsu(im_norm)
    cd4_mask = img_as_ubyte(cd4_mask)

    mask = tifffile.imread(os.path.join(mask_dir,fov+'_mask.tif'))
    mask = expand_labels(mask,20)
    regions_table = regionprops_table(mask, intensity_image=im, cache=True, properties=properties)
    regions_table = pd.DataFrame(regions_table)
    regions_table = regions_table[regions_table['area']>100]
    regions_table.to_csv(os.path.join(out_dir,'cd4',fov+'_CD4.csv'))

100%|██████████| 120/120 [01:28<00:00,  1.36it/s]


## Labeling marker+ cells

In [64]:
from sklearn.cluster import KMeans

In [65]:
in_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types\raw\cd8'
fov_l = os.listdir(in_dir)
fov_l.sort()

In [66]:
l = []
for fov in fov_l:
    if fov.endswith('.csv'):
        df = pd.read_csv(os.path.join(in_dir,fov),index_col=0)
        l.append(df)
df = pd.concat(l)
expression = df['mean_intensity'].to_numpy()

In [67]:
# otsu threshold
thresh = threshold_otsu(expression)

In [68]:
label = (expression > thresh*0.8).astype('int')

In [69]:
np.sum(label)

607

In [70]:
# Check position of CD4+ cells
curr = 0
cd4_dir = r'..\..\coculture_diagonal\primed_pbmc\00_registered\CD8'
cd4_l = os.listdir(cd4_dir)
cd4_l.sort()
out_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types_visual\cd8'
for i,item in tqdm(enumerate(l)):
    # Load cd4 image, get gray scale image
    im = img_as_ubyte(tifffile.imread(os.path.join(cd4_dir,cd4_l[i],'C4_'+cd4_l[i]+'_CD8.tif')))
    im = rescale_intensity(im)
    colored = np.zeros((im.shape[0],im.shape[1],3),dtype='uint8')
    colored[:,:,0] = im
    colored[:,:,1] = im
    colored[:,:,2] = im

    length = item.shape[0]
    item.insert(loc=item.shape[1],column='cd8_label',value=label[curr:curr+length])
    curr += length
    temp = item.groupby('cd8_label')
    if not 1 in temp.groups.keys():
        tifffile.imwrite(os.path.join(out_dir,fov_l[i].split('_')[0].zfill(3)+'_CD8.tif'),colored)
        continue
    temp = temp.get_group(1)
    for j in range(temp.shape[0]):
        row = int(temp.iloc[j]['centroid-0'])
        col = int(temp.iloc[j]['centroid-1'])
        colored[row-2:row+2,col-2:col+2,0] = 0
        colored[row-2:row+2,col-2:col+2,1] = 255
        colored[row-2:row+2,col-2:col+2,2] = 0
    tifffile.imwrite(os.path.join(out_dir,fov_l[i].split('_')[0].zfill(3)+'_CD8.tif'),colored)

120it [00:44,  2.72it/s]


In [71]:
out_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types\labeled\cd8'
for i,item in enumerate(l):
    item.to_csv(os.path.join(out_dir,str(i).zfill(3)+'_CD8.csv'))

## Apply additional and removal masks

In [80]:
from skimage.measure import label

In [81]:
removal_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types_visual\cd8\removal_mask'
removal_l = os.listdir(removal_dir)
removal_l.sort()

addition_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types_visual\cd8\addition_mask'
addition_l = os.listdir(addition_dir)
addition_l.sort()

raw_masks = r'..\..\coculture_diagonal\primed_pbmc\00_masks\all_cells'
detected_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_types\labeled\cd8'

fov_l = os.listdir(raw_masks)
fov_l.sort()

In [82]:
def relabeling(fov):
    raw_mask = tifffile.imread(os.path.join(raw_masks,fov+'_mask.tif'))
    detected = pd.read_csv(os.path.join(detected_dir,fov+'_CD8.csv'),index_col=0)
    detected = detected[detected['cd8_label']==1]
    if fov+'.tif' in removal_l:
        removal = tifffile.imread(os.path.join(removal_dir,fov+'.tif'))
        in_removal = removal[detected['centroid-0'].to_numpy().astype(int),detected['centroid-1'].to_numpy().astype(int)]
        detected.insert(loc=detected.shape[1],column='removal',value=in_removal)
        detected = detected[detected['removal']==0]
    labels = detected['label'].to_numpy()
    curr_label = 1
    new_label = []
    for i in range(labels.shape[0]):
        temp = (raw_mask == labels[i]) * curr_label
        new_label.append(temp)
        curr_label += 1
    if fov+'.tif' in addition_l:
        addition = tifffile.imread(os.path.join(addition_dir,fov+'.tif'))
        labeled_addition = label(addition)
        for i in range(np.max(labeled_addition)):
            temp = (labeled_addition == i+1) * curr_label
            new_label.append(temp)
            curr_label += 1
    if len(new_label) == 0:
        return np.zeros(raw_mask.shape)
    stacked = np.stack(new_label,axis=0)
    return np.max(stacked,axis=0)

In [84]:
fovs = [fov.split('_')[0] for fov in fov_l]
fovs = fovs[:120]
masks = Parallel(n_jobs=-2,verbose=24)(delayed(relabeling)(fov) for fov in fovs)

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Done   1 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-2)]: Done   2 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-2)]: Done   3 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-2)]: Done   5 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-2)]: Done   6 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-2)]: Done   7 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-2)]: Done   8 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-2)]: Done   9 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-2)]: Done  10 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-2)]: Done  12 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-2)]: Done  13 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-2)]: Done  14 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-2)]: Done  15 tasks      | elapsed:   

In [85]:
out_dir = r'..\..\coculture_diagonal\primed_pbmc\00_masks\pbmc_masks_2d\cd8'
_ = Parallel(n_jobs=-2,verbose=24)(delayed(tifffile.imwrite)(os.path.join(out_dir,fov+'_mask.tif'),mask) for fov,mask in zip(fovs,masks))

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Done   1 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-2)]: Done   2 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-2)]: Done   3 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-2)]: Done   5 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-2)]: Done   6 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-2)]: Done   7 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-2)]: Done   8 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-2)]: Done   9 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-2)]: Done  10 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-2)]: Done  12 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-2)]: Done  13 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-2)]: Done  14 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-2)]: Done  15 tasks      | elapsed:   

In [86]:
n_cells = 0
for item in masks:
    n_cells += np.max(item)
print(n_cells)

636.0


# Locate 3D position of segmentation

In [87]:
from skimage.filters import gaussian, laplace
from joblib import Parallel, delayed
from tqdm import tqdm

In [88]:
nuclei_dir = r'..\..\coculture_diagonal\primed_pbmc\00_registered\RNA'
fov_l = os.listdir(nuclei_dir)
fov_l.sort()

mask_dir = r'..\..\coculture_diagonal\primed_pbmc\00_masks\pbmc_masks_2d\cd8'
mask_l = os.listdir(mask_dir)
mask_l.sort()

In [89]:
def calculate_focus_score(image,sigma=2):
    smoothed = gaussian(image, sigma=sigma)
    log = laplace(smoothed)
    return np.var(log)

In [90]:
sigma = 2
z_slice = {'row':[],'col':[],'z':[],'cellID':[]}
for i in tqdm(range(len(mask_l))):
    mask = img_as_uint(tifffile.imread(os.path.join(mask_dir,mask_l[i])))

    fov = mask_l[i].split('_')[0]

    dapi = tifffile.imread(os.path.join(nuclei_dir,fov,'cycle1_stack_C1.tif'))
    regions = regionprops(mask)
    
    for r in regions:
        min_row, min_col, max_row, max_col = r.bbox
        dapi_temp = dapi[:,min_row:max_row,min_col:max_col]
        z_slice['row'] += [r.centroid[0]]
        z_slice['col'] += [r.centroid[1]]
        
        focus_scores = Parallel(n_jobs=4)(delayed(calculate_focus_score)(dapi_temp[i,:,:]) for i in range(dapi_temp.shape[0]))
        focus_scores = np.array(focus_scores)
        z_slice['z'] += [np.argmax(focus_scores)]
        z_slice['cellID'] += [fov+'_'+str(r.label)]

  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
  return _convert(image, np.uint16, force_copy)
100%|██████████| 120/120 [11:36<00:00,  5.81s/it]


In [91]:
cell_df = pd.DataFrame(z_slice)

In [92]:
fov_l = [t.split('_')[0] for t in cell_df['cellID'].to_list()]
cell_df.insert(loc=cell_df.shape[1],column='fov',value=fov_l)

In [93]:
cell_df.to_csv(r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_3d_pos_cd8.csv')

# Generate cell DF

In [49]:
cell_df = pd.read_csv(r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_3d_pos_cd8.csv',index_col=0)

In [50]:
dots_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\detected_dots'
dots_l = os.listdir(dots_dir)
dots_l.sort()

mask_dir = r'..\..\coculture_diagonal\primed_pbmc\00_masks\pbmc_masks_2d\cd8'
mask_l = os.listdir(mask_dir)
mask_l.sort()

out_dir = r'..\..\coculture_diagonal\primed_pbmc\00_analysis\cell_df\cd8'
os.makedirs(out_dir,exist_ok=True)

In [51]:
fov_l = cell_df['fov'].unique().tolist()
fov_l.sort()
cell_by_fov = cell_df.groupby('fov')

In [52]:
for fov_int in tqdm(fov_l):
    # Load fov cell df, cell mask, detected dots
    fov_df = cell_by_fov.get_group(fov_int)
    fov = str(fov_int).zfill(3)
    mask = tifffile.imread(os.path.join(mask_dir,fov+'_mask.tif'))
    dots = pd.read_csv(os.path.join(dots_dir,fov+'_dots.csv'), index_col=0)

    # Apply z_position correction
    dots_by_gene = dots.groupby('gene')
    cycle1_gene = pd.concat([dots_by_gene.get_group(fov+'_gene1'),
                             dots_by_gene.get_group(fov+'_gene2'),
                             dots_by_gene.get_group(fov+'_gene3')])
    z = np.sort(cycle1_gene['z'].unique())
    slices = np.zeros(z.shape[0]-2)
    for i in range(slices.shape[0]):
        z_min = z[i]
        z_max = z[i+2]
        temp = cycle1_gene[cycle1_gene['z'] >= z_min]
        temp = temp[temp['z'] <= z_max]
        slices[i] = temp.shape[0]
    cycle1_gene['z'] = list(cycle1_gene['z'] - z[np.argmax(slices)+1])
    fov_df['z'] = list(fov_df['z'] * 3)
    fov_df['z'] = list(fov_df['z'] - z[np.argmax(slices)+1])
    
    cycle2_gene = pd.concat([dots_by_gene.get_group(fov+'_gene4'),
                             dots_by_gene.get_group(fov+'_gene5'),
                             dots_by_gene.get_group(fov+'_gene6')])
    z = np.sort(cycle2_gene['z'].unique())
    slices = np.zeros(z.shape[0]-2)
    for i in range(slices.shape[0]):
        z_min = z[i]
        z_max = z[i+2]
        temp = cycle2_gene[cycle2_gene['z'] >= z_min]
        temp = temp[temp['z'] <= z_max]
        slices[i] = temp.shape[0]
    cycle2_gene['z'] = list(cycle2_gene['z'] - z[np.argmax(slices)+1])

    # Recombine cycle1 and cycle2 dots, find dots within 30um of cell centroid
    r = 30
    corrected_dots = pd.concat([cycle1_gene,cycle2_gene])
    by_cell = fov_df.groupby('cellID')
    cells = fov_df['cellID'].tolist()

    pbmc_mask = tifffile.imread(os.path.join(mask_dir,fov+'_mask.tif'))
    pbmc_mask = expand_labels(pbmc_mask,20)

    for c in cells:
        cell = by_cell.get_group(c)
        cell_centroid = cell.values[0,:3].astype('float')
        distance = np.sqrt(np.sum((corrected_dots.values[:,:2].astype('float') - cell_centroid[:2])**2,axis=1))
        distance_mask = distance < (r * (2304/249.6))
        corrected_dots.insert(loc=corrected_dots.shape[1],column='in_cell',value=distance_mask)
        micro_envi = corrected_dots[corrected_dots['in_cell']==True]
        micro_envi.drop(columns=['in_cell'],inplace=True)
        corrected_dots.drop(columns=['in_cell'],inplace=True)

        cell_mask = pbmc_mask == int(c.split('_')[1])
        in_immune = cell_mask[micro_envi['row'].to_numpy(),micro_envi['col'].to_numpy()]
        micro_envi.insert(loc=micro_envi.shape[1],column='in_immune',value=in_immune)
        micro_envi.to_csv(os.path.join(out_dir,cell.values[0,3]+'.csv'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fov_df['z'] = list(fov_df['z'] * 3)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fov_df['z'] = list(fov_df['z'] - z[np.argmax(slices)+1])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  micro_envi.drop(columns=['in_cell'],inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas

# ki67 expression