In [64]:
import glob
import os
import sys
from itertools import cycle
from pathlib import Path, PureWindowsPath

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tifffile
import skimage
import pandas as pd
from skimage.filters import threshold_li
from tqdm.notebook import tqdm, trange
from skimage import exposure, io
from joblib import Parallel, delayed
import napari

In [65]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [66]:
data_dir = r'Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines\subcellular_clustering\v2\bm'

# Extract data directory info

In [67]:
def get_info(img_folder):
    """Function returns the info from folder containing multi-cycle staigning on cell

    Args:
        img_folder (str) : imgage folder path to get information
        name_dict (dict) : three level dictionnary mapping cycle -> channel -> marker name

    Returns:
        pandas dataframe with information
    """
    images_path = []
    markers = []
    fovs = []
    cycles = []
    cell_types = []
    
    # Loop through image folder
    for (dirpath, dirnames, filenames) in os.walk(img_folder):
        for name in sorted(filenames):
            if "tif" in name and 'Mask' not in name:
                # Get information from image name
                marker = name.split('_')[1:4]
                path = os.path.join(dirpath, name)
                fov = dirpath.split('\\')[-1]
                cycle = name.split('_')[0][-1]
                cell_type = dirpath.split('\\')[-2]
                if cell_type == 'bm':
                    cell_type = 'HBM'
                elif cell_type == 'hch':
                    cell_type = 'HCH'
                elif cell_type == 'exp1_uc':
                    cell_type = 'HUC_1'
                elif cell_type == 'exp2_uc':
                    cell_type = 'HUC_2'
                markers.append(marker)
                images_path.append(path)
                fovs.append(fov)
                cycles.append(cycle)
                cell_types.append(cell_type)
    info = {
        "Marker": markers,
        "Path": images_path,
        "FOV": fovs,
        "Cycle": cycles,
        "Cell Type": cell_types
    }
    df = pd.DataFrame(info)

    return df

def get_masks_info(img_folder):
    """Function returns the info from folder containing multi-cycle staigning on cell

    Args:
        img_folder (str) : imgage folder path to get information
        name_dict (dict) : three level dictionnary mapping cycle -> channel -> marker name

    Returns:
        pandas dataframe with information
    """
    images_path = []
    fovs = []
    cell_types = []
    cell_ids = []
    
    # Loop through image folder
    for (dirpath, dirnames, filenames) in os.walk(img_folder):
        for name in sorted(filenames):
            if "tif" in name and 'Mask' in name:
                # Get information from image name
                path = os.path.join(dirpath, name)
                fov = dirpath.split('\\')[-1]
                cell_type = dirpath.split('\\')[-2]
                cell_id = name.split('_')[0][-1] 
                if cell_type == 'bm':
                    cell_type = 'HBM'
                elif cell_type == 'hch':
                    cell_type = 'HCH'
                elif cell_type == 'exp1_uc':
                    cell_type = 'HUC_1'
                elif cell_type == 'exp2_uc':
                    cell_type = 'HUC_2'
                images_path.append(path)
                fovs.append(fov)
                cell_types.append(cell_type)
                cell_ids.append(cell_id)
    info = {
        "Path": images_path,
        "FOV": fovs,
        "Cell Type": cell_types,
        "Cell Id": cell_ids
    }
    df = pd.DataFrame(info)

    return df

In [68]:
df = get_info(data_dir)

In [69]:
df

Unnamed: 0,Marker,Path,FOV,Cycle,Cell Type
0,"[gapdh, empty, actb]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,001,1,HBM
1,"[il8, il6, ccl11]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,001,2,HBM
2,"[col1a1, empty, nanog]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,001,3,HBM
3,"[sox9, eef2, spp1]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,001,4,HBM
4,"[empty, runx1, pdl1]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,001,5,HBM
...,...,...,...,...,...
1229,"[il8, il6, ccl11]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,028,3,HCH
1230,"[col5a2, col1a1, pdl1]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,028,4,HCH
1231,"[malat1, runx1, cxcr4]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,028,5,HCH
1232,"[empty, mki67, nanog]",Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,028,6,HCH


In [70]:
df['Cell Type'].unique()

array(['HBM', 'HUC_1', 'HUC_2', 'HCH'], dtype=object)

In [71]:
df_mask = get_masks_info(data_dir)

In [81]:
df_mask

Unnamed: 0,Path,FOV,Cell Type,Cell Id
0,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,001,HBM,1
1,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,003,HBM,1
2,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,003,HBM,2
3,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,003,HBM,3
4,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,004,HBM,1
...,...,...,...,...
588,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,027,HCH,0
589,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,027,HCH,1
590,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,028,HCH,1
591,Y:\coskun-lab\Zhou\4_HCR\2D_analyses_pipelines...,028,HCH,2


In [73]:
df_mask['Cell Type'].unique()

array(['HBM', 'HUC_1', 'HUC_2', 'HCH'], dtype=object)

# Create merged mask

In [74]:
from joblib import Parallel, delayed

def read_img(path):
    img = skimage.io.imread(path)
    if len(img.shape) > 2:
        img = img[0, ...]
    return img 

def joblib_loop(task, pics):
    return Parallel(n_jobs=20)(delayed(task)(i) for i in pics)

In [75]:
group = df_mask.groupby(['Cell Type', 'FOV'])

In [77]:
for name, df_group in tqdm(group):
    paths = df_group.Path.tolist()
    cell_ids = df_group['Cell Id'].tolist()
    path = Path.cwd().parent / 'data' / 'masks2' / f'{name[0]}_{name[1]}.tiff'
    if os.path.exists(path):
        continue
    
    # Read all mask images
    imgs = joblib_loop(read_img, paths)
    
    # Define combined mask
    mask = np.zeros(imgs[0].shape, dtype=np.uint8)
    
    # Loop per cell mask ID
    for ii, cell_id in enumerate(cell_ids):
        mask = np.where(imgs[ii]>0, int(cell_id), mask)
    
    # Save mask
    skimage.io.imsave(path, mask)


  0%|          | 0/195 [00:00<?, ?it/s]

  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimage.io.imsave(path, mask)
  skimag

# Read pixel level info

In [84]:
from joblib import Parallel, delayed
from skimage import exposure, io

def contrast_streching(img, n_min=20, n_max=99.9):
    if len(img.shape)>2:
        img = img[0,...,1:]
    p2, p98 = np.percentile(img, (n_min, n_max))
    return exposure.rescale_intensity(
        img, in_range=(p2, p98), out_range=(0, 1)
    )

def read_img(path):
    return contrast_streching(skimage.io.imread(path))

def joblib_loop(task, pics):
    return Parallel(n_jobs=20)(delayed(task)(i) for i in pics)

In [85]:
group = df.groupby(['Cell Type', 'FOV'])

In [86]:
for name, df_group in tqdm(group):
    df_appended = []

    paths = df_group.Path.tolist()
    markers = df_group['Marker'].tolist()
    
    # Get Cell mask
    path = Path.cwd().parent / 'data' / 'masks2' / f'{name[0]}_{name[1]}.tiff'
    if not os.path.exists(path):
        continue
    mask_cell = skimage.io.imread(path)
    
    # Read all mask images
    imgs = joblib_loop(read_img, paths)
    
    # Get imgs and markers flatten
    imgs_flatten = []
    markers_flatten = []
    for ii, marker_list in enumerate(markers):
        for jj, marker in enumerate(marker_list):
            if marker == 'empty':
                continue
            img = contrast_streching(imgs[ii][...,jj])
            imgs_flatten.append(img)
            markers_flatten.append(marker)
    
    # Get all intensity mask
    imgs_stack = np.stack(imgs_flatten)
    mask_intensity = (imgs_stack < 0.3).all(0)

    for cell_id in np.unique(mask_cell):
        if cell_id == 0:
            continue
        
        # Get combined mask
        mask_combined = cv2.bitwise_and(~mask_intensity.astype(int), (mask_cell==cell_id).astype(int))

        # Get pixel intensity
        rows, cols = np.where(mask_combined)
        cell_pixels = imgs_stack[:, rows, cols]

        # Create dataframe
        df_pixels = pd.DataFrame(cell_pixels.T, columns=markers_flatten)
        df_pixels["Cell Type"] = name[0]
        df_pixels["FOV"] = name[1]
        df_pixels["X"] = rows
        df_pixels["Y"] = cols
        df_pixels["Id"] = cell_id
        df_appended.append(df_pixels)
    df_pixels = pd.concat(df_appended, ignore_index=True)
    
    path = Path.cwd().parent / 'data' / 'h5' / f'{name[0]}_{name[1]}.h5'
    df_pixels.to_hdf(path, 'df', mode='w', format='table', data_columns=True)


  0%|          | 0/201 [00:00<?, ?it/s]

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
