In [1]:
import os
import sys
from pathlib import Path
from typing import List

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io
from skimage.measure import regionprops_table
from skimage import exposure, measure

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Import path
module_path = str(Path.cwd().parents[0])
if module_path not in sys.path:
    sys.path.append(module_path)

from config import *

In [4]:
module_path = str(Path.cwd().parents[0] / "src")

if module_path not in sys.path:
    sys.path.append(module_path)

import utils as my_utils

In [5]:
# Define image and masks folder
info_csv = data_meta / "info_combined.csv"

df = pd.read_csv(info_csv)

In [6]:
df = df[df.After_bleach==False]

# Get nuclei, cyto, cell level features

In [7]:
def get_location(df, location):
    imgs_path = []
    markers = []
    df = df[df.Location == location]
    for row in df.itertuples():
        if row.Marker == 'Hoeschst' and row.Cycle != 'Cycle1':
            continue
        imgs_path.append(row.Path)
        markers.append(row.Marker)
    return imgs_path, markers


def get_region_props(img, cell, nuclei, cyto, marker, location, prop):
    """
    Get regionprops from masked region
    """
    cell_props = regionprops_table(cell, img, properties=prop)
    nuclei_props = regionprops_table(nuclei, img, properties=prop)
    cyto_props = regionprops_table(cyto, img, properties=prop)

    props = [cell_props, nuclei_props, cyto_props]
    prefix = ["Cell_", "Nuclei_", "Cyto_"]
    df_marker = pd.concat(
        [pd.DataFrame(prop).add_prefix(prefix[idx]) for idx, prop in enumerate(props)],
        axis=1,
    )
    df_marker["Marker"] = marker
    df_marker["Location"] = location
    df_marker["Id"] = regionprops_table(cell, properties=["label"])["label"]

    return df_marker


def measure_prop(df, masks, show=False, save=False):
    appended_data_intensity = []
    appended_data_morphology = []

    # Get each segmented cell region with corresponding nuclei and cell
    for idx, location in enumerate(masks.keys()):
        # Get cell masks
        mask_cyto = masks[location]["cyto"]
        mask_nuclei = masks[location]["nuclei"]
        cell, nuclei, cyto = my_utils.qc_nuclei(mask_cyto, mask_nuclei)
        
        # Get images
        imgs, markers = get_location(df, int(location))
        for idx, img_path in enumerate(imgs):
            img = skimage.io.imread(img_path)
            p2, p98 = np.percentile(img, (0, 99.9))
            img = exposure.rescale_intensity(img, in_range=(p2, p98))
#             print(img.shape, cell.shape, nuclei.shape, cyto.shape)
            df_marker = get_region_props(
                img, cell, nuclei, cyto, markers[idx], location, ["mean_intensity"]
            )
            appended_data_intensity.append(df_marker)

            # Get morphology only one time because morphology accross cell are the same
            if idx == 0:
                df_marker = get_region_props(
                    img, cell, nuclei, cyto, markers[idx], location, ["area"]
                )
                appended_data_morphology.append(df_marker)

                if show:
                    fig, ax = plt.subplots(1, 3, figsize=(15, 5))
                    ax[0].imshow(cell)
                    ax[1].imshow(nuclei)
                    ax[2].imshow(cyto)
                    plt.show()

    df_prop_intensity = pd.concat(appended_data_intensity, ignore_index=True)
    df_prop_intensity = df_prop_intensity.sort_values(
        by=["Location", "Id"]
    ).reset_index(drop=True)

    df_prop_morph = pd.concat(appended_data_morphology, ignore_index=True)
    df_prop_morph = df_prop_morph.sort_values(by=["Location", "Id"]).reset_index(
        drop=True
    )

    if save:
        df_prop_intensity.to_csv(data_meta / "measurements_intensity.csv", index=False)
        df_prop_morph.to_csv(data_meta / "measurements_morphology.csv", index=False)

    return df_prop_intensity, df_prop_morph

In [8]:
masks = my_utils.get_masks(data_mask)

In [9]:
df_prop_i, df_prop_m = measure_prop(df, masks, save=True, show=False)

In [10]:
df_prop_i

Unnamed: 0,Cell_mean_intensity,Nuclei_mean_intensity,Cyto_mean_intensity,Marker,Location,Id
0,11788.158099,32185.806775,567.825916,Hoeschst,1,1
1,7382.506802,12458.758859,4590.163663,p-EGFR,1,1
2,10368.843246,20900.001337,4575.867108,Concanavalin A,1,1
3,8268.703417,9150.794295,7783.483143,PCNA,1,1
4,11672.761784,27605.816136,2908.312247,B-actin,1,1
...,...,...,...,...,...,...
93334,5692.377801,7927.878788,5606.558568,B-tubin,9,53
93335,3570.571269,3431.109557,3575.925101,Cyclin D1,9,53
93336,1343.401586,13554.449883,874.628456,H3k27me3-488,9,53
93337,2495.286281,3918.272727,2440.658881,H3k4me3-647,9,53


In [11]:
df_prop_m

Unnamed: 0,Cell_area,Nuclei_area,Cyto_area,Marker,Location,Id
0,12644,4487,8157,Hoeschst,1,1
1,23141,7083,16058,Hoeschst,1,2
2,27942,5419,22523,Hoeschst,1,3
3,84133,11165,72968,Hoeschst,1,4
4,36288,7265,29023,Hoeschst,1,5
...,...,...,...,...,...,...
3452,34558,8668,25890,Hoeschst,9,49
3453,30426,5676,24750,Hoeschst,9,50
3454,21132,817,20315,Hoeschst,9,51
3455,17940,1877,16063,Hoeschst,9,52


# Get pixel level features

In [7]:
def get_location(df, location):
    imgs_path = []
    markers = []
    df = df[(df.Location == location)]
    for row in df.itertuples():
        imgs_path.append(row.Path)
        markers.append(row.Marker)
    return imgs_path, markers

def combined_imgs(imgs: List[str]) -> np.ndarray:
    """
    Return an multiplex image of dimension (num markers, height,width)
    """
    if not imgs:
        raise Exception("You passed a empty list of images path")
    pixels = []
    for img_path in imgs:
        img = skimage.io.imread(img_path)
        p2, p98 = np.percentile(img, (0, 99.9))
        img = exposure.rescale_intensity(img, in_range=(p2, p98), out_range=(0,1))
        pixels.append(img)
    return np.stack(pixels)

def non_border_cell(cell_mask):
    labels = []
    row_max, col_max = cell_mask.shape
    props = measure.regionprops(cell_mask)
    for prop in props:
        if prop['bbox'][0] == 0 or prop['bbox'][1] == 0 or prop['bbox'][2] == row_max or prop['bbox'][3] == col_max:
            continue
        else:
            labels.append(prop['label'])
    return labels

def get_multiplex_pix(df, masks, save=False):
    df_appended = []
    for idx, location in enumerate(masks.keys()):
        # Get pixels data
        imgs, markers = get_location(df, int(location))
        pixels = combined_imgs(imgs)
        
        # Get cell and nuclei mask
        mask_cyto = masks[location]["cyto"]
        mask_nuclei = masks[location]["nuclei"]
        cell, _, _ = my_utils.qc_nuclei(mask_cyto, mask_nuclei)
        
        labels = non_border_cell(cell)
        
        # Extracted all multiplex pixels in cell
        rows, cols = np.where(np.isin(cell, labels))
        cell_pixels = pixels[:, rows, cols]

        # Create dataframe
        df_pixels = pd.DataFrame(cell_pixels.T, columns=markers)
        df_pixels["Location"] = location
        df_pixels["X"] = rows
        df_pixels["Y"] = cols
        df_pixels["Id"] = cell[rows, cols]
        df_dapi = df_pixels.filter(like="Hoeschst")
        df_pixels = df_pixels.drop(df_dapi, axis=1)
        df_pixels.insert(
            0, column="Hoeschst", value=df_dapi.mean(axis=1).astype(np.uint16)
        )
        df_appended.append(df_pixels)

    df_pixels = pd.concat(df_appended, ignore_index=True)

    if save:
        df_pixels.to_csv(data_meta / "pixel_intensity.csv", index=False)

    return df_pixels

In [8]:
masks = my_utils.get_masks(data_mask)

In [9]:
df_pixels = get_multiplex_pix(df, masks, save=True)

In [10]:
df_pixels.describe()

Unnamed: 0,Hoeschst,p-EGFR,Concanavalin A,PCNA,B-actin,Phalloidin,WGA,APC,DKK1,Non-p-B-catenin,...,DKK2,EGFR,B-tubin,Cyclin D1,H3k27me3-488,H3k4me3-647,H3k9Ac-555,X,Y,Id
count,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,...,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0,75824860.0
mean,0.0007878683,0.1247236,0.08450244,0.1897957,0.1090403,0.1956644,0.07577599,0.06537012,0.1298587,0.1852942,...,0.1941212,0.2133975,0.1557571,0.1686379,0.0884437,0.07344001,0.1484827,689.2906,933.1064,29.37296
std,0.02805793,0.1427567,0.14718,0.1233,0.2021932,0.1646347,0.1410545,0.08812808,0.1223437,0.1520682,...,0.1465951,0.1402183,0.1013493,0.1189206,0.1629994,0.1371919,0.1346169,339.3109,475.6706,13.73193
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,3.0
25%,0.0,0.02787663,0.0,0.1059972,0.0,0.07437519,0.0,0.01532702,0.05109489,0.07689736,...,0.09221658,0.1173709,0.09028634,0.09167166,0.0,0.002613228,0.0627919,402.0,528.0,18.0
50%,0.0,0.07943809,0.0,0.1739852,0.0,0.1600084,0.003842163,0.04178315,0.1012165,0.1518987,...,0.1641337,0.1946903,0.1435986,0.1504599,0.002826855,0.02615279,0.1158754,687.0,932.0,29.0
75%,0.0,0.1705591,0.1280665,0.2532106,0.1094423,0.2734651,0.09161205,0.08275516,0.1710894,0.2569457,...,0.2615385,0.2848,0.2044728,0.220516,0.09327217,0.06365796,0.1902206,978.0,1337.0,40.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1400.0,1887.0,69.0
