In [96]:
import os 
import sys

from os import listdir
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import trange, tqdm
from pathlib import Path
from skimage import exposure, io
import h5py
import cv2 as cv
from skimage.util import img_as_ubyte
from skimage import exposure
import pandas as pd
from joblib import Parallel, delayed
import skimage

In [97]:
# Project root: the directory one level above the current working directory.
p_dir = Path.cwd().parents[0].absolute()
# Data folder under the project root, and its 'match' sub-folder.
data_dir = p_dir.joinpath('data')
match_info_dir = data_dir.joinpath('match')

In [98]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs. Re-running this cell in a live kernel prints an
# "already loaded" notice.
%load_ext autoreload
%autoreload 2

# Folder added to the import path below; the local `utils` module imported at
# the end of this cell presumably lives there — confirm.
module_path = str(p_dir / "src")

# The membership test keeps sys.path free of duplicates when the cell is re-run.
if module_path not in sys.path:
    sys.path.append(module_path)

# Must come after the sys.path update above.
import utils as my_utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [99]:
# Experiment whose ToF-SIMS folder is scanned for datasets.
experiment = 'endometrium'

ts_data_dir = p_dir / 'data' / 'tof-sims' / experiment

# listdir() returns entries in arbitrary, filesystem-dependent order, so the
# names are sorted to make every later per-dataset loop reproducible.
# Entries containing 'auto' are excluded (as before). Hidden entries such as
# '.DS_Store' or '.ipynb_checkpoints' are skipped too: they are not datasets
# and would raise a KeyError when used to look up a mask.
datasets = sorted(
    name
    for name in listdir(ts_data_dir)
    if 'auto' not in name and not name.startswith('.')
)

# Import SIMS data

In [100]:
from skimage.transform import rotate
from functools import partial
import matplotlib.patches as mpatches
from skimage.segmentation import mark_boundaries
from sklearn.preprocessing import MinMaxScaler

# Folder holding the per-experiment HDF5 files read by get_imgs_sims().
h5_data_dir = p_dir.joinpath('data', 'h5')

def get_imgs_sims(experiment, name):
    """Load one dataset's images and channel labels from an experiment's HDF5 file.

    Parameters
    ----------
    experiment : str
        Base name of the file, opened as ``h5_data_dir / f'{experiment}.hdf5'``.
    name : str
        Key of the dataset inside that file.

    Returns
    -------
    tuple
        ``(imgs, labels)``: the dataset contents read fully into memory, and
        the dataset's ``'labels'`` attribute converted to a list.
    """
    h5_path = h5_data_dir / f'{experiment}.hdf5'
    with h5py.File(h5_path, 'r') as h5_file:
        dataset = h5_file[name]
        # [:] copies the data out so it stays usable after the file is closed.
        imgs = dataset[:]
        labels = list(dataset.attrs['labels'])
    return imgs, labels

def contrast_stretching(img, lower_pct=0.1, upper_pct=99.9):
    """Stretch an image's contrast between two percentiles and return uint8 data.

    Intensities at or below the ``lower_pct`` percentile map to 0, those at or
    above the ``upper_pct`` percentile map to 255, and values in between are
    rescaled linearly by ``skimage.exposure.rescale_intensity``.

    Parameters
    ----------
    img : array-like
        Image to rescale.
    lower_pct, upper_pct : float, optional
        Percentiles (0-100) used as the input intensity range. The defaults
        (0.1 and 99.9) reproduce the previously hard-coded behaviour.

    Returns
    -------
    numpy.ndarray
        Rescaled image cast to ``np.uint8``.

    Raises
    ------
    ValueError
        If the percentiles are not ordered within ``[0, 100]``.
    """
    if not 0 <= lower_pct <= upper_pct <= 100:
        raise ValueError(
            f"percentiles must satisfy 0 <= lower_pct <= upper_pct <= 100, "
            f"got lower_pct={lower_pct}, upper_pct={upper_pct}"
        )

    low, high = np.percentile(img, (lower_pct, upper_pct))
    stretched = exposure.rescale_intensity(img, in_range=(low, high), out_range=(0, 255))
    return stretched.astype(np.uint8)

# Read mask image
def get_masks(mask_folder, dataset):
    '''
    Collect every TIFF mask under ``mask_folder`` whose file name contains ``dataset``.

    The folder is walked recursively. Each mask is stored under its
    "condition": the file name up to the first ``.``, with the leading
    ``_``-separated token removed (``exp_a_b.tif`` -> ``a_b``). If two files
    yield the same condition, the one visited last wins.

    Parameters
    ----------
    mask_folder : str or path-like
        Root folder to search.
    dataset : str
        Substring that a file name must contain to be selected.

    Returns
    -------
    dict
        Maps condition name to the image returned by ``skimage.io.imread``.
    '''
    tiff_suffixes = ('.tif', '.tiff')
    masks = {}

    for dirpath, dirnames, filenames in os.walk(mask_folder):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for name in sorted(filenames):
            # Match on the real extension: a bare substring test
            # ("tif" in name) also selects non-image files such as
            # "motif.txt" and then fails while reading them.
            is_tiff = os.path.splitext(name)[1].lower() in tiff_suffixes
            if not (is_tiff and dataset in name):
                continue
            img = skimage.io.imread(os.path.join(dirpath, name))
            condition = '_'.join(name.split('.')[0].split('_')[1:])
            masks[condition] = img
    return masks

def crop_img(img, rotation, bbox):
    """Rotate an image, cut out a rectangular region and contrast-stretch it.

    Parameters
    ----------
    img : array-like
        Image handed to ``skimage.transform.rotate``.
    rotation : float
        Rotation angle forwarded to ``rotate``.
    bbox : sequence of 4 ints
        ``(y, height, x, width)`` of the region, in that order.

    Returns
    -------
    numpy.ndarray
        The cropped region after ``img_as_ubyte`` and ``contrast_stretching``.
    """
    top, height, left, width = bbox
    rotated = rotate(img, rotation)
    region = rotated[top:top + height, left:left + width]
    return contrast_stretching(img_as_ubyte(region))

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    Parameters
    ----------
    task : callable
        Function called with a single item.
    pics : iterable
        Items to process.
    n_jobs : int, optional
        Number of joblib workers. Defaults to 20, the value that used to be
        hard-coded, so existing two-argument calls behave as before.

    Returns
    -------
    list
        One result per item, as returned by ``joblib.Parallel``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)

In [101]:
from skimage.measure import regionprops_table
from skimage.transform import resize

def get_region_props(img, mask, marker, prop):
    """
    Measure region properties of ``img`` inside every labelled region of ``mask``.

    Parameters
    ----------
    img : array-like
        Intensity image passed to ``regionprops_table``.
    mask : array-like
        Label image passed to ``regionprops_table``.
    marker : object
        Value written to the ``"Marker"`` column of every row.
    prop : list or tuple of str
        Property names to measure.

    Returns
    -------
    pandas.DataFrame
        One row per region: the requested property columns, then ``"Marker"``
        and ``"Id"`` (the region label).
    """
    requested = list(prop)
    # Fetch "label" in the same pass instead of running a second
    # regionprops_table over the mask just to recover the region ids.
    label_requested = "label" in requested
    properties = requested if label_requested else requested + ["label"]
    props = regionprops_table(mask, img, properties=properties)

    # Keep a "label" column only when the caller asked for it.
    region_ids = props["label"] if label_requested else props.pop("label")

    df_marker = pd.DataFrame(props)
    df_marker["Marker"] = marker
    df_marker["Id"] = region_ids

    return df_marker

def extract_prop(imgs, labels, mask):
    """Build per-region intensity and morphology tables for a stack of images.

    Parameters
    ----------
    imgs : iterable of arrays
        Images to measure; the i-th image is named ``labels[i]``.
    labels : sequence
        Marker names, indexed by image position.
    mask : array-like
        Label image defining the regions.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(intensity, morphology)``. ``intensity`` has one row per
        (region, marker) sorted by ``Id``, with ``mean_intensity`` min-max
        scaled. ``morphology`` holds area and centroid per region, measured on
        the first image, without the ``Marker`` column.
    """
    per_marker_frames = []

    for idx, channel in enumerate(imgs):
        marker = labels[idx]
        per_marker_frames.append(
            get_region_props(channel, mask, marker, ["mean_intensity"])
        )

        # Morphology is taken once, from the first image only.
        if idx == 0:
            morph_table = get_region_props(
                channel, mask, marker, ["area", "centroid"]
            )

    intensity_table = (
        pd.concat(per_marker_frames, ignore_index=True)
        .sort_values(by=["Id"])
        .reset_index(drop=True)
    )

    # One scaler is fitted on the pooled column, so the min-max scaling is
    # shared by all markers rather than computed per marker.
    intensity_table['mean_intensity'] = MinMaxScaler().fit_transform(
        intensity_table[['mean_intensity']]
    )

    return intensity_table, morph_table.drop(['Marker'], axis=1)

def _resize_and_rescale_uint8(img, shape):
    """Resize ``img`` to ``shape`` and stretch its full value range to 0-255 (uint8)."""
    resized = resize(img, shape, anti_aliasing=False)
    # The full min/max range is used as input range (equivalent to the 0th and
    # 100th percentiles, without the percentile computation).
    stretched = exposure.rescale_intensity(
        resized, in_range=(resized.min(), resized.max()), out_range=(0, 255)
    )
    return stretched.astype(np.uint8)


def extract_prop_ts(imgs, labels, mask):
    """Build per-region intensity and morphology tables for images on a different grid.

    Each image is first resized to ``mask.shape`` and rescaled to the full
    uint8 range, then measured exactly as in ``extract_prop``, which this
    function delegates to instead of duplicating its pipeline.

    Parameters
    ----------
    imgs : iterable of arrays
        Images to measure; the i-th image is named ``labels[i]``.
    labels : sequence
        Marker names, indexed by image position.
    mask : numpy.ndarray
        Label image defining the regions; its shape is the resize target.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(intensity, morphology)`` as returned by ``extract_prop``.
    """
    # A generator keeps preprocessing lazy: only one resized image is held in
    # memory at a time, as in the original loop.
    prepared = (_resize_and_rescale_uint8(img, mask.shape) for img in imgs)
    return extract_prop(prepared, labels, mask)

In [102]:
# Channel names: 'Total' and 'Rest' followed by the integer masses in ascending order.
masses = ['Total', 'Rest'] + sorted([50,190,191,193,194,196,198,200,201,202,203,55,204,205,206,207,208,209,210,212,214,216,56,217,220,221,222,223,224,225,226,229,232,57,236,238,240,243,246,248,249,250,252,253,58,254,256,259,261,263,265,267,269,273,275,59,276,279,282,289,301,307,308,310,318,60,328,332,339,344,353,359,369,380,385,397,61,412,424,431,436,448,457,468,474,479,498,63,501,510,514,518,520,529,544,552,570,586,64,593,610,1,66,67,68,69,70,71,72,73,74,75,16,76,78,79,80,81,82,83,84,85,86,17,87,88,91,93,95,96,97,98,100,102,30,104,105,107,109,111,112,115,116,117,118,34,121,122,124,125,127,128,131,133,134,138,35,140,141,143,144,145,146,148,149,150,151,42,153,155,156,158,159,161,162,164,165,166,44,167,168,169,170,171,173,179,182,184,185])

In [103]:
# The experiment name is the file-name filter; the returned dict is keyed by
# the remainder of each file name and is indexed by dataset name later on.
masks = get_masks(mask_folder=data_dir / 'masks', dataset=experiment)

In [104]:
# Output folder for the per-dataset tables. It is created up front so that
# to_csv() does not fail when the folder does not exist yet.
props_dir = data_dir / 'props'
props_dir.mkdir(parents=True, exist_ok=True)

for dataset in datasets:

    mask = masks[dataset]
    # NOTE(review): the labels stored in the HDF5 file are read but not used;
    # channels are named from the notebook-level `masses` list instead.
    # Confirm both have the same length and order.
    imgs, labels = get_imgs_sims(experiment, dataset)

    df_prop_intensity, df_prop_morph = extract_prop_ts(imgs, masses, mask)
    # Long -> wide: one row per region Id, one column per marker.
    df_prop_intensity = pd.pivot_table(df_prop_intensity, values='mean_intensity', index='Id', columns='Marker')
    df_prop_intensity.to_csv(props_dir / f'intensity_TS_{experiment}_{dataset}.csv')
    # NOTE(review): the morphology file is tagged "IMC" while the intensity
    # file is tagged "TS". The name is kept unchanged because other code may
    # read this exact path; confirm whether the tag is intended.
    df_prop_morph.to_csv(props_dir / f'morphology_IMC_{experiment}_{dataset}.csv', index=False)

# Patch-based intensity extraction

In [105]:
from einops import rearrange, reduce, repeat

In [106]:
for dataset in datasets:

    mask = masks[dataset]
    imgs, labels = get_imgs_sims(experiment, dataset)
    imgs = np.stack(imgs).astype(np.float64)
    # Average every non-overlapping 4x4 patch of each image, then flatten each
    # image's grid of patch means into a single row.
    imgs_reduce = rearrange(
        reduce(imgs, 'n (h h2) (w w2) -> n h w', 'mean', h2=4, w2=4),
        'n h w -> n (h w)',
    )
    # The loop exits after its first pass, so only the first dataset is processed.
    break