In [68]:
import os
import sys
from pathlib import Path

import cv2 as cv
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from skimage import exposure, io
from skimage.util import img_as_ubyte
from tqdm.notebook import tqdm, trange

In [69]:
# Project root is the parent of the current working directory; the raw IMC
# images are expected under <root>/data/IMC.
p_dir = Path.cwd().parents[0].absolute()
data_dir = p_dir.joinpath("data", "IMC")

In [70]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

# Folder holding the project's own Python modules, relative to the project root.
module_path = str(p_dir / "src")

# Make that folder importable. The membership check keeps repeated runs of this
# cell from appending duplicate entries to sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

# Imported only after the sys.path update above.
# NOTE(review): presumably `utils` lives in src/ -- confirm.
import utils as my_utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load IMC data

In [71]:
from functools import partial
from sklearn.neighbors import NearestNeighbors


def get_NN(data, n):
    """Fit a nearest-neighbour model on ``data`` and query it with the same points.

    Args:
        data: samples handed unchanged to ``NearestNeighbors.fit`` and
            ``kneighbors``
        n (int): number of neighbours requested per sample

    Returns:
        tuple ``(distances, indices)`` exactly as produced by ``kneighbors``
    """
    knn_model = NearestNeighbors(n_neighbors=n)
    knn_model.fit(data)
    return knn_model.kneighbors(data)


def filter_img_knn(img, n=25, th=3.5):
    """Zero out isolated positive pixels using a k-nearest-neighbour distance test.

    The coordinates of all pixels with a value above zero are collected, the
    distances to their nearest positive pixels are obtained from ``get_NN``,
    and every pixel whose mean neighbour distance exceeds ``th`` is set to 0
    in a copy of the image. The input array is never modified.

    Args:
        img (np.ndarray): 2-D image; pixels with value > 0 count as positive
        n (int): number of neighbours requested per positive pixel. It is
            capped at the number of positive pixels, because the neighbour
            search cannot return more neighbours than there are samples.
        th (float): mean-distance threshold above which a pixel is removed

    Returns:
        np.ndarray: copy of ``img`` with the filtered pixels set to 0. An image
        without any positive pixel is returned as an unchanged copy.
    """
    rows, cols = np.where(img > 0)
    n_positive = rows.size

    # Nothing to filter: the neighbour search would reject an empty sample set.
    if n_positive == 0:
        return img.copy()

    coords = np.column_stack((rows, cols))
    # Requesting more neighbours than samples raises inside scikit-learn, so
    # sparse images fall back to "all positive pixels" as the neighbourhood.
    k = min(n, n_positive)
    distances, _ = get_NN(coords, k)
    avg_dist = distances.mean(axis=1)

    # Pixels whose neighbours are, on average, further away than `th`.
    isolated = avg_dist > th

    img_fil = img.copy()
    img_fil[rows[isolated], cols[isolated]] = 0
    return img_fil

def contrast_streching(img):
    """Rescale image intensities between the 0.1th and 99.9th percentiles.

    Args:
        img: image array whose intensity percentiles define the input range

    Returns:
        the result of ``exposure.rescale_intensity`` with that input range
    """
    lower, upper = np.percentile(img, (0.1, 99.9))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return stretched


def read_img(path: str) -> np.ndarray:
    """
    Load the image at ``path`` as grayscale, contrast-stretch it and convert
    the result to unsigned 8-bit.
    """
    gray = io.imread(path, as_gray=True)
    return img_as_ubyte(contrast_streching(gray))


def get_info(img_folder):
    """Collect ROI, marker name and file path for every image below a folder.

    Walks ``img_folder`` recursively and keeps the files whose name contains
    "tiff". The marker is the third underscore-separated token of the file
    name with everything after the first dot removed; the ROI is the text
    after the last underscore of the directory path holding the file.

    Special cases:
        * files with fewer than three underscore-separated tokens are skipped
        * marker "contaminant" is skipped
        * marker "DNA" becomes "DNA1" when the file name contains "191Ir",
          otherwise "DNA2"
        * any other file whose name contains "Cell_Mask" gets marker "Mask"

    Args:
        img_folder (str or path-like): root folder to scan

    Returns:
        pandas.DataFrame with columns "ROI", "Marker" and "Path", one row per
        retained image
    """
    records = []

    for dirpath, dirnames, filenames in os.walk(img_folder):
        # Sorting in place makes os.walk descend into sub-folders in a
        # reproducible order instead of the file system's arbitrary one.
        dirnames.sort()

        # NOTE(review): this splits the whole directory path, so when the
        # leaf folder has no underscore the ROI picks up parent-folder text.
        roi = dirpath.split("_")[-1]

        for name in sorted(filenames):
            if "tiff" not in name:
                continue

            try:
                marker = name.split("_")[2].split(".")[0]
            except IndexError:
                # File name does not follow the <a>_<b>_<marker> pattern.
                continue

            if marker == "contaminant":
                continue
            if marker == "DNA":
                # Two DNA channels are told apart by the isotope in the name.
                marker += "1" if "191Ir" in name else "2"
            elif "Cell_Mask" in name:
                marker = "Mask"

            records.append((roi, marker, os.path.join(dirpath, name)))

    return pd.DataFrame(records, columns=["ROI", "Marker", "Path"])


def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    Args:
        task (callable): function called with a single item
        pics (iterable): items to process
        n_jobs (int): worker count passed to ``joblib.Parallel``; defaults to
            20, the value that used to be hard-coded

    Returns:
        list with one result per item, as returned by ``joblib.Parallel``
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(item) for item in pics)



In [72]:
# Experiment folder (under the IMC data directory) whose images are indexed.
experiment = "endometrium L128"

df_all = get_info(img_folder=data_dir / experiment)

In [73]:
# Display the image index built by get_info (columns: ROI, Marker, Path).
df_all

Unnamed: 0,ROI,Marker,Path
0,1,SMA,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
1,1,Vimentin,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
2,1,PanKeratin,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
3,1,Ecaderin,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
4,1,CD8a,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
5,1,CD45RA,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
6,1,CD3,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
7,1,CD45RO,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
8,1,DNA1,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
9,1,DNA2,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...


In [74]:
# Process one ROI at a time: each group holds the index rows sharing a ROI label.
group = df_all.groupby("ROI")


for name, df_group in group:
    # Paths and marker names stay aligned because both come from the same rows.
    paths = df_group.Path.tolist()
    markers = df_group.Marker.tolist()
    # Read every image of this ROI in parallel, then run the kNN filter on each.
    imgs = joblib_loop(read_img, paths)
    imgs_filtered = joblib_loop(filter_img_knn, imgs)

    # Output file: <project root>/data/h5/<second word of experiment>_<ROI>.hdf5
    path = p_dir / "data" / "h5" / f"{experiment.split(' ')[1]}_{name}.hdf5"
    # Stack the filtered images along a new leading axis, in the same order as
    # `markers`; np.stack requires all images to share one shape.
    imgs = np.stack(imgs_filtered, axis=0)
    # NOTE(review): save_hdf5 is not visible here; presumably it stores `imgs`
    # under the "IMC" name with the marker list as metadata and expects the
    # data/h5 folder to exist -- confirm against the utils module.
    my_utils.save_hdf5(path, "IMC", imgs, {"markers": markers})