In [1]:
import os
import re
from os import listdir
from os.path import isfile, join
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage
from joblib import Parallel, delayed
from skimage import exposure, io
from tqdm.notebook import tqdm, trange

In [2]:
# Project root: the parent of the current working directory.
p_dir = Path.cwd().parents[0].absolute()
# Folder holding the IMC data, <project root>/data/IMC.
data_dir = p_dir.joinpath("data", "IMC")

# Create metadata

In [3]:
def get_info(img_folder):
    """Collect per-image metadata from a folder tree of TIFF images.

    The folder is walked recursively. For every file whose name contains
    ``"tiff"``:

    * the ROI is the text after the last underscore of the containing
      directory path;
    * the marker is the third underscore-separated field of the file name,
      with everything from the first ``"."`` onwards removed.

    Special cases, checked in this order:

    * marker ``contaminant``: the file is skipped;
    * marker ``DNA``: renamed ``DNA1`` when the file name contains ``191Ir``,
      ``DNA2`` otherwise;
    * file name contains ``Cell_Mask``: the marker is set to ``Mask``.

    Files with fewer than three underscore-separated fields are skipped.

    Args:
        img_folder (str or os.PathLike): root folder to scan.

    Returns:
        pandas.DataFrame: one row per kept image, with the columns ``ROI``,
        ``Marker`` and ``Path``.
    """
    records = []

    for dirpath, dirnames, filenames in os.walk(img_folder):
        # Sorting in place makes os.walk descend into sub-folders in a
        # reproducible order instead of whatever the file system returns.
        dirnames.sort()
        roi = dirpath.split("_")[-1]

        for name in sorted(filenames):
            if "tiff" not in name:
                continue

            fields = name.split("_")
            if len(fields) < 3:
                # The name does not carry a marker field. This explicit check
                # replaces a bare ``except`` that silently hid every error.
                continue

            marker = fields[2].split(".")[0]
            if marker == "contaminant":
                continue
            if marker == "DNA":
                marker += "1" if "191Ir" in name else "2"
            elif "Cell_Mask" in name:
                marker = "Mask"

            records.append((roi, marker, os.path.join(dirpath, name)))

    return pd.DataFrame(records, columns=["ROI", "Marker", "Path"])

In [4]:
# Name of the donor folder under data_dir to process.
# donor = 'Tonsil DonorE'
donor = "endometrium L011"

# Metadata table built by get_info for this donor's folder.
df_all = get_info(data_dir.joinpath(donor))

In [5]:
# Rows whose Marker is "Mask".
df_mask = df_all.loc[df_all["Marker"].eq("Mask")]
df_mask

Unnamed: 0,ROI,Marker,Path


In [6]:
# Every row except the ones whose Marker is "Mask".
df = df_all.loc[df_all["Marker"].ne("Mask")]
df

Unnamed: 0,ROI,Marker,Path
0,1,SMA,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
1,1,Vimentin,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
2,1,PanKeratin,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
3,1,Ecaderin,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
4,1,CD8a,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
...,...,...,...
115,12,CD45RA,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
116,12,CD3,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
117,12,CD45RO,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...
118,12,DNA1,Y:\coskun-lab\Thomas\ScSpaMet\data\IMC\endomet...


# Read images

In [19]:
from sklearn.neighbors import NearestNeighbors


def get_NN(data, n):
    """Look up the ``n`` nearest neighbours of every sample within ``data``.

    Args:
        data: samples, passed unchanged to scikit-learn's ``NearestNeighbors``.
        n (int): number of neighbours requested per sample.

    Returns:
        tuple: ``(distances, indices)`` as produced by
        ``NearestNeighbors.kneighbors(data)``.
    """
    knn_model = NearestNeighbors(n_neighbors=n)
    knn_model.fit(data)
    neighbor_dist, neighbor_idx = knn_model.kneighbors(data)
    return neighbor_dist, neighbor_idx


def filter_img_knn(img, n=25, th=3.5):
    """Zero out isolated positive pixels using a nearest-neighbour distance test.

    Coordinates of all pixels with ``img > 0`` are collected. For each of them
    the distances returned by ``get_NN`` are averaged, and pixels whose
    average exceeds ``th`` are set to 0 in a copy of the image.

    Args:
        img (np.ndarray): 2-D image; the input array is not modified.
        n (int): number of neighbours requested per positive pixel. It is
            lowered to the number of positive pixels when the image has fewer
            than ``n`` of them, because the neighbour search cannot return
            more neighbours than there are samples.
        th (float): average-distance threshold above which a pixel is removed.

    Returns:
        np.ndarray: filtered copy of ``img``. An image without any positive
        pixel is returned as an unchanged copy.
    """
    x, y = np.where(img > 0)
    img_fil = img.copy()

    # Nothing to filter: the neighbour search would fail on an empty sample set.
    if x.size == 0:
        return img_fil

    data = np.column_stack((x, y))
    n_neighbors = min(n, x.size)
    distances, _ = get_NN(data, n_neighbors)
    avg_dist = np.average(distances, axis=1)

    filter_ind = avg_dist > th
    # Report how many positive pixels are kept (False) and removed (True).
    unique, counts = np.unique(filter_ind, return_counts=True)
    print(unique, counts)

    img_fil[x[filter_ind], y[filter_ind]] = 0
    return img_fil


def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed dataset into an HDF5 file.

    If a dataset called ``name`` already exists in the file, nothing is
    written and a message is printed instead.

    Args:
        path: location of the HDF5 file; passed unchanged to ``h5py.File``.
        name: name of the dataset inside the file.
        data: array to store. The chunk shape is ``(1,) + data.shape[1:]``,
            i.e. one chunk per index along the first axis.
        attr_dict: optional mapping of attribute names to values, attached to
            the dataset when it is created.
        mode: file mode passed to ``h5py.File`` (default ``"a"``).
    """
    # The context manager closes the file even when dataset creation or the
    # write raises, so a failed call no longer leaves the file handle open.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        data_shape = data.shape
        dset = hf.create_dataset(
            name,
            shape=data_shape,
            maxshape=data_shape,
            chunks=(1,) + data_shape[1:],
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data

        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val


def contrast_streching(img, n_min=0.5, n_max=99):
    """Stretch image intensities between two percentiles onto 0-255 (uint8).

    Args:
        img (np.ndarray): input image.
        n_min (float): lower percentile used as the black point.
        n_max (float): upper percentile used as the white point.

    Returns:
        np.ndarray: ``uint8`` image with the same shape as ``img``.

    Notes:
        When both percentiles coincide (for example when more than ``n_max``
        percent of the pixels are zero), rescaling with that window would
        collapse the whole image to a single value. In that case the full
        min-max range of the image is used instead. A completely flat image
        has no contrast to stretch and is returned as all zeros.
    """
    low, high = np.percentile(img, (n_min, n_max))

    if high <= low:
        # Degenerate percentile window: fall back to the full intensity range.
        low, high = float(np.min(img)), float(np.max(img))
        if high <= low:
            return np.zeros(np.shape(img), dtype=np.uint8)

    stretched = exposure.rescale_intensity(
        img, in_range=(low, high), out_range=(0, 255)
    )
    return stretched.astype(np.uint8)


def read_img(path):
    """Load the image stored at ``path`` with ``skimage.io.imread`` and return it."""
    image = skimage.io.imread(path)
    return image


def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` through ``joblib.Parallel``.

    Args:
        task (callable): function called with a single item.
        pics (iterable): items to process.
        n_jobs (int): value forwarded to ``Parallel(n_jobs=...)``. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        The results collected by ``joblib.Parallel``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)


def get_img_size(roi_dict, size=1000):
    """Compute the canvas size needed to place every ROI tile.

    Args:
        roi_dict (dict): maps an ROI id to its ``[row_offset, col_offset]``.
        size (int): amount added to the largest offset along each axis.

    Returns:
        tuple: ``(rows, cols)``, the largest row and column offsets (never
        below 0) plus ``size``.
    """
    offsets = list(roi_dict.values())
    # Seeding with 0 keeps the result at ``size`` for empty or negative input.
    largest_row = max([0] + [offset[0] for offset in offsets])
    largest_col = max([0] + [offset[1] for offset in offsets])
    return largest_row + size, largest_col + size

In [20]:
def _grid_arrangement(n_rows, n_cols, step):
    """Build a row-major ROI layout for a regular grid.

    ROI ids start at 1 and increase along a row first, then row by row. Each
    id maps to ``[row_offset, col_offset]``, with offsets spaced ``step`` apart.
    """
    return {
        r * n_cols + c + 1: [r * step, c * step]
        for r in range(n_rows)
        for c in range(n_cols)
    }


# Tonsil Donor A (irregular column spacing, kept for reference)
# arrangement = {
#     1: [0, 0],
#     2: [0, 1000],
#     3: [1000, 0],
#     4: [1000, 1000],
#     5: [0, 2500],
#     6: [0, 3500],
#     7: [1000, 2500],
#     8: [1000, 3500],
# }

# Three-ROI subset (kept for reference)
# arrangement = {
#     1: [0, 0],
#     5: [800, 0],
#     9: [1600, 0],
# }

# Tonsil Donor E: 6 rows x 3 columns, offsets spaced 1000 apart.
# Kept under its own name so it is no longer silently overwritten.
arrangement_tonsil_e = _grid_arrangement(n_rows=6, n_cols=3, step=1000)

# L011: 3 rows x 4 columns, offsets spaced 800 apart.
arrangement_l011 = _grid_arrangement(n_rows=3, n_cols=4, step=800)

# Layout used by the rest of the notebook.
arrangement = arrangement_l011


In [21]:
# Combine IMC image
size = 800  # extent of one ROI tile along each axis; matches the 800 spacing of the L011 layout
group = df.groupby("ROI")
h5_data = p_dir / "data" / "h5" / f"{donor}.hdf5"
row, col = get_img_size(arrangement, size=size)
n_channels = len(df.Marker.unique())

# Stage the mosaic in float32 so the percentile-based contrast stretch below
# sees the original intensities. Writing tiles straight into a uint8 array
# casts them first, which truncates or wraps any value outside 0-255.
# NOTE(review): the dtype of the source TIFFs is not visible here; if they are
# already uint8 this staging step does not change the data.
imgs_raw = np.zeros((n_channels, row, col), dtype=np.float32)

markers = None
for name, df_group in group:
    roi_markers = df_group.Marker.tolist()
    if markers is None:
        markers = roi_markers
    elif roi_markers != markers:
        # Channels are stacked by position, so a different marker order in one
        # ROI would silently mislabel the channels of the mosaic.
        raise ValueError(
            f"ROI {name} has markers {roi_markers}, expected {markers}"
        )

    paths = df_group.Path.tolist()
    imgs = joblib_loop(read_img, paths)
    imgs = joblib_loop(filter_img_knn, imgs)
    imgs = np.stack(imgs, axis=0)

    # Paste the ROI stack at its offset in the mosaic.
    roi = int(name)
    y, x = arrangement[roi]
    imgs_raw[:, y : y + size, x : x + size] = imgs

# Contrast-stretch every channel of the full mosaic to uint8. Every marker
# gets the same treatment (the former SMA/Vimentin branch was identical).
imgs_combined = np.zeros(imgs_raw.shape, dtype=np.uint8)
for i in range(imgs_raw.shape[0]):
    imgs_combined[i, ...] = contrast_streching(imgs_raw[i, ...])

# Free the float staging buffer; only the uint8 mosaic is used afterwards.
del imgs_raw

In [23]:
import napari

# Open the combined stack in napari, split along axis 0 into one layer per marker name.
napari.view_image(
    imgs_combined,
    name=markers,
    channel_axis=0,
)

Viewer(axes=Axes(visible=False, labels=True, colored=True, dashed=False, arrows=True), camera=Camera(center=(0.0, 1199.5, 1599.5), zoom=0.23710416666666667, angles=(0.0, 0.0, 90.0), perspective=0.0, interactive=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 2400.0, 1.0), (0.0, 3200.0, 1.0)), current_step=(1200, 1600), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'SMA' at 0x1fca385c610>, <Image layer 'Vimentin' at 0x1fca3346fa0>, <Image layer 'PanKeratin' at 0x1fca39d0a60>, <Image layer 'Ecaderin' at 0x1fcaabe2fa0>, <Image layer 'CD8a' at 0x1fcab1a03d0>, <Image layer 'CD45RA' at 0x1fcab42fa60>, <Image layer 'CD3' at 0x1fcb0136fa0>, <Image layer 'CD45RO' at 0x1fcb01f63d0>, <Image layer 'DNA1' at 0x1fcb0268a60>, <Image layer 'DNA2' at 0x1fcb02fdfa0>], scale_bar=ScaleBar(visible=False, colored=False, tick

In [24]:
# Create the output folder if needed: opening the HDF5 file fails when its
# parent directory is missing.
h5_data.parent.mkdir(parents=True, exist_ok=True)

# Store the combined stack as dataset "IMC" with the marker names as an attribute.
save_hdf5(h5_data, "IMC", imgs_combined, {"markers": markers})

In [33]:
# # Combine mask image
# group = df_mask.groupby("ROI")
# mask_combined = np.zeros((row, col), dtype=np.uint32)

# sum_max = 0
# for name, df_group in group:
#     path = df_group.Path.item()
#     img = skimage.io.imread(path)
#     img = np.where(img > 0, img + sum_max, 0)

#     # Shift label per ROI
#     sum_max = np.max(img)

#     roi = int(name)
#     y = arrangement[roi][0]
#     x = arrangement[roi][1]
#     mask_combined[y : y + size, x : x + size] = img

In [34]:
# save_hdf5(h5_data, "mask", mask_combined)