In [1]:
import os
import re
from os import listdir
from os.path import isfile, join
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage
from joblib import Parallel, delayed
from skimage import exposure, io
from tqdm.notebook import tqdm, trange

In [2]:
p_dir = (Path().cwd().parents[0]).absolute()
data_dir = p_dir / "data"

# Create metadata

In [3]:
def get_info(img_folder):
    """Function returns the info from folder containing multi-cycle staigning on cell

    Args:
        img_folder (str) : imgage folder path to get information
        name_dict (dict) : three level dictionnary mapping cycle -> channel -> marker name

    Returns:
        pandas dataframe with information
    """
    rois = []
    images_path = []
    markers = []

    # Loop through image folder
    for (dirpath, dirnames, filenames) in os.walk(img_folder):
        for name in sorted(filenames):
            if "tiff" not in name or 'Mask' in name:
                continue

            roi = dirpath.split("_")[-1]
            try:
                marker = name.split("_")[2].split(".")[0]
                if marker == "contaminant":
                    continue
                elif marker == "DNA":
                    if "191Ir" in name:
                        marker += "1"
                    else:
                        marker += "2"
            except:
                continue

            path = os.path.join(dirpath, name)
            rois.append(roi)
            markers.append(marker)
            images_path.append(path)

    info = {
        "ROI": rois,
        "Marker": markers,
        "Path": images_path,
    }
    df = pd.DataFrame(info)
    return df

In [4]:
donors = [
    "LN Donor A",
    "LN Donor E",
    "INT Donor B",
    "INT Donor E",
    "TS Donor A",
    "TS Donor E",
    "SP Donor A"
]

donor = donors[-2]
df = get_info(data_dir / 'ROI_images' /donor)
df.to_csv(data_dir / "metadata" / f"info_{donor}.csv", index=False)

In [5]:
df

Unnamed: 0,ROI,Marker,Path
0,1,CD38,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
1,1,Vimentin,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
2,1,CD21,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
3,1,BCL6,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
4,1,ICOS1,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
...,...,...,...
463,18,CD27,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
464,18,EZH2,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
465,18,H3K27me3,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...
466,18,DNA1,Y:\coskun-lab\Thomas\Leap\data\ROI_images\TS D...


# Read images

In [6]:
from sklearn.neighbors import NearestNeighbors
from skimage.util import img_as_ubyte

def get_NN(data, n):
    fit = NearestNeighbors(n_neighbors=n).fit(data)
    distances, indices = fit.kneighbors(data)

    return distances, indices


def filter_img_knn(img, n=25, th=3.5):
    # Get avg distances per positive expressed pixels
    x, y = np.where(img > 0)
    values = img[x, y]

    data = np.column_stack((x, y))
    distances, indices = get_NN(data, n)
    # avg_dist = np.average(distances, axis=1, weights=values[indices])
    avg_dist = np.average(distances, axis=1)

    filter_ind = avg_dist > th
    unique, counts = np.unique(filter_ind, return_counts=True)
    print(unique, counts)
    x_fil = x[filter_ind]
    y_fil = y[filter_ind]

    img_fil = img.copy()
    img_fil[x_fil, y_fil] = 0

    return img_fil



def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    # Read h5 file
    hf = h5py.File(path, mode)
    # Create z_stack_dataset
    if hf.get(name) is None:
        data_shape = data.shape
        data_type = data.dtype
        chunk_shape = (1,) + data_shape[1:]
        max_shape = (data_shape[0],) + data_shape[1:]
        dset = hf.create_dataset(
            name,
            shape=data_shape,
            maxshape=max_shape,
            chunks=chunk_shape,
            dtype=data_type,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val
    else:
        print(f"Dataset {name} exists")

    hf.close()

def contrast_streching(img):
    img = img[:1000, :1000]
    p2, p98 = np.percentile(img, (0.1, 99.9))
    img = exposure.rescale_intensity(img, in_range=(p2, p98), out_range=(0, 255)).astype(np.uint8)
    return img


def read_img(path):
    # return contrast_streching(skimage.io.imread(path))
    return contrast_streching(skimage.io.imread(path))

def joblib_loop(task, pics):
    return Parallel(n_jobs=20)(delayed(task)(i) for i in pics)

In [7]:
# Loop through datasets
for donor in donors:
    # Get info DF used for clustering
    df = get_info(data_dir /'ROI_images' / donor)
    
    # Define saving location
    h5_data = p_dir / "data" / "h5_new" / f"{donor}.hdf5"
    
    # Loops through ROIs
    group = df.groupby("ROI")
    for name, df_group in group:
        paths = df_group.Path.tolist()
        imgs_raw = joblib_loop(read_img, paths)
        imgs = joblib_loop(filter_img_knn, imgs_raw)
        markers = df_group.Marker.tolist()
        imgs = np.stack(imgs, axis=0)
        save_hdf5(h5_data, name, imgs, {"labels": markers})

In [8]:
# import napari

# viewer = napari.view_image(imgs, channel_axis=0, name=markers, visible=False)

In [9]:
# import napari

# viewer = napari.view_image(np.stack(imgs_raw, axis=0), channel_axis=0, name=markers, visible=False)

# Stitch Images from position

In [16]:
import matplotlib.patches as mpatches
from skimage.segmentation import mark_boundaries

def get_imgs(file_path, name):
    f = h5py.File(file_path, "r")
    imgs = f[name]
    labels = list(f[name].attrs["labels"])
    return imgs, labels


def contrast_streching(img):
    p2, p98 = np.percentile(img, (2, 99))
    return exposure.rescale_intensity(img, in_range=(p2, p98))

def get_img_size(roi_dict, size=1000):
    row_max = 0
    col_max = 0
    for k, v in roi_dict.items():
        row_max = max(row_max, v[0])
        col_max = max(col_max, v[1])
    return row_max + size, col_max + size

def get_img_subset(imgs, markers, labels):
    imgs_subset = []
    for marker in markers:
        idx = labels.index(marker)
        imgs_subset.append(imgs[idx])
    return np.stack(imgs_subset, axis=2)



In [17]:
# Format row, col
arrangement = {
    "LN Donor A": {
        1: [0, 1000],
        2: [0, 2000],
        3: [1000, 0],
        4: [1000, 1000],
        5: [1000, 2000],
        6: [1000, 3000],
        7: [2000, 0],
        8: [2000, 1000],
        9: [2000, 2000],
        10: [2000, 3000],
        11: [3000, 0],
        12: [3000, 1000],
        13: [3000, 2000],
        14: [3000, 3000],
        15: [4000, 1000],
        16: [4000, 2000],
    },
    "LN Donor E": {
        1: [1000, 0],
        2: [1000, 1000],
        3: [1000, 2000],
        4: [1000, 3000],
        5: [1000, 4000],
        6: [1000, 5000],
        7: [1000, 6000],
        8: [1000, 7000],
        9: [1000, 8000],
        10: [0, 0],
        11: [0, 1000],
        12: [0, 2000],
        13: [0, 3000],
        14: [0, 4000],
    },
    "INT Donor B": {
        1: [0, 0],
        2: [0, 1000],
        3: [1000, 0],
        4: [1000, 1000],
        5: [2000, 0],
        6: [2000, 1000],
        7: [2000, 2000],
        8: [2000, 3000],
        9: [3000, 0],
        10: [3000, 1000],
        11: [3000, 2000],
        12: [3000, 3000],
        13: [4000, 0],
        14: [4000, 1000],
        15: [4000, 2000],
        16: [4000, 3000],
        17: [5000, 0],
        18: [5000, 1000],
        19: [5000, 2000],
        20: [5000, 3000],
    },
    "INT Donor E": {
        1: [0, 0],
        2: [0, 1000],
        3: [0, 2000],
        4: [0, 3000],
        # 5: [0, 4000],
        6: [1000, 0],
        7: [1000, 1000],
        8: [1000, 2000],
        9: [1000, 3000],
        10: [1000, 4000],
        11: [2000, 3000],
        12: [2000, 4000],
        13: [3000, 3000],
        14: [3000, 4000],
        15: [4000, 3000],
        16: [4000, 4000],
    },
    "TS Donor A": {
        1: [0, 0],
        2: [0, 1000],
        3: [0, 2000],
        4: [0, 3000],
        5: [0, 4000],
        6: [0, 5000],
        7: [0, 6000],
        8: [1000, 0],
        9: [1000, 1000],
        10: [1000, 2000],
        11: [1000, 3000],
        12: [1000, 4000],
        13: [1000, 5000],
        14: [1000, 6000],
    },
    "TS Donor E": {
        1: [0, 0],
        2: [0, 1000],
        3: [0, 2000],
        4: [1000, 0],
        5: [1000, 1000],
        6: [1000, 2000],
        7: [2000, 0],
        8: [2000, 1000],
        9: [2000, 2000],
        10: [3000, 0],
        11: [3000, 1000],
        12: [3000, 2000],
        13: [4000, 0],
        14: [4000, 1000],
        15: [4000, 2000],
        16: [5000, 0],
        17: [5000, 1000],
        18: [5000, 2000],
    },
}

In [34]:
markers = ['CD38',
 'Vimentin',
 'CD21',
 'BCL6',
 'ICOS1',
 'CD11b',
 'CD86',
 'CXCR4',
 'CD11c',
 'FoxP3',
 'CD4',
 'CD138',
 'CXCR5',
 'CD20',
 'CD8',
 'C-Myc',
 'PD1',
 'CD83',
 'Ki67',
 'COL1',
 'CD3',
 'CD27',
 'EZH2',
 'H3K27me3',
 'DNA1',
 'DNA2']

size = 1000



In [44]:
for donor in donors[5:-1]:
    h5_data = p_dir / "data" / "h5_new" / f"{donor}.hdf5"

    # Create combined images
    y_max, x_max = get_img_size(arrangement[donor])
    img_combined = np.zeros((y_max, x_max, len(markers)), dtype=np.uint8)

    ROIs = [i for i in range(1, 21)]
    for roi in tqdm(ROIs):
        if roi not in arrangement[donor].keys():
            continue

        # Read imgs
        imgs, labels = get_imgs(h5_data, str(roi))

        # Get multiplex image
        data = get_img_subset(imgs, markers, labels)

        # Insert Combined images
        y = arrangement[donor][roi][0]
        x = arrangement[donor][roi][1]
        img_combined[y : y + size, x : x + size, :] = data

    # save_path = (
    #     p_dir / "figures" / "multiplex" / f"combined_{donor}_{markers_subset}.png"
    # )
    # img_combined = Image.fromarray(img_combined)
    # img_combined.save(save_path)
    break

  0%|          | 0/20 [00:00<?, ?it/s]

In [46]:
import napari 

napari.view_image(img_combined, channel_axis=2, name=markers, visible=False)

Viewer(axes=Axes(visible=False, labels=True, colored=True, dashed=False, arrows=True), camera=Camera(center=(0.0, 2999.5, 1499.5), zoom=0.147725, angles=(0.0, 0.0, 90.0), perspective=0, interactive=True), cursor=Cursor(position=(1, 1), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 6000.0, 1.0), (0.0, 3000.0, 1.0)), current_step=(3000, 1500), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'CD38' at 0x1d85ff17730>, <Image layer 'Vimentin' at 0x1d85fd61a20>, <Image layer 'CD21' at 0x1d8608d05e0>, <Image layer 'BCL6' at 0x1d8609501f0>, <Image layer 'ICOS1' at 0x1d8aebf7dc0>, <Image layer 'CD11b' at 0x1d8aec7f9d0>, <Image layer 'CD86' at 0x1d8b0013610>, <Image layer 'CXCR4' at 0x1d8b3deb220>, <Image layer 'CD11c' at 0x1d8b3e76e30>, <Image layer 'FoxP3' at 0x1d8b45d6a40>, <Image layer 'CD4' at 0x1d8b465e650>, <Image layer 'CD138' at 0x1d8b46ee260>, 