In [1]:
import os
import re
from os import listdir
from os.path import isfile, join
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage
from joblib import Parallel, delayed
from skimage import exposure, io
from tqdm.notebook import tqdm, trange

from readimc import MCDFile, TXTFile


In [2]:
# Project root: the parent of the current working directory, as an absolute path.
p_dir = Path.cwd().parents[0].absolute()
# Data folder located directly under the project root.
data_dir = p_dir.joinpath("data")

# Create metadata

In [12]:
from collections import defaultdict

def get_info(img_folder):
    """Collect the MCD files found under a folder into a metadata table.

    The folder is walked recursively; every file whose name contains ``mcd``
    contributes one row. The dataset name of a row is the name of the
    directory that directly contains the file.

    Args:
        img_folder (str or os.PathLike) : root folder to search

    Returns:
        pandas dataframe with one row per matching file and the columns
        ``Dataset`` (containing folder name) and ``Path`` (full file path).
        Both columns are present even when no file matches.
    """
    info = {"Dataset": [], "Path": []}
    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(img_folder):
        # basename/normpath follow the separator rules of the running OS, so
        # the folder name is extracted on POSIX as well as on Windows
        # (splitting on a literal backslash only worked on Windows).
        dataset = os.path.basename(os.path.normpath(dirpath))
        for name in sorted(filenames):
            # Substring match kept as-is: a name such as "a.mcd.bak" also passes.
            if 'mcd' not in name:
                continue

            info['Dataset'].append(dataset)
            info['Path'].append(os.path.join(dirpath, name))

    return pd.DataFrame(info)

In [13]:
# Sample names; they match the folder names listed in the Dataset column of
# the metadata table.
# NOTE(review): `donors` is not referenced by any cell visible here — confirm
# it is used further down or remove it.
donors = [
    "LN Donor A",
    "LN Donor E",
    "INT Donor B",
    "INT Donor E",
    "TS Donor A",
    "TS Donor E",
    "SP Donor A"
]

# Build the metadata table (Dataset, Path) from the files under <data_dir>/mcd.
df = get_info(data_dir / 'mcd')

In [14]:
# Display the metadata table (columns: Dataset, Path).
df

Unnamed: 0,Dataset,Path
0,INT Donor B,Y:\coskun-lab\Thomas\Leap\data\mcd\INT Donor B...
1,INT Donor E,Y:\coskun-lab\Thomas\Leap\data\mcd\INT Donor E...
2,LN Donor A,Y:\coskun-lab\Thomas\Leap\data\mcd\LN Donor A\...
3,LN Donor E,Y:\coskun-lab\Thomas\Leap\data\mcd\LN Donor E\...
4,SP Donor A,Y:\coskun-lab\Thomas\Leap\data\mcd\SP Donor A\...
5,TS Donor A,Y:\coskun-lab\Thomas\Leap\data\mcd\TS Donor A\...
6,TS Donor E,Y:\coskun-lab\Thomas\Leap\data\mcd\TS Donor E\...


# Read images

In [None]:
from sklearn.neighbors import NearestNeighbors
from skimage.util import img_as_ubyte

def get_NN(data, n):
    """Query the ``n`` nearest neighbours of every point among the points themselves.

    Args:
        data : point coordinates, one row per point, used both to fit the
            neighbour model and as the query set
        n (int) : number of neighbours requested per point

    Returns:
        (distances, indices) exactly as returned by ``NearestNeighbors.kneighbors``
    """
    model = NearestNeighbors(n_neighbors=n)
    model.fit(data)
    neighbour_distances, neighbour_indices = model.kneighbors(data)
    return neighbour_distances, neighbour_indices


def filter_img_knn(img, n=25, th=3.5):
    """Zero out positive pixels whose nearest positive neighbours are far away.

    For every pixel with a value > 0, the mean of the distances returned by
    ``get_NN`` for its ``n`` nearest positive pixels is computed; pixels whose
    mean distance exceeds ``th`` are set to 0 in a copy of the image.

    Args:
        img (np.ndarray) : image indexed by two axes (``img[x, y]``)
        n (int) : number of neighbours requested from ``get_NN``
        th (float) : mean-distance threshold above which a pixel is removed

    Returns:
        np.ndarray : filtered copy of ``img``; the input array is not modified
    """
    # Get avg distances per positive expressed pixels
    x, y = np.where(img > 0)

    # No positive pixel: nothing to filter, and a neighbour search cannot be
    # fitted on zero points, so return an untouched copy straight away.
    if x.size == 0:
        return img.copy()

    data = np.column_stack((x, y))
    # NOTE(review): the query points are the fitted points themselves, so each
    # pixel is presumably its own first neighbour (distance 0) — confirm.
    # NOTE(review): fewer than `n` positive pixels is passed through as-is;
    # the neighbour search is expected to reject it — confirm desired handling.
    distances, _indices = get_NN(data, n)
    avg_dist = np.average(distances, axis=1)

    filter_ind = avg_dist > th
    unique, counts = np.unique(filter_ind, return_counts=True)
    # Reports how many positive pixels are kept (False) / removed (True).
    print(unique, counts)

    img_fil = img.copy()
    img_fil[x[filter_ind], y[filter_ind]] = 0

    return img_fil

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Store an array in an HDF5 file as a gzip-compressed dataset.

    If a dataset called ``name`` already exists in the file, nothing is
    written and a message is printed instead.

    Args:
        path (str) : HDF5 file path
        name (str) : dataset name inside the file
        data (np.ndarray) : array to store; it needs at least one dimension
            because one chunk is created per index of the first axis
        attr_dict (dict, optional) : attributes set on the newly created dataset
        mode (str) : file mode passed to ``h5py.File``

    Returns:
        None
    """
    # The context manager closes the file even when dataset creation or the
    # write raises, so a failed call no longer leaves the handle open.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        data_shape = data.shape
        dset = hf.create_dataset(
            name,
            shape=data_shape,
            # Maximum shape equals the data shape.
            maxshape=(data_shape[0],) + data_shape[1:],
            # One chunk per index along the first axis.
            chunks=(1,) + data_shape[1:],
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def contrast_streching(img):
    """Crop an image and stretch its contrast to the 8-bit range.

    The image is cropped to at most the first 1000 entries along its first
    two axes. Intensities between the 0.1th and 99.9th percentile of the crop
    are then rescaled to 0..255 and the result is cast to ``np.uint8``.

    Args:
        img (np.ndarray) : input image

    Returns:
        np.ndarray : cropped, contrast-stretched uint8 image
    """
    cropped = img[:1000, :1000]
    lower, upper = np.percentile(cropped, (0.1, 99.9))
    stretched = exposure.rescale_intensity(
        cropped, in_range=(lower, upper), out_range=(0, 255)
    )
    return stretched.astype(np.uint8)

def read_img(path):
    """Load the image stored at ``path`` with ``skimage.io.imread``.

    The pixel data is returned as read; no contrast stretching is applied.

    Args:
        path : location of the image file

    Returns:
        the value returned by ``skimage.io.imread``
    """
    image = skimage.io.imread(path)
    return image

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` through ``joblib.Parallel``.

    Args:
        task (callable) : function called with one item at a time
        pics (iterable) : items to process
        n_jobs (int) : worker count forwarded to ``joblib.Parallel``; the
            default of 20 is the value that was previously hard-coded, so
            existing two-argument calls behave as before

    Returns:
        the results collected by ``joblib.Parallel``, one per item
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

In [31]:
# NOTE(review): the loop body only rebinds `path`, so once the loop finishes
# `path` holds the Path value of the last row of `df`; no per-file work is
# done here. Confirm that processing only the last file is intended.
for row in df.itertuples():
    path = row.Path
    

In [32]:
# Open the MCD file referenced by `path` and load one acquisition from its
# first slide; the `break` stops the loop after the first acquisition.
with MCDFile(path) as f:
    slide = f.slides[0]
    
    for acquisition in slide.acquisitions:
        # Print the acquisition description for identification.
        print(acquisition.description)
        # presumably a multi-channel image array (see readimc docs) — TODO confirm layout
        img = f.read_acquisition(acquisition)
        # Channel labels taken from the same acquisition as `img`.
        labels = acquisition.channel_labels
        break

ROI_1
