In [1]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.notebook import trange, tqdm, tqdm_notebook
from joblib import Parallel, delayed
import re
import h5py
import tifffile as tiff
from natsort import natsort_keygen

from skimage import util

In [2]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each cell runs.
%load_ext autoreload
%autoreload 2

In [3]:
# Project data folder: `<parent of the current working directory>/data`, made absolute.
data_dir = (Path().cwd().parents[0] / 'data').absolute()
data_processed = data_dir / 'processed'
# Root folder of the raw acquisitions (hard-coded absolute path on the Y: drive).
data_raw = r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\PLA\FFPE patient samples-1Nov23'


In [4]:
import os
 
# Point the process at the local JDK 17 installation.
# Raw strings are used because the Windows paths contain backslashes; in a
# normal string literal `\P`, `\J` and `\j` are invalid escape sequences that
# Python warns about (and will eventually reject).
os.environ['JDK_HOME'] = r"C:\Program Files\Java\jdk-17"
os.environ['JAVA_HOME'] = r"C:\Program Files\Java\jdk-17"

# Append the JDK locations to PATH.
# NOTE(review): this appends again every time the cell is re-run.
os.environ['PATH'] += ';C:\\Program Files\\Java\\jdk-17\\jre\\bin\\server\\;C:\\Program Files\\Java\\jdk-17\\bin\\;C:\\Program Files\\Java\\jdk-17\\bin\\jar.exe'

# Get info

In [5]:
# Lookup table: cycle name -> {channel number -> marker name}.
# Channels missing from a cycle's mapping are ignored by `get_info`.
markers_map = {
    'cycle1': {1: 'Hoechst', 3: 'Sox2/Oct4', 4: 'NF-Kb/p-P90rsk'},
    'cycle2': {1: 'Hoechst', 3: 'Cyclin D1/CDK4', 4: 'TRAIL/DR5'},
    'cycle3': {1: 'Hoechst', 2: 'p-ERK/c-MYC'},
    'cycle4': {1: 'Hoechst', 3: 'Cyclin E/CDK2'},
    'cycle5': {1: 'Hoechst', 2: 'CD20', 3: 'Phalloidin', 4: 'CD8'},
    'cycle6': {1: 'Hoechst', 2: 'Pan-cytokeratin', 4: 'CD4'},
    'cycle7': {1: 'Hoechst', 2: 'Concanavalin A', 4: 'WGA'},
    'cycleHE': {2: 'HE'},
}

def get_info(data_raw, marker_dict):
    """Walk ``data_raw`` and tabulate the stitched channel images found there.

    Metadata is parsed from names: the containing folder is split on ``_``
    into condition (part 0), cycle (last character of part 1, or ``cycleHE``
    when ``'H&E'`` occurs in the directory path) and FOV (part 2). The channel
    number is the single character directly before the 4-character file
    extension of the last ``_``-separated piece of the file name.

    Parameters
    ----------
    data_raw : str or path-like
        Root folder to scan recursively.
    marker_dict : dict
        ``{cycle name: {channel number: marker name}}``. Files whose
        cycle/channel pair is not in this mapping are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per accepted image with columns ``Condition``, ``FOV``,
        ``Cycle``, ``Channels``, ``Markers`` and ``Path``.

    Raises
    ------
    IndexError, ValueError
        If an accepted file sits in a folder, or has a name, that does not
        follow the expected naming pattern.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        # Skip every directory whose path contains one of these substrings
        if 'after nuclease' in dirpath or 'Test' in dirpath or 'wrong' in dirpath:
            continue

        # Path(...).name extracts the folder name with the platform's own
        # separator rules instead of assuming '\\' is the separator.
        folder_parts = Path(dirpath).name.split('_')

        for name in sorted(filenames):
            # Only stitched tif images, excluding overlays and composites
            if "tif" not in name or "sti" not in name:
                continue
            if 'overlay' in name or 'Composite' in name:
                continue

            # Get information from folder / image name
            condition = folder_parts[0]
            fov = folder_parts[2]
            if 'H&E' in dirpath:
                cycle = 'cycleHE'
            else:
                cycle = 'cycle' + folder_parts[1][-1]

            ch = int(name.split('_')[-1][-5])

            # Only an unknown cycle/channel pair means "skip this file";
            # any other error should surface rather than be swallowed.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append(
                (condition, fov, cycle, ch, marker, os.path.join(dirpath, name))
            )

    return pd.DataFrame(records, columns=columns)

In [6]:
# Cached metadata table for the raw images; make sure its folder exists.
df_meta_path = data_dir / 'tissue' / 'metadata' / 'info.csv'
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

# Scan the raw folder only when no cached CSV exists; otherwise reuse the CSV.
if not df_exist:
    print('Created df')
    df = get_info(data_raw, markers_map)
    # Sort rows using the natsort key on each of the listed columns.
    df = df.sort_values(
        by=["Condition", 'FOV', "Cycle", "Channels"],
        key=natsort_keygen()
    )
    df.to_csv(df_meta_path, index=False)
else:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)

Loaded df


In [7]:
# Display the raw-image metadata table.
df

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,Path
0,577-1197,20X,cycle1,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
1,577-1197,20X,cycle1,3,Sox2/Oct4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
2,577-1197,20X,cycle1,4,NF-Kb/p-P90rsk,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
3,577-1197,20X,cycle2,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
4,577-1197,20X,cycle2,3,Cyclin D1/CDK4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
...,...,...,...,...,...,...
75,577-1210,40X,cycle6,4,CD4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
76,577-1210,40X,cycle7,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
77,577-1210,40X,cycle7,2,Concanavalin A,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
78,577-1210,40X,cycle7,4,WGA,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."


In [8]:
# Number of image rows per (Condition, FOV) pair.
df.groupby(['Condition', 'FOV']).size()

Condition  FOV
577-1197   20X    20
           40X    20
577-1210   20X    20
           40X    20
dtype: int64

# Save HDF5

In [9]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed dataset called ``name`` into an HDF5 file.

    If a dataset with that name already exists in the file, nothing is written
    and a message is printed instead.

    Parameters
    ----------
    path : str or path-like
        HDF5 file to open.
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store; the dataset takes its shape and dtype.
    attr_dict : dict, optional
        Key/value pairs stored as attributes on the newly created dataset.
    mode : str, default "a"
        File mode passed to ``h5py.File``.
    """
    # The context manager closes the file even if dataset creation or the
    # write raises; the original explicit close() was skipped on error.
    with h5py.File(path, mode) as hf:
        if name in hf:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,  # same as the shape: the dataset is not resizable
            chunks=True,
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread`` using ``as_gray=True``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    Parameters
    ----------
    task : callable
        Function called with one item at a time.
    pics : iterable
        Items to process.
    n_jobs : int, default 20
        Worker count passed to ``joblib.Parallel``. The default keeps the
        previously hard-coded value, so existing two-argument calls are
        unchanged.

    Returns
    -------
    list
        Results of ``task`` in the order of ``pics``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

In [12]:
# CSV listing one raw HDF5 file per (Condition, FOV), and the folder holding those files.
df_imgs_path = data_dir / 'tissue' / 'metadata' / 'imgs.csv'
df_imgs_path.parent.mkdir(parents=True, exist_ok=True)

temp_path = data_dir  / 'tissue' / 'hdf5' / 'raw'
temp_path.mkdir(parents=True, exist_ok=True)

df_exist = df_imgs_path.is_file()

# Build the HDF5 files and the CSV only when the CSV is missing; otherwise just load it.
if not df_exist:
    print('Created df')
    
    group = df.groupby(['Condition', 'FOV'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        # One HDF5 file per group, named "<Condition>_<FOV>.hdf5"
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append(list(name)+[file_path])
        
        group_cycle = df_group.groupby('Cycle')
        for cycle, df_cycle in group_cycle:
            channels = df_cycle.Channels.to_list()
            markers = df_cycle.Markers.to_list()
            paths = df_cycle.Path.to_numpy()

            if cycle == 'cycleHE':
                # The H&E dataset is only a 1-element placeholder; the source
                # image path is kept in the dataset attributes.
                imgs = np.array([0])
                info = {"Channels": 2, "Markers": 'HE', 'Path': paths[0]}
                # hdf5 as Channel -> Z mapping
                save_hdf5(file_path, cycle, imgs, info)
            else:
                # NOTE(review): reading and saving of the other cycles is
                # commented out, so as written this cell only writes the H&E
                # placeholder. The other cycle datasets presumably come from
                # an earlier run - confirm before running on a fresh folder.
                # imgs = joblib_loop(read_img, paths)
                # imgs = np.array(imgs)
                # info = {"Channels": channels, "Markers": markers}
                continue

            # # hdf5 as Channel -> Z mapping
            # save_hdf5(file_path, cycle, imgs, info)
    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])        
    df_imgs.to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)

Created df


  0%|          | 0/4 [00:00<?, ?it/s]

Dataset cycleHE exists
Dataset cycleHE exists


In [13]:
# Display the table of raw HDF5 files (one row per Condition/FOV).
df_imgs

Unnamed: 0,Condition,FOV,Path
0,577-1197,20X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
1,577-1197,40X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
2,577-1210,20X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
3,577-1210,40X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...


In [14]:
# path = r'Y:\\coskun-lab\\Shuangyi\\ERK, YAP project_2022\\PLA\\FFPE patient samples-1Nov23\\577-1197_2 H&E_20X_16Nov23\\stitched_2.tif'
# img_test = tf.imread(path)
# img_test.shape


In [15]:
# tf.imwrite('test.tif', img_test[::4, ::4,: ], bigtiff = True)

# Registration with ImageJ

## Registration

In [16]:
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=100):
    """Contrast-stretch ``img`` between two of its intensity percentiles.

    ``n_min`` and ``n_max`` are the lower and upper percentiles (0-100) whose
    values are passed to ``exposure.rescale_intensity`` as ``in_range``.
    Returns the rescaled image.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    return exposure.rescale_intensity(img, in_range=(lower, upper))

In [17]:
import tifffile as tf
from PIL import Image
import PIL.Image
# Raise Pillow's maximum accepted pixel count so very large images can be opened.
PIL.Image.MAX_IMAGE_PIXELS = 933120000
import shutil
from datetime import date, datetime
import skimage.io 
from skimage import util
from skimage.transform import resize

In [19]:
# Print the shape of every dataset in each raw HDF5 file and record, per
# (Condition, FOV), the [shape[1], shape[2]] of the dataset whose key parses to cycle '2'.
group = df_imgs.groupby(['Condition', 'FOV'])
dim = {}
for name, df_group in group:
    path = df_group.iloc[0].Path
             
    # Read images
    # NOTE(review): cycles, imgs_all and channels are assigned but not used in this cell.
    cycles = []
    imgs_all = []
    channels = []

    with h5py.File(path, "r") as f:
        for k in f.keys():
            # Read image info: the key minus its first five characters is the cycle id
            cycle = k.split('_')[0][5:]
            channel = f[k].attrs['Channels']
            print(k, f[k].shape)
            if cycle == '2':
                dim[name] = [f[k].shape[1], f[k].shape[2]]

cycle1 (3, 23586, 43506)
cycle2 (3, 23610, 43510)
cycle3 (2, 23583, 43511)
cycle4 (2, 23599, 43515)
cycle5 (3, 23574, 43490)
cycle6 (3, 23576, 43496)
cycle7 (3, 23593, 43496)
cycleHE (3, 23593, 43496)
cycle1 (3, 4497, 11372)
cycle2 (3, 4445, 11247)
cycle3 (2, 4446, 11251)
cycle4 (2, 4446, 11239)
cycle5 (3, 4440, 11254)
cycle6 (3, 4444, 11247)
cycle7 (3, 4442, 11243)
cycleHE (1,)
cycle1 (3, 27600, 32793)
cycle2 (3, 29682, 31523)
cycle3 (2, 29666, 31521)
cycle4 (2, 29652, 31448)
cycle5 (3, 29763, 31474)
cycle6 (3, 29706, 32862)
cycle7 (3, 29649, 31519)
cycleHE (3, 29649, 31519)
cycle1 (3, 6521, 8689)
cycle2 (3, 6446, 8598)
cycle3 (2, 6447, 8610)
cycle4 (2, 6451, 8599)
cycle5 (3, 6439, 8686)
cycle6 (3, 6445, 8614)
cycle7 (3, 6464, 8635)
cycleHE (1,)


In [20]:
# Export every cycle of every (Condition, FOV) as one TIFF per channel into
# <regSavePath>/<Condition>_<FOV>/Original/CH<ch>/CH<ch>_Cycle<NN>.tif
regSavePath = data_dir / 'tissue' /'imgs' / 'registered_imagej'
regSavePath.mkdir(parents=True, exist_ok=True)

# Folder holding the per-channel H&E images, named <Condition>_<FOV>_CH<i>.tif
HE_path = data_dir / 'tissue' /'HE' 

chs = [1, 2, 3, 4]
group = df_imgs.groupby(['Condition', 'FOV'])
for name, df_group in group:
    path = df_group.iloc[0].Path
             
    # Read images
    cycles = []
    imgs_all = []
    channels = []

    with h5py.File(path, "r") as f:
        for k in tqdm(f.keys(), total=len(f.keys()), leave=False):
            if k == 'cycleHE':
                # H&E is read from the three TIFFs in HE_path (not from the
                # HDF5 dataset), inverted, and exported as cycle 8.
                name_join = '_'.join(name)
                paths = [HE_path / f'{name_join}_CH{i}.tif' for i in [1,2,3]]
                channel = np.array([1,2,3])
                imgs = [util.invert(tiff.imread(path)) for path in paths]
                cycle = '8'
                # continue
            else:
                # Read image info: cycle id from the dataset key, channel list from its attributes
                cycle = k.split('_')[0][5:]
                channel = f[k].attrs['Channels']
                imgs = f[k][:]
                # continue 

            # For 40X data, cycle 1 is resized to the shape stored in `dim`
            # (filled by an earlier cell) and converted to 8-bit.
            if cycle == '1' and name[1] == '40X':
                dim_x = dim[name][0]
                dim_y = dim[name][1]
                imgs = resize(imgs, (len(imgs), dim_x, dim_y))
                imgs = util.img_as_ubyte(imgs)

            for ch in chs:
                # Save path per Channel
                folderPath = os.path.join(regSavePath, '_'.join(name), 'Original', 'CH' + str(ch)) # 1 index
                if not os.path.exists(folderPath):
                    os.makedirs(folderPath, exist_ok = True)
                
                fileOut = 'CH' + str(ch) + '_Cycle' + str(cycle).zfill(2) + '.tif'
                fileOut = os.path.join(folderPath, fileOut)
                # if os.path.exists(fileOut):
                #     continue
                if ch in channel:
                    tf.imwrite(fileOut, util.img_as_uint(imgs[list(channel).index(ch)]), photometric = 'minisblack', bigtiff = True)
                else:
                    # Channel not acquired in this cycle: write an all-zero image of the same shape.
                    # NOTE(review): the placeholder is uint8 while real channels are
                    # written via img_as_uint - confirm the mixed bit depth is intended.
                    emptyImage = np.zeros(imgs[0].shape, np.uint8)
                    # print('Dont exist create empty image', cycle, ch)
                    tf.imwrite(fileOut, emptyImage, photometric = 'minisblack', bigtiff = True)

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

In [21]:
# Write one ImageJ macro per (Condition, FOV). The macro registers the CH1
# stack across cycles, then applies the transforms to each channel folder.
# The macro file is written by absolute path: the earlier os.chdir() call
# changed the kernel's working directory on every iteration, which breaks any
# cwd-relative path computed afterwards (e.g. re-running the data_dir cell).
group = df_imgs.groupby(['Condition', 'FOV'])
chs = [1, 2, 3, 4]

for name, channels in group:
    name = '_'.join(name)
    '''
    run("Register Virtual Stack Slices", "source=[Y:/coskun-lab/Nicky/07 Temp/register large stitch] output=[Y:/coskun-lab/Nicky/07 Temp/register output] feature=Rigid registration=[Rigid                -- translate + rotate                  ] advanced shrinkage save save_dir=[Y:/coskun-lab/Nicky/07 Temp/register output] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Rigid registration_model=[Rigid                -- translate + rotate                  ] interpolate");
    run("Transform Virtual Stack Slices", "source = [Y:/coskun-lab/Nicky/07 Temp/other channels/original] output = [Y:/coskun-lab/Nicky/07 Temp/other channels/original] transforms = [Y:/coskun-lab/Nicky/07 Temp/register output] interpolate");
    '''
    # folder to save registered images separated by channel to apply transforms
    # create all folder
    for ch in chs: # all channels
        os.makedirs(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)), exist_ok = True)
        os.makedirs(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)), exist_ok = True)

    # The macro lives next to the CH1 originals and is named by today's date
    original_ch1 = os.path.join(regSavePath, name, 'Original', 'CH1')
    now = datetime.now() # current date and time
    date_time = now.strftime("%d%b%Y")
    macro_path = os.path.join(original_ch1, date_time + '_register_transforms.ijm')

    # `with` guarantees the macro file is flushed and closed even if a write fails
    with open(macro_path, 'w') as macro:
        # register cycles on CH1
        macro.write('run("Register Virtual Stack Slices", "source=[')
        # original files
        macro.write(original_ch1.replace('\\', '/'))
        macro.write('] output=[')
        # registered output files
        macro.write(os.path.join(regSavePath, name, 'Registered', 'CH1').replace('\\', '/'))

        # Rigid registration: translation + rotation
        # NOTE(review): nothing is written between "save_dir=[" and "]", so the
        # macro passes an empty save_dir; the transform step below reads the
        # transforms from Original/CH1. Confirm the plugin stores them there.
        macro.write('] feature=Rigid registration=[Rigid                -- translate + rotate                  ] advanced shrinkage save save_dir=[')
        # folder to save recorded transformations 
        macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Rigid registration_model=[Rigid                -- translate + rotate                  ] interpolate"); \n')

        # # bigwrap registration
        # macro.write('] feature=Similarity registration=[Elastic              -- bUnwarpJ splines                    ] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[[Elastic              -- bUnwarpJ splines                    ] interpolate registration=Mono image_subsample_factor=0 initial_deformation=[Very Coarse] final_deformation=Fine divergence_weight=0.1 curl_weight=0.1 landmark_weight=1 image_weight=0 consistency_weight=0 stop_threshold=0.01 shear=0.95 scale=0.95 isotropy=1"); \n')

        # # Or use similarity: translation + rotation + isotropic scale
        # macro.write('] feature=Similarity registration=[Similarity           -- translate + rotate + isotropic scale] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=25 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=50 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[Similarity           -- translate + rotate + isotropic scale] interpolate"); \n')

        macro.write('run("Close All"); \n\n')

        # now apply transform to every channel folder (CH1 included)
        for ch in chs:
            macro.write('run("Transform Virtual Stack Slices", "source=[')
            # unregistered folder
            macro.write(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)).replace('\\', '/'))
            macro.write('] output=[')
            # registered folder
            macro.write(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)).replace('\\', '/'))
            macro.write('] transforms=[')
            macro.write(original_ch1.replace('\\', '/')) # stored in original registration folder
            macro.write('] interpolate"); \n')
            macro.write('run("Close All"); \n\n')

    # print command to run macro
    print('runMacro("' + macro_path.replace('\\', '/') + '");')

runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/tissue/imgs/registered_imagej/577-1197_20X/Original/CH1/21Nov2023_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/tissue/imgs/registered_imagej/577-1197_40X/Original/CH1/21Nov2023_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/tissue/imgs/registered_imagej/577-1210_20X/Original/CH1/21Nov2023_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/tissue/imgs/registered_imagej/577-1210_40X/Original/CH1/21Nov2023_register_transforms.ijm");


## Combine all registered images into single folder

In [49]:
# Folder with the per-channel Original/Registered stacks used for the ImageJ registration.
regSavePath = data_dir / 'tissue' /'imgs' / 'registered_imagej'

# Folder collecting all registered images of a (Condition, FOV) in one place.
regSaveFinalPath = data_dir / 'tissue' / 'imgs' / 'registered_imagej_final'
regSaveFinalPath.mkdir(parents=True, exist_ok=True)

# Folder for the registered images after cropping.
regSaveCropPath = data_dir / 'tissue' / 'imgs' /  'registered_crop'
regSaveCropPath.mkdir(parents=True, exist_ok=True)

In [25]:
# Show the resolved registration folder.
regSavePath

WindowsPath('y:/coskun-lab/Thomas/23_PLA_revision/data/tissue/imgs/registered_imagej')

In [26]:
# Copy each registered image from <regSavePath>/<name>/Registered/CH<ch>/ into
# <regSaveFinalPath>/<name>/ as Cycle<NN>_CH<ch>.tif.
group = df.groupby(['Condition', 'FOV'])

for name, channels in group:
    name = '_'.join(name)
    for ii, cycle in enumerate(tqdm(channels['Cycle'].unique())): # each cycle
    
        dfCycle = channels.loc[channels['Cycle'] == cycle]
        dfCycle.reset_index(drop = True, inplace = True) # index is channel - 1
        if cycle == 'cycleHE':
            # H&E was exported as cycle 8 with three channels
            cycle = '8'
            chs = [1,2,3]
        else:
            # Strip the 'cycle' prefix to get the cycle number
            cycle = cycle[5:]
            chs = dfCycle.Channels
            
        for jj, ch in enumerate(chs): # each channel
            
            # find registered file
            tifPath = os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch), 'CH' + str(ch)+ '_Cycle' + str(cycle).zfill(2) + '.tif')

            # File out
            fileOut = 'Cycle' + str(cycle).zfill(2) + \
            '_' + 'CH' + str(ch) + '.tif'
            folder = regSaveFinalPath / name
            folder.mkdir(parents=True, exist_ok=True)
            fileOut = os.path.join(regSaveFinalPath, name, fileOut)
            # print(tifPath)
            # Copy; registered files that do not exist are skipped without any message
            if os.path.exists(tifPath):
                shutil.copyfile(tifPath, fileOut)
            else:
                continue

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

In [71]:
#### Crop every image of a sample to the intersection of the bounding boxes
#### of the non-zero region of its channel-1 images

# Get channel list (taken from the first (Condition, FOV) group only)
group = df.groupby(['Condition', 'FOV'])

for name, df_group in group:
    channels = df_group.Channels.tolist()
    break

# Crop
for dir in os.listdir(regSaveFinalPath):

    # Read imgs
    imgs = []
    paths = []
    for file in os.listdir(regSaveFinalPath / dir):
        if 'tif' in file:
            path = regSaveFinalPath / dir/ file
            imgs.append(tiff.imread(path))
            paths.append(file)

    # Get bboxs
    # NOTE(review): channels[i] is matched to imgs[i] purely by position. This
    # assumes os.listdir returns the files in the same order and number as the
    # rows of the first group of `df` - confirm, since `df` lists one row for
    # cycleHE while three H&E channel files are exported per sample.
    # NOTE(review): skimage.measure is used, but only other skimage submodules
    # are imported explicitly in the visible cells - confirm it resolves on a fresh kernel.
    bboxs = []
    for i, img in enumerate(imgs):
        if channels[i] != 1:
            continue
        # Bounding box (min_row, min_col, max_row, max_col) of all non-zero pixels
        bbox = skimage.measure.regionprops((img>0).astype(np.uint8))[0]['bbox']
        bboxs.append(np.array(bbox))
    bboxs = np.stack(bboxs)

    # Intersection of the boxes: largest minimum corner, smallest maximum corner
    bbox_final = [np.max(bboxs[:,0]),
                np.max(bboxs[:,1]),
                np.min(bboxs[:,2]),
                np.min(bboxs[:,3])]

    min_row, min_col, max_row, max_col = bbox_final

    # Save cropped images
    save_dir = regSaveCropPath / dir
    save_dir.mkdir(parents=True, exist_ok=True)
    for i, img in enumerate(imgs):
        save_path = save_dir / paths[i]
        tiff.imwrite(save_path, img[min_row:max_row, min_col:max_col], bigtiff = True)

In [75]:
# napari is otherwise only imported in a later cell, so import it here to keep
# this cell runnable when the notebook is executed top to bottom.
import napari

# Visual check of the crop: show the last sample's images (left over from the
# crop loop) cropped to its final bounding box, one layer per image.
napari.view_image(np.stack(imgs)[:,min_row:max_row, min_col:max_col], channel_axis=0, contrast_limits=[0, 2**16])

Viewer(axes=Axes(visible=False, labels=True, colored=True, dashed=False, arrows=True), camera=Camera(center=(0.0, 2779.5, 4260.5), zoom=0.09037289485847182, angles=(0.0, 0.0, 90.0), perspective=0.0, interactive=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 5560.0, 1.0), (0.0, 8522.0, 1.0)), current_step=(2780, 4261), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'Image' at 0x23583f64310>, <Image layer 'Image [1]' at 0x23583df3460>, <Image layer 'Image [2]' at 0x235849dcfd0>, <Image layer 'Image [3]' at 0x23584bdbe50>, <Image layer 'Image [4]' at 0x2358a0e17e0>, <Image layer 'Image [5]' at 0x2358a170880>, <Image layer 'Image [6]' at 0x2358a215b40>, <Image layer 'Image [7]' at 0x2358a2bc5b0>, <Image layer 'Image [8]' at 0x2358a355ea0>, <Image layer 'Image [9]' at 0x2358a754ac0>, <Image layer 'Image [10]

## Save data

In [86]:
# Lookup table for the registered images: cycle name -> {channel number -> marker name}.
# The three H&E channels are listed as cycle 8.
markers_map = {
    'cycle1': {1: 'Hoechst', 3: 'Sox2/Oct4', 4: 'NF-Kb/p-P90rsk'},
    'cycle2': {1: 'Hoechst', 3: 'Cyclin D1/CDK4', 4: 'TRAIL/DR5'},
    'cycle3': {1: 'Hoechst', 2: 'p-ERK/c-MYC'},
    'cycle4': {1: 'Hoechst', 3: 'Cyclin E/CDK2'},
    'cycle5': {1: 'Hoechst', 2: 'CD20', 3: 'Phalloidin', 4: 'CD8'},
    'cycle6': {1: 'Hoechst', 2: 'Pan-cytokeratin', 4: 'CD4'},
    'cycle7': {1: 'Hoechst', 2: 'Concanavalin A', 4: 'WGA'},
    'cycle8': {1: 'HE1', 2: 'HE2', 3: 'HE3'},
}

def get_info(data_raw, marker_dict):
    """Walk ``data_raw`` and tabulate the registered, cropped channel images.

    The containing folder name is split on ``_`` into condition (part 0) and
    FOV (part 1). From a file name such as ``Cycle01_CH1.tif``, the cycle
    number is the last two characters of the first ``_``-separated piece and
    the channel is the third character of the second piece.

    Parameters
    ----------
    data_raw : str or path-like
        Root folder to scan recursively.
    marker_dict : dict
        ``{cycle name: {channel number: marker name}}``. Files whose
        cycle/channel pair is not in this mapping are reported with ``print``
        and skipped.

    Returns
    -------
    pandas.DataFrame
        One row per accepted image with columns ``Condition``, ``FOV``,
        ``Cycle``, ``Channels``, ``Markers`` and ``Path``.

    Raises
    ------
    IndexError, ValueError
        If a tif file or its folder does not follow the expected naming pattern.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        # Path(...).name extracts the folder name with the platform's own
        # separator rules instead of assuming '\\' is the separator.
        folder_parts = Path(dirpath).name.split('_')

        for name in sorted(filenames):
            if "tif" not in name:
                continue

            # Get information from folder / image name
            n_split = name.split('_')
            cond = folder_parts[0]
            fov = folder_parts[1]
            cycle = 'cycle' + str(int(n_split[0][-2:]))
            ch = int(n_split[1][2])

            # Only an unknown cycle/channel pair means "skip this file";
            # any other error should surface rather than be swallowed.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                print(cycle,ch,'')
                continue

            records.append((cond, fov, cycle, ch, marker, os.path.join(dirpath, name)))

    return pd.DataFrame(records, columns=columns)

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    ``n_jobs`` is the worker count passed to ``joblib.Parallel``; its default
    keeps the previously hard-coded value of 20, so existing two-argument
    calls are unchanged. Returns the list of results in the order of ``pics``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread`` using ``as_gray=True``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def get_min(imgs):
    """Return the element-wise minimum of the shapes of ``imgs``.

    The result is an array with one entry per axis, giving the smallest extent
    found along that axis across all images.
    """
    all_shapes = np.array([np.array(image.shape) for image in imgs])
    smallest = np.min(all_shapes, axis=0)
    return smallest

In [92]:
# From here on `data_raw` points at the registered + cropped images, and `df`
# is replaced by the metadata of those images (both names are reused from earlier cells).
data_raw = data_dir /'tissue'  / 'imgs' /  'registered_crop'
df_meta_path = data_dir /  'tissue' / 'metadata' / 'info_sti.csv'

# exist_ok=False so that an already existing metadata folder is reported
try:
    df_meta_path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_meta_path.is_file()

# Scan the folder only when no cached CSV exists; otherwise reuse the CSV.
if not df_exist:
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)
else:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)

Folder is already there
Created df


In [93]:
# Rows of the registered metadata that belong to cycle 8 (the H&E channels).
df[df.Cycle == 'cycle8']

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,Path
19,577-1197,20X,cycle8,1,HE1,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
20,577-1197,20X,cycle8,2,HE2,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
21,577-1197,20X,cycle8,3,HE3,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
41,577-1197,40X,cycle8,1,HE1,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
42,577-1197,40X,cycle8,2,HE2,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
43,577-1197,40X,cycle8,3,HE3,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
63,577-1210,20X,cycle8,1,HE1,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
64,577-1210,20X,cycle8,2,HE2,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
65,577-1210,20X,cycle8,3,HE3,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
85,577-1210,40X,cycle8,1,HE1,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...


In [94]:
# Number of registered image rows per (Condition, FOV) pair.
df.groupby(['Condition', 'FOV']).size()

Condition  FOV
577-1197   20X    22
           40X    22
577-1210   20X    22
           40X    22
dtype: int64

In [95]:
# CSV listing one registered HDF5 file per (Condition, FOV), and the folder holding those files.
df_imgs_path = data_dir /  'tissue'  /'metadata' / 'imgs_reg.csv'
temp_path =data_dir /  'tissue'  /'hdf5' / 'registered'
try:
    temp_path.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_imgs_path.is_file()

# Build the HDF5 files and the CSV only when the CSV is missing; otherwise just load it.
if not df_exist:
    print('Created df')
    group = df.groupby(['Condition','FOV'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        # One HDF5 file per group, named "<Condition>_<FOV>.hdf5"
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append(list(name)+[file_path])
        
        # if file_path.exists():
        #     continue
        
        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()
            
        # Read all images of the group in parallel, then trim each to the
        # smallest height/width found so they stack into a single array.
        imgs = joblib_loop(read_img, paths)
        min_shape = get_min(imgs)
        imgs_cropped = np.array([img[:min_shape[0], :min_shape[1]] for img in imgs])
        # Attribute keys here are singular ("Cycle", "Channel", "Marker"),
        # unlike the "Channels"/"Markers" keys used for the raw HDF5 files.
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}
            
            # hdf5 as Channel -> Z mapping
        save_hdf5(file_path, 'imgs', imgs_cropped, info)
    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])        
    df_imgs.to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)

Created df


  0%|          | 0/4 [00:00<?, ?it/s]

In [96]:
# Display the table of registered HDF5 files (one row per Condition/FOV).
df_imgs

Unnamed: 0,Condition,FOV,Path
0,577-1197,20X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
1,577-1197,40X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
2,577-1210,20X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...
3,577-1210,40X,y:\coskun-lab\Thomas\23_PLA_revision\data\tiss...


# Segmentation

## Multiplex

In [97]:
import napari 
from skimage import util

In [98]:
# Reload the table of registered HDF5 files from its CSV.
df_imgs_path = data_dir /  'tissue' /'metadata' / 'imgs_reg.csv'
df_imgs = pd.read_csv(df_imgs_path)

In [104]:
# Show the current marker list.
# NOTE(review): `markers` is not assigned in this section before this cell; the
# displayed array presumably comes from an earlier run of a cell that reads the
# 'Marker' attribute - confirm the intended cell order.
markers

array(['Hoechst', 'Sox2/Oct4', 'NF-Kb/p-P90rsk', 'Hoechst',
       'Cyclin D1/CDK4', 'TRAIL/DR5', 'Hoechst', 'p-ERK/c-MYC', 'Hoechst',
       'Cyclin E/CDK2', 'Hoechst', 'CD20', 'CD8', 'Hoechst',
       'Pan-cytokeratin', 'CD4', 'Hoechst', 'Concanavalin A', 'WGA',
       'HE1', 'HE2', 'HE3'], dtype=object)

In [106]:
# Markers whose images are combined into the cytoplasm channel of the segmentation input.
cyto_markers = ['CD8', 'Pan-cytokeratin', 'CD4']

In [107]:
# Build one 3-plane image per (Condition, FOV) for segmentation:
# plane 0 = zeros, plane 1 = combined cytoplasm markers, plane 2 = nuclear stain.
whole_seg_path = data_dir /  'tissue' / 'imgs' / 'segmentation'
whole_seg_path.mkdir(parents=True, exist_ok=True)


# Save combined images
for row in df_imgs.itertuples():
    # Read image stack and the marker name of every plane
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']

    # Get dapi and cyto images
    indices = np.isin(markers, cyto_markers)
    # NOTE(review): plane 3 is hard-coded as the nuclear image; in the marker
    # list shown in this notebook that is the second 'Hoechst' entry - confirm.
    img_dapi = imgs[3]
    imgs_cyto = imgs[indices,:]

    # Contrast stretching, then an explicit conversion to 8-bit.
    # The earlier `.astype(np.uint8)` on the stretched data did not rescale:
    # 16-bit values wrap modulo 256 and float values in [0, 1] truncate to 0/1.
    # `util.img_as_ubyte` maps the full input range onto 0-255 instead.
    img_dapi = util.img_as_ubyte(contrast_str(img_dapi, n_max=99.9))
    # Every selected cyto image is used, rather than exactly the first three.
    imgs_cyto_scaled = [
        util.img_as_ubyte(contrast_str(img_cyto_single, n_min=10, n_max=99.9))
        for img_cyto_single in imgs_cyto
    ]
    # Pixel-wise maximum across the cytoplasm markers
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)
    # zeros_like keeps the empty plane 8-bit, so the stack is not upcast to float64
    img_rgb = np.stack([np.zeros_like(img_dapi), img_cyto, img_dapi], axis=0)

    # Save as <Condition>_<FOV>.tif
    file_name = f'{"_".join(row[1:3])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)

In [103]:
import napari 

# Open the image stack in napari, one layer per marker.
# NOTE(review): `img` is not defined in any visible cell; it presumably came
# from a cell that no longer exists - confirm which array is meant (e.g. `imgs`).
napari.view_image(img, channel_axis=0, name=markers, contrast_limits=[0, 2**16])

Viewer(axes=Axes(visible=False, labels=True, colored=True, dashed=False, arrows=True), camera=Camera(center=(0.0, 2218.5, 5580.5), zoom=0.19456190646837485, angles=(0.0, 0.0, 90.0), perspective=0.0, interactive=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 4438.0, 1.0), (0.0, 11162.0, 1.0)), current_step=(2219, 5581), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'Hoechst' at 0x23597178760>, <Image layer 'Sox2/Oct4' at 0x23596f6b220>, <Image layer 'NF-Kb/p-P90rsk' at 0x23596eb8b80>, <Image layer 'Hoechst [1]' at 0x23596e628c0>, <Image layer 'Cyclin D1/CDK4' at 0x23596d56110>, <Image layer 'TRAIL/DR5' at 0x23596c4c7c0>, <Image layer 'Hoechst [2]' at 0x23596b3c7c0>, <Image layer 'p-ERK/c-MYC' at 0x2358e61beb0>, <Image layer 'Hoechst [3]' at 0x2358e414a00>, <Image layer 'Cyclin E/CDK2' at 0x23585bc3bb0>,

## HE