In [1]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.notebook import trange, tqdm, tqdm_notebook
from joblib import Parallel, delayed
import re
import h5py
import tifffile as tiff
from natsort import natsort_keygen



In [2]:
# Enable IPython's autoreload extension in mode 2 for this kernel session.
%load_ext autoreload
%autoreload 2

In [3]:
# The `data` folder is resolved one level above the current working directory.
data_dir = (Path.cwd().parents[0] / 'data').absolute()
data_processed = data_dir.joinpath('processed')
# Raw acquisition folder (absolute Windows path).
data_raw = r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\PLA\HCC827 cell culture 13 PPIs with nuclease P1'


In [4]:
import os

# JDK install location. Written as a raw string so the backslashes are taken
# literally instead of being parsed as (invalid) escape sequences.
java_home = r"C:\Program Files\Java\jdk-17"
os.environ['JDK_HOME'] = java_home
os.environ['JAVA_HOME'] = java_home

# Extra PATH entries for the JVM/JDK binaries. They are appended only when
# missing so that re-running this cell does not keep lengthening PATH.
java_path_entries = ';C:\\Program Files\\Java\\jdk-17\\jre\\bin\\server\\;C:\\Program Files\\Java\\jdk-17\\bin\\;C:\\Program Files\\Java\\jdk-17\\bin\\jar.exe'
if java_path_entries not in os.environ['PATH']:
    os.environ['PATH'] += java_path_entries

# Get info

In [39]:
# Lookup table: cycle name -> {channel number: marker name}.
# Channel 1 is 'Hoechst' in every cycle.
markers_map = {
    'cycle1': {1: 'Hoechst', 3: 'Sox2/Oct4', 4: 'NF-Kb/p-P90rsk'},
    'cycle2': {1: 'Hoechst', 3: 'SIRT1/P53', 4: 'TRAIL/DR5'},
    'cycle3': {1: 'Hoechst', 3: 'Cyclin D1/CDK4', 4: 'Bim/Tom20'},
    'cycle4': {1: 'Hoechst', 3: 'EGFR/GRB2', 4: 'FoxO1/AKT'},
    'cycle5': {1: 'Hoechst', 2: 'p-ERK/c-MYC'},
    'cycle6': {1: 'Hoechst', 4: 'Mcl-1/BAK'},
    'cycle7': {1: 'Hoechst', 4: 'Cyclin E/CDK2'},
    'cycle8': {1: 'Hoechst', 4: 'AKT/Mtor'},
    'cycle9': {1: 'Hoechst', 4: 'TEAD1/YAP'},
    'cycle10': {1: 'Hoechst', 2: 'p-EGFR', 3: 'Phalloidin', 4: 'Ki67'},
    'cycle11': {1: 'Hoechst', 2: 'NBD-C6', 4: 'COX IV'},
    'cycle12': {1: 'Hoechst', 2: 'Pan-cytokeratin'},
    'cycle13': {1: 'Hoechst', 2: 'Concanavalin A', 4: 'WGA'},
}

def get_info(data_raw, marker_dict):
    """Tabulate the stitched channel images found below ``data_raw``.

    Folders whose path contains 'after nuclease', 'Test' or 'wrong' are
    skipped. A file is considered when its name contains both "tif" and "sti"
    and neither 'overlay' nor 'Composite'.

    Metadata is parsed from the path:
      * condition - last space-separated word of the parent folder name
      * FOV       - last '_'-separated token of the image folder name
      * cycle     - second '_'-separated token of the image folder name with
                    its first three characters removed, prefixed by 'cycle'
      * channel   - the digit five characters from the end of the file name

    Files whose (cycle, channel) pair is not in ``marker_dict`` are ignored.

    Parameters
    ----------
    data_raw : path-like
        Root folder that is walked recursively.
    marker_dict : dict
        Mapping ``cycle name -> {channel number: marker name}``.

    Returns
    -------
    pandas.DataFrame
        Columns: Condition, FOV, Cycle, Channels, Markers, Path.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        # Don't get the nuclease after bleach channel
        if 'after nuclease' in dirpath or 'Test' in dirpath or 'wrong' in dirpath:
            continue

        # Split on either separator so the parsing works for both
        # backslash- and slash-delimited paths.
        d_split = dirpath.replace('\\', '/').split('/')

        for name in sorted(filenames):
            if "tif" not in name or "sti" not in name:
                continue
            if 'overlay' in name or 'Composite' in name:
                continue

            # Get information from the folder and image names
            condition = d_split[-2].split(' ')[-1]
            folder_tokens = d_split[-1].split('_')
            fov = folder_tokens[-1]
            cycle = 'cycle' + folder_tokens[1][3:]
            ch = int(name.split('_')[-1][-5])

            # Only a missing cycle/channel entry means "not a channel of
            # interest"; any other error should surface.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append(
                (condition, fov, cycle, ch, marker, os.path.join(dirpath, name))
            )

    return pd.DataFrame(records, columns=columns)

In [53]:
# CSV cache of the raw-image metadata table.
df_meta_path = data_dir / '13cyc' / 'metadata' / 'info.csv'
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

if df_exist:
    # Reuse the cached table.
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    # Scan the raw folder, sort naturally, then cache.
    print('Created df')
    sort_columns = ["Condition", "FOV", "Cycle", "Channels"]
    df = get_info(data_raw, markers_map).sort_values(
        by=sort_columns,
        key=natsort_keygen(),
    )
    df.to_csv(df_meta_path, index=False)

Loaded df


In [54]:
# Display the metadata table.
df

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,Path
0,100nM,FW1,cycle1,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
1,100nM,FW1,cycle1,3,Sox2/Oct4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
2,100nM,FW1,cycle1,4,NF-Kb/p-P90rsk,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
3,100nM,FW1,cycle2,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
4,100nM,FW1,cycle2,3,SIRT1/P53,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
...,...,...,...,...,...,...
131,control,FW2,cycle12,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
132,control,FW2,cycle12,2,Pan-cytokeratin,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
133,control,FW2,cycle13,1,Hoechst,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
134,control,FW2,cycle13,2,Concanavalin A,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."


In [55]:
# Number of metadata rows for every (Condition, FOV) pair.
df.groupby(['Condition', 'FOV']).size()

Condition  FOV
100nM      FW1    34
           FW2    34
control    FW1    34
           FW2    34
dtype: int64

# Save HDF5

In [40]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed, chunked dataset of an HDF5 file.

    If a dataset called ``name`` already exists in the file, nothing is
    written and a message is printed instead.

    Parameters
    ----------
    path : str
        HDF5 file to open.
    name : str
        Name of the dataset inside the file.
    data : np.ndarray
        Array to store; the dataset takes its shape and dtype.
    attr_dict : dict, optional
        Key/value pairs stored as attributes of the new dataset.
    mode : str
        File mode passed to ``h5py.File`` (default "a").
    """
    # The context manager closes the file even when dataset creation or the
    # write raises.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,
            chunks=True,
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data

        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def read_img(path):
    """Read the image at ``path`` with ``skimage.io.imread`` using ``as_gray=True``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def joblib_loop(task, pics):
    """Call ``task`` on each element of ``pics`` through joblib with ``n_jobs=20``."""
    pending = (delayed(task)(item) for item in pics)
    runner = Parallel(n_jobs=20)
    return runner(pending)

In [30]:
# CSV that lists the HDF5 file written for each (Condition, FOV).
df_imgs_path = data_dir / '13cyc' / 'metadata' / 'imgs.csv'
df_imgs_path.parent.mkdir(parents=True, exist_ok=True)

# Destination folder of the raw HDF5 files.
temp_path = data_dir / '13cyc' / 'hdf5' / 'raw'
temp_path.mkdir(parents=True, exist_ok=True)

df_exist = df_imgs_path.is_file()

if df_exist:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)
else:
    print('Created df')

    group = df.groupby(['Condition', 'FOV'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        # One file per group, named "<Condition>_<FOV>.hdf5".
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append([*name, file_path])

        # One dataset per cycle, holding that cycle's channel images.
        group_cycle = df_group.groupby('Cycle')
        for cycle, df_cycle in group_cycle:
            channels = df_cycle.Channels.to_list()
            markers = df_cycle.Markers.to_list()
            paths = df_cycle.Path.to_numpy()

            imgs = np.array(joblib_loop(read_img, paths))
            info = {"Channels": channels, "Markers": markers}

            # hdf5 as Channel -> Z mapping
            save_hdf5(file_path, cycle, imgs, info)

    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)

Loaded df


In [31]:
# Display the table of HDF5 files (one row per Condition/FOV).
df_imgs

Unnamed: 0,Condition,FOV,Path
0,100nM,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1,100nM,FW2,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
2,control,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
3,control,FW2,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...


# Save TIFF files

In [12]:
# from skimage import exposure, util

# def contrast_str(img, n_min=0.1, n_max=100):
#     p2, p98 = np.percentile(img, (n_min, n_max))
#     img_rescale = exposure.rescale_intensity(img, in_range=(p2, p98))
#     return img_rescale

# def make_imgs_same_dim(imgs):
#     # Get max dimensions
#     shapes = np.array([img.shape[1:] for img in imgs])
#     min_x, min_y = shapes.min(axis=0)
#     imgs_cropped = [img[:, :min_x, :min_y] for img in imgs]
#     # imgs_cropped[0] = contrast_str(imgs_cropped[0])
#     return imgs_cropped


In [13]:
# save_path = data_dir /  '13cyc' /'imgs' / 'raw'
# save_path.mkdir(parents=True, exist_ok=True)

# group = df_imgs.groupby(['Condition', 'FOV'])
# for name, df_group in group:
#     path = df_group.iloc[0].Path
             
#     # Read images
#     cycles = []
#     imgs_all = []
#     channels = []
#     with h5py.File(path, "r") as f:
#         for k in tqdm(f.keys(), total=len(f.keys()), leave=False):
#             cycle = k.split('_')[0]
#             channel = f[k].attrs['Channels']

#             imgs = f[k][:]
#             cycles.append(cycle)
#             channels.append(channel)
#             imgs[0] = contrast_str(imgs[0])
#             imgs_all.append(imgs)
    
#     imgs_same_shape = make_imgs_same_dim(imgs_all)
    
#     for i, imgs in enumerate(imgs_same_shape):
#         temp_path = save_path / '_'.join(np.array(name).astype(str))
#         temp_path.mkdir(parents=True, exist_ok=True)

#         file_name = f'001_{cycles[i]}.tif'
#         file_path = temp_path / file_name

#         # Write image
#         tiff.imwrite(file_path, imgs)


# Registration with ASHLAR

In [14]:
# from ashlar import fileseries, thumbnail,reg
# import matplotlib.pyplot as plt
# from ashlar.scripts.ashlar import process_axis_flip

In [15]:
# # Loop all images
# imgs_dir = data_dir / '13cyc' /'imgs' / 'raw'
# save_dir = data_dir / '13cyc' /'imgs' / 'registered'
# save_dir.mkdir(parents=True, exist_ok=True)

# imgs_dir_list = os.listdir(imgs_dir)

# for dir_path in tqdm(imgs_dir_list):
    
#     # Create reader for each cycle
#     readers = []
#     for i in range(1, 14):
#         reader = fileseries.FileSeriesReader(
#             str(imgs_dir / dir_path),
#             pattern='{series}_'+f'cycle{i}.tif',
#             overlap=0.29,
#             width=1,
#             height=1,
#             layout='snake',
#             direction='horizontal',
#             pixel_size=0.18872, 
#         )
#         readers.append(reader)
#     reader_1 = readers[0]
    
#     # Run stitching
#     aligner0 = reg.EdgeAligner(reader_1, channel=0, filter_sigma=2, verbose=False,)
#     aligner0.run()
    
#     # Generate merge image for 1 cycle
#     # Parramter
#     mosaic_args = {}
#     mosaic_args['verbose'] = False

#     mosaic = reg.Mosaic(
#             aligner0,aligner0.mosaic_shape,**mosaic_args
#         )
#     writer_class = reg.TiffListWriter
#     writer = writer_class(
#             [mosaic], str(save_dir / (dir_path + '_cycle1_ch{channel}.ome.tif'))
#     )
#     writer.run()
    
#     # Loop through rest of cycles
#     aligners = list()
#     aligners.append(aligner0)

#     for j in range(1, len(readers)):
#         aligners.append(
#             reg.LayerAligner(readers[j], aligners[0], channel=0, filter_sigma=2, verbose=False)
#         )
#         aligners[j].run()
#         mosaic = reg.Mosaic(
#             aligners[j], aligners[0].mosaic_shape,**mosaic_args
#         )
#         writer = writer_class(
#                 [mosaic], str(save_dir / (dir_path +'_cycle'+str(j+1)+'_ch{channel}.ome.tif'))
#         )
#         writer.run()
    

## Save Data

# Registration with ImageJ

## Registration

In [59]:
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=100):
    """Stretch the contrast of ``img`` between two percentiles and return it as ubyte.

    ``n_min`` and ``n_max`` are the lower and upper percentiles passed to
    ``np.percentile``; the resulting values are used as ``in_range`` for
    ``exposure.rescale_intensity`` before conversion with ``util.img_as_ubyte``.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

def make_imgs_same_dim(imgs):
    """Crop every stack in ``imgs`` to the smallest axis-1/axis-2 extent among them.

    Each element is indexed as ``[:, :rows, :cols]``, so axis 0 is left intact.
    Returns a list with the cropped arrays in the input order.
    """
    # Smallest size along axis 1 and axis 2 over all stacks.
    n_rows, n_cols = np.min([stack.shape[1:] for stack in imgs], axis=0)
    cropped = []
    for stack in imgs:
        cropped.append(stack[:, :n_rows, :n_cols])
    return cropped

In [68]:
import tifffile as tf
from PIL import Image
import PIL.Image
PIL.Image.MAX_IMAGE_PIXELS = 933120000
import shutil
from datetime import date, datetime
import skimage.io 
from skimage import util
from skimage.transform import resize

In [69]:
# For every (Condition, FOV), remember axis-1 and axis-2 sizes of the cycle-2 dataset.
group = df_imgs.groupby(['Condition', 'FOV'])
dim = {}
for name, df_group in group:
    path = df_group.iloc[0].Path

    # Read images
    cycles, imgs_all, channels = [], [], []

    with h5py.File(path, "r") as f:
        for k in f.keys():
            # Dataset keys look like 'cycle<N>'; keep only the number part.
            cycle = k.split('_')[0][5:]
            channel = f[k].attrs['Channels']
            if cycle != '2':
                continue
            dim[name] = [f[k].shape[1], f[k].shape[2]]

In [70]:
# Root folder for the ImageJ registration inputs and outputs.
regSavePath = data_dir / '13cyc' /'imgs' / 'registered_imagej'
regSavePath.mkdir(parents=True, exist_ok=True)

chs = [1, 2, 3, 4]
group = df_imgs.groupby(['Condition', 'FOV'])
for name, df_group in group:
    path = df_group.iloc[0].Path

    # Read images
    cycles, imgs_all, channels = [], [], []

    with h5py.File(path, "r") as f:
        for k in tqdm(f.keys(), total=len(f.keys()), leave=False):
            # Read image and info
            cycle = k.split('_')[0][5:]
            channel = f[k].attrs['Channels']
            imgs = f[k][:]

            # Cycle 1 is resized to the dimensions recorded in `dim`, then converted to ubyte.
            if cycle == '1':
                dim_x, dim_y = dim[name][0], dim[name][1]
                imgs = util.img_as_ubyte(resize(imgs, (len(imgs), dim_x, dim_y)))

            for ch in chs:
                # Save path per Channel (1 index)
                folderPath = os.path.join(regSavePath, '_'.join(name), 'Original', 'CH' + str(ch))
                if not os.path.exists(folderPath):
                    os.makedirs(folderPath, exist_ok = True)

                fileOut = os.path.join(
                    folderPath,
                    'CH' + str(ch) + '_Cycle' + str(cycle).zfill(2) + '.tif',
                )

                if ch not in channel:
                    # Channel missing in this cycle: write a black placeholder image.
                    emptyImage = np.zeros(imgs[0].shape, np.uint8)
                    tf.imwrite(fileOut, emptyImage, photometric = 'minisblack', bigtiff = True)
                    continue

                img = imgs[list(channel).index(ch)]
                if ch == 1:
                    # Only channel 1 is contrast stretched before saving.
                    img = contrast_str(img, n_min=0.1, n_max=99.9)
                tf.imwrite(fileOut, img, photometric = 'minisblack', bigtiff = True)

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

In [71]:
group = df_imgs.groupby(['Condition', 'FOV'])
chs = [1, 2, 3, 4]


def _macro_dir(*parts):
    """Join ``parts`` into a path and replace backslashes with forward slashes for the macro text."""
    return os.path.join(*parts).replace('\\', '/')


# Write one ImageJ macro per (Condition, FOV). Each macro first registers the
# CH1 images ("Register Virtual Stack Slices") and then applies the saved
# transforms to every channel ("Transform Virtual Stack Slices").
for name, df_group in group:
    name = '_'.join(name)

    # folder to save registered images separated by channel to apply transforms
    # create all folder
    for ch in chs: # all channels
        os.makedirs(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)), exist_ok = True)
        os.makedirs(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)), exist_ok = True)

    date_time = datetime.now().strftime("%d%b%Y")  # current date, e.g. 18Nov2023

    # The macro is written through its full path instead of os.chdir(), so the
    # kernel's working directory (which `data_dir` is derived from) stays put.
    macro_path = os.path.join(
        regSavePath, name, 'Original', 'CH1', date_time + '_register_transforms.ijm'
    )

    # `with` guarantees the macro file is flushed and closed even on error.
    with open(macro_path, 'w') as macro:
        # register cycles on CH1
        macro.write('run("Register Virtual Stack Slices", "source=[')
        # original files
        macro.write(_macro_dir(regSavePath, name, 'Original', 'CH1'))
        macro.write('] output=[')
        # registered output files
        macro.write(_macro_dir(regSavePath, name, 'Registered', 'CH1'))

        # # Rigid registration: translation + rotation
        # macro.write('] feature=Rigid registration=[Rigid                -- translate + rotate                  ] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Rigid registration_model=[Rigid                -- translate + rotate                  ] interpolate"); \n')

        # # bigwrap registration
        # macro.write('] feature=Similarity registration=[Elastic              -- bUnwarpJ splines                    ] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[[Elastic              -- bUnwarpJ splines                    ] interpolate registration=Mono image_subsample_factor=0 initial_deformation=[Very Coarse] final_deformation=Fine divergence_weight=0.1 curl_weight=0.1 landmark_weight=1 image_weight=0 consistency_weight=0 stop_threshold=0.01 shear=0.95 scale=0.95 isotropy=1"); \n')

        # Or use similarity: translation + rotation + isotropic scale
        # (save_dir is written empty: nothing is emitted between the brackets)
        macro.write('] feature=Similarity registration=[Similarity           -- translate + rotate + isotropic scale] advanced shrinkage save save_dir=[')
        macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=25 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=50 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[Similarity           -- translate + rotate + isotropic scale] interpolate"); \n')

        macro.write('run("Close All"); \n\n')

        # now apply transform to every channel
        for ch in chs:
            macro.write('run("Transform Virtual Stack Slices", "source=[')
            # unregistered folder
            macro.write(_macro_dir(regSavePath, name, 'Original', 'CH' + str(ch)))
            macro.write('] output=[')
            # registered folder
            macro.write(_macro_dir(regSavePath, name, 'Registered', 'CH' + str(ch)))
            macro.write('] transforms=[')
            # transforms are read from the original CH1 folder
            macro.write(_macro_dir(regSavePath, name, 'Original', 'CH1'))
            macro.write('] interpolate"); \n')
            macro.write('run("Close All"); \n\n')

    # print command to run macro
    print('runMacro("' + macro_path.replace('\\', '/') + '");')

runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/13cyc/imgs/registered_imagej/100nM_FW1/Original/CH1/18Nov2023_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/13cyc/imgs/registered_imagej/100nM_FW2/Original/CH1/18Nov2023_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/13cyc/imgs/registered_imagej/control_FW1/Original/CH1/18Nov2023_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/13cyc/imgs/registered_imagej/control_FW2/Original/CH1/18Nov2023_register_transforms.ijm");


## Combine all registered images into single folder

In [32]:
# Folder holding the per-channel ImageJ registration results.
regSavePath = data_dir / '13cyc' / 'imgs' / 'registered_imagej'

# Folders for the collected registered images and for their cropped versions.
regSaveFinalPath = data_dir / '13cyc' / 'imgs' / 'registered_imagej_final'
regSaveCropPath = data_dir / '13cyc' / 'imgs' / 'registered_crop'

for out_dir in (regSaveFinalPath, regSaveCropPath):
    out_dir.mkdir(parents=True, exist_ok=True)

In [74]:
# Copy every registered channel image into one flat folder per (Condition, FOV).
group = df.groupby(['Condition', 'FOV'])

for name, channels in group:
    name = '_'.join(name)
    for ii, cycle in enumerate(tqdm(channels['Cycle'].unique())): # each cycle

        dfCycle = channels.loc[channels['Cycle'] == cycle].reset_index(drop = True)
        cycle = cycle[5:]
        cycle_num = str(cycle).zfill(2)
        for jj, ch in enumerate(dfCycle.Channels): # each channel
            ch_tag = 'CH' + str(ch)

            # find registered file
            tifPath = os.path.join(
                regSavePath, name, 'Registered', ch_tag,
                ch_tag + '_Cycle' + cycle_num + '.tif',
            )

            # File out
            folder = regSaveFinalPath / name
            folder.mkdir(parents=True, exist_ok=True)
            fileOut = os.path.join(
                regSaveFinalPath, name, 'Cycle' + cycle_num + '_' + ch_tag + '.tif'
            )

            # Copy only the files that exist
            if not os.path.exists(tifPath):
                continue
            shutil.copyfile(tifPath, fileOut)

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

In [36]:
#### Crop every image to the region that is non-black in all channel-1 images

# `import skimage.io` alone does not guarantee that the `measure` submodule is loaded.
import skimage.measure

for sample_name in sorted(os.listdir(regSaveFinalPath)):
    sample_dir = regSaveFinalPath / sample_name
    if not sample_dir.is_dir():
        continue

    # Read imgs in a deterministic order; os.listdir gives no ordering guarantee.
    paths = sorted(fname for fname in os.listdir(sample_dir) if 'tif' in fname)
    imgs = [tiff.imread(sample_dir / fname) for fname in paths]

    # Get bboxs of the non-black region of each channel-1 image. The channel is
    # taken from the file name ('Cycle<NN>_CH<ch>.tif') rather than from the
    # position in the listing, so missing files cannot shift the pairing.
    bboxs = []
    for fname, img in zip(paths, imgs):
        if '_CH1.' not in fname:
            continue
        foreground = (img > 0).astype(np.uint8)
        bbox = skimage.measure.regionprops(foreground)[0]['bbox']
        bboxs.append(np.array(bbox))
    if not bboxs:
        raise ValueError(f'No channel-1 image found in {sample_dir}')
    bboxs = np.stack(bboxs)

    # Intersection of all boxes: largest minimum corner, smallest maximum corner.
    min_row = np.max(bboxs[:, 0])
    min_col = np.max(bboxs[:, 1])
    max_row = np.min(bboxs[:, 2])
    max_col = np.min(bboxs[:, 3])

    # Save cropped images
    save_dir = regSaveCropPath / sample_name
    save_dir.mkdir(parents=True, exist_ok=True)
    for fname, img in zip(paths, imgs):
        tiff.imwrite(save_dir / fname, img[min_row:max_row, min_col:max_col], bigtiff = True)

## Save data

In [41]:
from skimage import util

def get_info(data_raw, marker_dict):
    """Tabulate the registered, cropped images found below ``data_raw``.

    Every file whose name contains "tif" is considered. Metadata is parsed
    from the path:
      * condition - first '_'-separated token of the containing folder name
      * FOV       - second '_'-separated token of the containing folder name
      * cycle     - last two characters of the first '_'-separated token of
                    the file name, as an integer, prefixed by 'cycle'
      * channel   - third character of the second '_'-separated token of the
                    file name, as an integer

    Files whose (cycle, channel) pair is not in ``marker_dict`` are ignored.

    Parameters
    ----------
    data_raw : path-like
        Root folder that is walked recursively.
    marker_dict : dict
        Mapping ``cycle name -> {channel number: marker name}``.

    Returns
    -------
    pandas.DataFrame
        Columns: Condition, FOV, Cycle, Channels, Markers, Path.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        # Split on either separator so the parsing works for both
        # backslash- and slash-delimited paths.
        folder_name = dirpath.replace('\\', '/').split('/')[-1]

        for name in sorted(filenames):
            if "tif" not in name:
                continue

            # Get information from the folder and image names
            n_split = name.split('_')
            folder_tokens = folder_name.split('_')
            cond = folder_tokens[0]
            fov = folder_tokens[1]
            cycle = 'cycle' + str(int(n_split[0][-2:]))
            ch = int(n_split[1][2])

            # Only a missing cycle/channel entry means "skip this file"; any
            # other error should surface.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append((cond, fov, cycle, ch, marker, os.path.join(dirpath, name)))

    return pd.DataFrame(records, columns=columns)

def joblib_loop(task, pics):
    """Run ``task`` over every item of ``pics`` via joblib (``n_jobs=20``) and return the results."""
    calls = (delayed(task)(pic) for pic in pics)
    pool = Parallel(n_jobs=20)
    return pool(calls)

def read_img(path):
    """Return the image stored at ``path``, read by ``skimage.io.imread`` with ``as_gray=True``."""
    loaded = skimage.io.imread(path, as_gray=True)
    return loaded

def get_min(imgs):
    """Return, as a NumPy array, the per-axis minimum over the shapes of ``imgs``."""
    all_shapes = np.array([img.shape for img in imgs])
    return all_shapes.min(axis=0)

In [42]:
# From here on `data_raw` points at the cropped, registered images.
data_raw = data_dir / '13cyc' / 'imgs' / 'registered_crop'
df_meta_path = data_dir / '13cyc' / 'metadata' / 'info_sti.csv'

try:
    df_meta_path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_meta_path.is_file()

if df_exist:
    # Reuse the cached table.
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    # Build the table from the folder contents and cache it.
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)

Folder is already there
Created df


In [43]:
# Number of registered-image rows for every (Condition, FOV) pair.
df.groupby(['Condition', 'FOV']).size()

Condition  FOV
100nM      FW1    34
           FW2    34
control    FW1    34
           FW2    34
dtype: int64

In [44]:
# CSV that lists the registered HDF5 file written for each (Condition, FOV).
df_imgs_path = data_dir / '13cyc' /'metadata' / 'imgs_reg.csv'
temp_path =data_dir / '13cyc' /'hdf5' / 'registered'
try:
    temp_path.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_imgs_path.is_file()

if not df_exist:
    print('Created df')
    group = df.groupby(['Condition','FOV'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append(list(name)+[file_path])

        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()

        # Read all images and crop them to the smallest common shape.
        imgs = joblib_loop(read_img, paths)
        min_shape = get_min(imgs)
        imgs_cropped = np.array([img[:min_shape[0], :min_shape[1]] for img in imgs])
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # The rescaling below assigns fractional values back into the array.
        # With an integer dtype those would be truncated to 0/1, so switch to
        # float first (a no-op when the stack is already floating point).
        if not np.issubdtype(imgs_cropped.dtype, np.floating):
            imgs_cropped = imgs_cropped.astype(np.float32)

        # Images whose maximum exceeds 1 are divided by 255.
        # NOTE(review): presumably these are 0-255 images mixed with [0, 1] ones - confirm.
        needs_scaling = imgs_cropped.max((1,2)) > 1
        imgs_cropped[needs_scaling] = imgs_cropped[needs_scaling] / 255
        imgs_cropped = util.img_as_ubyte(imgs_cropped)

        # hdf5 as Channel -> Z mapping
        save_hdf5(file_path, 'imgs', imgs_cropped, info)
    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)

Created df


  0%|          | 0/4 [00:00<?, ?it/s]

# Segmentation

## Generate images

In [5]:
import napari 
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=99.9):
    """Rescale ``img`` between its ``n_min`` and ``n_max`` percentiles and convert to ubyte."""
    low_val, high_val = np.percentile(img, (n_min, n_max))
    rescaled = exposure.rescale_intensity(img, in_range=(low_val, high_val))
    return util.img_as_ubyte(rescaled)

In [6]:
# Load the table of registered HDF5 files (columns written earlier: Condition, FOV, Path).
df_imgs_path = data_dir /  '13cyc' /'metadata' / 'imgs_reg.csv'
df_imgs = pd.read_csv(df_imgs_path)

In [9]:

# Print the attribute names stored on the 'imgs' dataset of every registered HDF5 file.
for row in df_imgs.itertuples():
    # Open the file read-only
    path = row.Path
    with h5py.File(path, "r") as f:
        print(f['imgs'].attrs.keys())

<KeysViewHDF5 ['Channel', 'Cycle', 'Marker']>
<KeysViewHDF5 ['Channel', 'Cycle', 'Marker']>
<KeysViewHDF5 ['Channel', 'Cycle', 'Marker']>
<KeysViewHDF5 ['Channel', 'Cycle', 'Marker']>


In [55]:
# Markers whose images are selected as the cytoplasm channel when building segmentation inputs.
cyto_markers = ['p-EGFR', 'Phalloidin']

In [56]:
# Display the current `markers` array.
# NOTE(review): this relies on `markers` being left over from a previously executed cell.
markers

array(['Hoechst', 'Sox2/Oct4', 'NF-Kb/p-P90rsk', 'Hoechst', 'SIRT1/P53',
       'TRAIL/DR5', 'Hoechst', 'Cyclin D1/CDK4', 'Bim/Tom20', 'Hoechst',
       'EGFR/GRB2', 'FoxO1/AKT', 'Hoechst', 'p-ERK/c-MYC', 'Hoechst',
       'Mcl-1/BAK', 'Hoechst', 'Cyclin E/CDK2', 'Hoechst', 'AKT/Mtor',
       'Hoechst', 'TEAD1/YAP', 'Hoechst', 'p-EGFR', 'Phalloidin', 'Ki67',
       'Hoechst', 'NBD-C6', 'COX IV', 'Hoechst', 'Pan-cytokeratin',
       'Hoechst', 'Concanavalin A', 'WGA'], dtype=object)

In [60]:
# Folder that receives one 3-plane image per (Condition, FOV).
whole_seg_path = data_dir /  '13cyc' / 'imgs' / 'segmentation'
whole_seg_path.mkdir(parents=True, exist_ok=True)


# Save combined images
for row in df_imgs.itertuples():
    # Read image stack and marker names
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']

    # Get the cyto images (markers listed in `cyto_markers`)
    indices = np.isin(markers, cyto_markers)
    imgs_cyto = imgs[indices,:]

    # Contrast stretching.
    # NOTE(review): index 3 is hard-coded for the nuclear image - confirm it is the intended Hoechst cycle.
    img_dapi = contrast_str(imgs[3], n_max=99.9)
    imgs_cyto_scaled = [contrast_str(imgs_cyto[idx], n_max=99.9) for idx in (0, 1)]
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)

    # Stack as (empty, cyto, nuclei) planes
    blank = np.zeros(img_dapi.shape)
    img_rgb = np.stack([blank, img_cyto, img_dapi], axis=0).astype(np.uint8)

    # Save
    file_name = f'{"_".join(row[1:3])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)

In [48]:
# import napari 

# napari.view_image(img[np.where(markers=='Hoechst')], channel_axis=0, visible=False, contrast_limits=[0, 256])

In [52]:
# Open the image in napari, splitting axis 0 into one layer per marker name.
# NOTE(review): relies on `img` and `markers` left over from earlier cells; confirm `img` holds the full marker stack before running.
napari.view_image(img, channel_axis=0, name=markers,contrast_limits=[0,255], visible=False)

Viewer(axes=Axes(visible=False, labels=True, colored=True, dashed=False, arrows=True), camera=Camera(center=(0.0, 1707.0, 4274.0), zoom=0.254029711077319, angles=(0.0, 0.0, 90.0), perspective=0.0, interactive=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 3415.0, 1.0), (0.0, 8549.0, 1.0)), current_step=(1707, 4274), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'Hoechst' at 0x2e5201b4670>, <Image layer 'Sox2/Oct4' at 0x2e52015e7d0>, <Image layer 'NF-Kb/p-P90rsk' at 0x2e52755a110>, <Image layer 'Hoechst [1]' at 0x2e52d6642e0>, <Image layer 'SIRT1/P53' at 0x2e52d6ee470>, <Image layer 'TRAIL/DR5' at 0x2e52d798640>, <Image layer 'Hoechst [2]' at 0x2e52d8227d0>, <Image layer 'Cyclin D1/CDK4' at 0x2e52d8f0310>, <Image layer 'Bim/Tom20' at 0x2e52d956b30>, <Image layer 'Hoechst [3]' at 0x2e52da206d0>, <Image l

## Segmentation using cellpose

In [1]:
from skimage import exposure, util
from cellpose import core, utils, io, models, metrics

# Ask cellpose whether a GPU can be used and report the answer.
use_GPU = core.use_gpu()
# Indexed with the boolean result: False -> 'NO', True -> 'YES'.
yn = ['NO', 'YES']
print(f'>>> GPU activated? {yn[use_GPU]}')

def contrast_str(img, n_min=0.1, n_max=99.95):
    """Percentile-based contrast stretch of ``img`` followed by conversion to ubyte."""
    bounds = np.percentile(img, (n_min, n_max))
    lo_val, hi_val = bounds
    scaled = exposure.rescale_intensity(img, in_range=(lo_val, hi_val))
    return util.img_as_ubyte(scaled)

>>> GPU activated? YES


In [2]:
# Input folder with the combined images used for segmentation.
# NOTE(review): needs `data_dir` from the configuration cell; the recorded NameError shows it was undefined when this cell ran.
whole_seg_path = data_dir /  '13cyc' / 'imgs' / 'segmentation'

# Output folder for the segmentation masks.
mask_path = data_dir  / '13cyc' / 'imgs' / 'masks'
mask_path.mkdir(parents=True, exist_ok=True)

NameError: name 'data_dir' is not defined

In [None]:
# Cyto and nuclei segmentation of every image in `whole_seg_path`
masks = []  # NOTE(review): never filled in this cell; kept in case later cells expect the name

# Construct each model once instead of once per image.
cyto_model = models.CellposeModel(gpu=True, model_type='cyto2')
nuclei_model = models.CellposeModel(gpu=True, model_type='nuclei')

for p in os.listdir(whole_seg_path):
    if 'tif' not in p:
        continue
    img = skimage.io.imread(whole_seg_path / p).transpose((2,0,1))

    # Cyto segmentation
    mask_cyto, flows, styles = cyto_model.eval(img,
                                  channels=[2,3],
                                  diameter=150,
                                  flow_threshold=0.3,
                                  cellprob_threshold=0.0
                                  )

    # Cyto mask keeps the input file name
    file_path = mask_path / p
    tiff.imwrite(file_path, mask_cyto)

    # Nuclei segmentation
    mask_nuclei, flows, styles = nuclei_model.eval(img,
                                  channels=[3,0],
                                  diameter=100,
                                #   flow_threshold=0.6,
                                  )

    # Nuclei mask is saved with a 'Nuclei_' prefix
    file_path = mask_path / f'Nuclei_{p}'
    tiff.imwrite(file_path, mask_nuclei)
