In [4]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import re
import h5py
import napari
from natsort import natsorted, natsort_keygen


In [5]:
# Reload edited project modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Project data folder: the `data` directory next to the parent of the current working directory.
data_dir = (Path().cwd().parents[0] / 'data').absolute()
# Root folder of the raw acquisition.
# NOTE(review): hard-coded absolute Windows network path; only resolves where drive Y: is mapped.
data_raw = r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\PLA\HCC827 cell culture 20Apr23'


# Get info

In [4]:
# Marker lookup for the raw acquisition: cycle name -> {channel number -> marker label}.
# Channel numbers are the integers parsed from the raw file names by `get_info`;
# a (cycle, channel) pair missing here makes `get_info` skip that file.
markers_map = {
    'cycle1': {
        1: 'Hoeschst', 
        3: 'Cyclin D1 & CDK2',
        4: 'NF-Kb & p-P90rsk'
    },
    'cycle2': {
        1: 'Hoeschst', 
        3: 'Sox2 & Oct4',
        4: 'Bim & Tom20'
    },
    'cycle3': {
        1: 'Hoeschst', 
        2: 'Mcl-1 & BAK'
    },
    'cycle4': {
        1: 'Hoeschst', 
        2: 'p-ERK & c-MYC'
    },
    'cycle5': {
        1: 'Hoeschst', 
        2: 'TEAD1 & YAP'
    },
    'cycle6': {
        1: 'Hoeschst',
        2: 'Cyclin E & CDK4',
    },
    'cycle7': {
        1: 'Hoeschst',
        2: 'P-AKT & mTOR',
    },
    'cycle8': {
        1: 'Hoeschst',
        2: 'p-EGFR',
        3: 'Phalloidin',
        4: 'Ki67'
    },
    'cycle9': {
        1: 'Hoeschst',
        3: 'Cyclin D1 & CDK2 - re',
        4: 'NF-Kb & p-P90rsk - re'
    },
    'cycle10': {
        1: 'Hoeschst',
        2: 'NBD-C6',
        3: 'Golph4',
        4: 'COX IV'
    },
    'cycle11': {
        1: 'Hoeschst',
        2: 'Pan-cytokeratin',
    },
    'cycle12': {
        1: 'Hoeschst',
        2: 'Concanavalin A',
        4: 'WGA'
    },
}

def get_info(data_raw, marker_dict = markers_map):
    """Walk ``data_raw`` and tabulate the raw TIFF images found there.

    A file is considered when its name contains ``"tif"`` and ``"Z0"``, does
    not contain ``"stitched"``, ``"Overlay"`` or ``"Composite"``, and its
    directory path does not contain ``"defocused"``.

    Metadata is parsed from the names:

    * folder name, split on ``_``: first field -> Timepoint, second field ->
      cycle token (``"Af"`` in the token marks an after-bleach acquisition,
      the text after its first three characters is the cycle number),
      last field -> FOV;
    * file name, split on ``_``: field 1 -> ROI, field 2 (minus its first
      character) -> Z, third character of field 3 -> channel number.

    Files whose names do not follow this scheme, or whose (cycle, channel)
    pair is absent from ``marker_dict``, are skipped.

    Parameters
    ----------
    data_raw : str or path-like
        Root directory to walk.
    marker_dict : dict
        Mapping ``cycle name -> {channel number -> marker label}``.

    Returns
    -------
    pandas.DataFrame
        One row per accepted file with columns Timepoint, FOV, Cycle,
        AfBleach, Channels, Markers, ROI, Z and Path.
    """
    columns = ["Timepoint", "FOV", "Cycle", "AfBleach", "Channels",
               "Markers", "ROI", "Z", "Path"]
    excluded_tags = ("stitched", "Overlay", "Composite")
    records = []

    # Loop through image folder
    for (dirpath, dirnames, filenames) in os.walk(data_raw):
        if 'defocused' in dirpath:
            continue

        # basename() honours the platform separator instead of assuming '\\'.
        folder_parts = os.path.basename(dirpath).split('_')

        for name in natsorted(filenames):
            if "tif" not in name or 'Z0' not in name:
                continue
            if any(tag in name for tag in excluded_tags):
                continue

            n_split = name.split('_')
            try:
                ch = int(n_split[3][2])
                roi = int(n_split[1])
                z = int(n_split[2][1:])
                cycle_token = folder_parts[1]
                cycle = f'cycle{cycle_token[3:]}'
                marker = marker_dict[cycle][ch]
            except (IndexError, ValueError, KeyError):
                # Name does not follow the acquisition scheme, or the
                # cycle/channel pair has no marker assigned: skip the file.
                continue

            records.append({
                "Timepoint": folder_parts[0],
                "FOV": folder_parts[-1],
                "Cycle": cycle,
                "AfBleach": 'Af' in cycle_token,
                "Channels": ch,
                "Markers": marker,
                "ROI": roi,
                "Z": z,
                "Path": os.path.join(dirpath, name),
            })

    # Explicit columns keep the schema stable even when nothing matched.
    return pd.DataFrame(records, columns=columns)

In [5]:
# CSV cache of the raw-image metadata table.
df_meta_path = data_dir / '9PPI Cell Culture' / '3D' / 'metadata' / 'info.csv'

# Create the metadata folder, reporting when it is already present.
if df_meta_path.parent.exists():
    print("Folder is already there")
else:
    df_meta_path.parent.mkdir(parents=True)

df_exist = df_meta_path.is_file()

if df_exist:
    # Reuse the cached table instead of re-walking the raw data.
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw)
    # Harmonise the two misspelt condition names found in folder names.
    for wrong, right in (('HCC827Osimj', 'HCC827Osim'), ('HCC827ctrl', 'HCC827Ctrl')):
        df.loc[df.Timepoint == wrong, 'Timepoint'] = right

    df.to_csv(df_meta_path, index=False)

Folder is already there
Loaded df


In [6]:
# Keep only acquisitions not flagged as after-bleach.
# NOTE(review): rebinds `df`, so the unfiltered table is only recoverable by re-running the previous cell.
df = df[df.AfBleach==False]

In [7]:
# Preview the first rows of the filtered metadata table.
df.head()

Unnamed: 0,Timepoint,FOV,Cycle,AfBleach,Channels,Markers,ROI,Z,Path
0,HCC827Osim,FW1,cycle10,False,1,Hoeschst,1,1,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
1,HCC827Osim,FW1,cycle10,False,2,NBD-C6,1,1,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
2,HCC827Osim,FW1,cycle10,False,3,Golph4,1,1,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
3,HCC827Osim,FW1,cycle10,False,4,COX IV,1,1,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
4,HCC827Osim,FW1,cycle10,False,1,Hoeschst,1,2,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."


In [8]:
# List the cycle names present in the metadata.
df.Cycle.unique()

array(['cycle10', 'cycle11', 'cycle12', 'cycle1', 'cycle2', 'cycle3',
       'cycle4', 'cycle5', 'cycle6', 'cycle7', 'cycle8', 'cycle9'],
      dtype=object)

In [9]:
# List the condition names present in the metadata.
df.Timepoint.unique()

array(['HCC827Osim', 'HCC827Ctrl'], dtype=object)

In [10]:
# Sanity check: for every (Timepoint, FOV, AfBleach) group, print which
# cycles and markers were found, in natural (cycle1, cycle2, ... cycle10) order.
group = df.groupby(['Timepoint','FOV', 'AfBleach'])
for name, df_group in tqdm(group, total=len(group)):
    df_group = df_group.sort_values(by=["Cycle", "Channels"], key=natsort_keygen())
    for summary in (name, df_group.Cycle.unique(), df_group.Markers.unique()):
        print(summary)

  0%|          | 0/4 [00:00<?, ?it/s]

('HCC827Ctrl', 'FW1', False)
['cycle1' 'cycle2' 'cycle3' 'cycle4' 'cycle5' 'cycle6' 'cycle7' 'cycle8'
 'cycle9' 'cycle10' 'cycle11' 'cycle12']
['Hoeschst' 'Cyclin D1 & CDK2' 'NF-Kb & p-P90rsk' 'Sox2 & Oct4'
 'Bim & Tom20' 'Mcl-1 & BAK' 'p-ERK & c-MYC' 'TEAD1 & YAP'
 'Cyclin E & CDK4' 'P-AKT & mTOR' 'p-EGFR' 'Phalloidin' 'Ki67'
 'Cyclin D1 & CDK2 - re' 'NF-Kb & p-P90rsk - re' 'NBD-C6' 'Golph4'
 'COX IV' 'Pan-cytokeratin' 'Concanavalin A' 'WGA']
('HCC827Ctrl', 'FW2', False)
['cycle1' 'cycle2' 'cycle3' 'cycle4' 'cycle5' 'cycle6' 'cycle7' 'cycle8'
 'cycle9' 'cycle10' 'cycle11' 'cycle12']
['Hoeschst' 'Cyclin D1 & CDK2' 'NF-Kb & p-P90rsk' 'Sox2 & Oct4'
 'Bim & Tom20' 'Mcl-1 & BAK' 'p-ERK & c-MYC' 'TEAD1 & YAP'
 'Cyclin E & CDK4' 'P-AKT & mTOR' 'p-EGFR' 'Phalloidin' 'Ki67'
 'Cyclin D1 & CDK2 - re' 'NF-Kb & p-P90rsk - re' 'NBD-C6' 'Golph4'
 'COX IV' 'Pan-cytokeratin' 'Concanavalin A' 'WGA']
('HCC827Osim', 'FW1', False)
['cycle1' 'cycle2' 'cycle3' 'cycle4' 'cycle5' 'cycle6' 'cycle7' 'cycle8'
 '

# Save hdf5

In [29]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed dataset ``name`` in an HDF5 file.

    If a dataset called ``name`` already exists in the file, nothing is
    written and a message is printed instead.

    Parameters
    ----------
    path : str or path-like
        HDF5 file to open.
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store; the dataset takes its shape and dtype.
    attr_dict : dict, optional
        Key/value pairs stored as attributes on the new dataset.
    mode : str
        File mode passed to ``h5py.File`` (default ``"a"``).
    """
    # The context manager closes the file even if writing fails part-way.
    with h5py.File(path, mode) as hf:
        if name in hf:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,
            chunks=True,  # let h5py choose the chunk layout
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def test_data_exist(file_path, name):
    """Return True when ``name`` is present in the HDF5 file at ``file_path``.

    The file is opened read-only, so whatever ``h5py.File`` raises for a
    missing or unreadable file propagates to the caller.
    """
    with h5py.File(file_path, "r") as h5_file:
        found = name in h5_file
    return found
    
def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread(..., as_gray=True)``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    Parameters
    ----------
    task : callable
        Function taking a single item.
    pics : iterable
        Items to process.
    n_jobs : int
        Number of joblib workers (default 20, the value previously hard-coded).

    Returns
    -------
    list
        The results collected by ``joblib.Parallel``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

In [17]:
# CSV listing one HDF5 file per (Timepoint, FOV, AfBleach, ROI) group.
df_imgs_path = data_dir / '9PPI Cell Culture' / '3D' / 'metadata' / 'imgs.csv'

try:
    df_imgs_path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")
    
# Folder receiving the raw HDF5 files.
temp_path = data_dir / '9PPI Cell Culture' / '3D' / 'hdf5' / 'raw'
try:
    temp_path.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_imgs_path.is_file()

if not df_exist:
    print('Created df')
    
    group = df.groupby(['Timepoint','FOV', 'AfBleach', 'ROI'])
    rows = []
    
    for name, df_group in tqdm(group, total=len(group)):
        df_group = df_group.sort_values( # Sort by cycle and channels
            by=["Cycle", "Channels", 'Z'],
            key=natsort_keygen()
        )
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append(list(name)+[file_path])
        
        group_channel = df_group.groupby(['Cycle', 'Channels'])
        for n, df_channel in group_channel:
            # One dataset per (cycle, channel), named "<cycle>_<channel>".
            dset_name = '_'.join(np.array(n).astype(str))

            # Skip datasets already written by an earlier run. The file is
            # only probed when it exists, so a missing file is not an error
            # and real read failures are no longer silently swallowed.
            if file_path.exists() and test_data_exist(file_path, dset_name):
                continue
            
            marker = df_channel.iloc[0].Markers
            paths = df_channel.Path.to_numpy()

            # Read every image of this channel in parallel and stack them.
            imgs = joblib_loop(read_img, paths)
            imgs = np.array(imgs)
            info = {"Cycle": n[0], "Channel": n[1], "Marker": marker, "Z": df_channel.Z.to_numpy()}
            
            # hdf5 as Channel -> Z mapping
            save_hdf5(file_path, dset_name, imgs, info)
    df_imgs = pd.DataFrame(rows, columns=['Timepoint', 'FOV', 'AfBleach', 'ROI', 'Path'])        
    df_imgs.to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)

Folder is already there
Folder is already there
Loaded df


# Save Tifffile

In [4]:
import tifffile as tiff
from skimage import exposure, util

# Output folder for the per-z TIFF files written by the export cell below.
save_path = data_dir / '9PPI Cell Culture' / '3D_Whole' / 'imgs' / 'raw_norm'

def contrast_str(img, n_min=0.01, n_max=99.95):
    """Percentile-based contrast stretch.

    The ``n_min``-th and ``n_max``-th percentiles of ``img`` are used as the
    input range for ``skimage.exposure.rescale_intensity``; the rescaled
    image is returned.
    """
    low, high = np.percentile(img, [n_min, n_max])
    return exposure.rescale_intensity(img, in_range=(low, high))

def make_imgs_same_dim(imgs):
    """Crop 3-D image stacks to a common size and stretch their first plane.

    Each stack is cropped along its last two axes to the smallest extent
    found across ``imgs``, then plane 0 along the first axis is replaced by
    its ``contrast_str`` result.

    Returns the list of cropped stacks. If the inputs are NumPy arrays the
    crops are views, so the contrast-stretched plane is also written into
    the caller's arrays.
    """
    # Smallest extent of the two trailing axes over all stacks.
    spatial_shapes = np.array([stack.shape[1:] for stack in imgs])
    min_x, min_y = spatial_shapes.min(axis=0)

    imgs_cropped = []
    for stack in imgs:
        cropped = stack[:, :min_x, :min_y]
        cropped[0, ...] = contrast_str(cropped[0, ...])
        imgs_cropped.append(cropped)
    return imgs_cropped

In [19]:
# Export the raw HDF5 stacks as one multi-channel TIFF per (tile, cycle, z):
# <save_path>/<Timepoint>_<FOV>_<AfBleach>_<z>/<ROI as 3 digits>_<cycle>.tif
# The field meanings assume `df_imgs` has the columns Timepoint, FOV,
# AfBleach, ROI, Path written by the raw HDF5 cell -- TODO confirm.
save_path.mkdir(parents=True, exist_ok=True)

group = df_imgs.groupby(['Timepoint', 'FOV'])
for name, df_group in group:
    
    # NOTE(review): this assignment is overwritten on the first loop iteration below.
    path = df_group.iloc[0].Path
    for i, row in tqdm(enumerate(df_group.itertuples()), total=len(df_group)):
        path = row.Path

        # Read images
        # Each HDF5 key is one dataset; the cycle is the key text before the
        # first '_' and the channel comes from the dataset attributes.
        cycles = []
        imgs_all = []
        channels = []
        with h5py.File(path, "r") as f:
            for k in tqdm(f.keys(), total=len(f.keys()), leave=False):
                cycle = k.split('_')[0]
                channel = f[k].attrs['Channel']

                imgs = f[k][:]
                cycles.append(cycle)
                channels.append(channel)
                imgs_all.append(imgs)
                       
        cycles = np.array(cycles)
        channels = np.array(channels)

        # Get imgs_stacked
        # Truncate every dataset to the shortest first-axis length so they can be stacked.
        z_min = np.min([i.shape[0] for i in imgs_all])
        imgs_stacked = np.stack([i[:z_min] for i in imgs_all])
        for cycle in np.unique(cycles):
            # All datasets (channels) that belong to this cycle.
            indices = np.where(cycles == cycle)[0]
            imgs = imgs_stacked[indices,...]

            for z in range(imgs.shape[1]):
                # Create temp path
                # NOTE(review): `name` and `temp_path` are rebound here, shadowing the
                # groupby key and the HDF5 folder variable defined in earlier cells.
                name = [row[1], row[2], row[3], z]
                temp_path =  save_path / '_'.join(np.array(name).astype(str))
                temp_path.mkdir(parents=True, exist_ok=True)

                # row[4] is formatted as a zero-padded 3-digit integer.
                file_name = '_'.join(np.array(["{:03d}".format(row[4]), cycle]).astype(str)) + '.tif'
                file_path = temp_path / file_name

                # if os.path.exists(file_path):
                #     continue
                
                # Write image
                # One file holds every channel of this cycle at plane z.
                tiff.imwrite(file_path, imgs[:,z,...])

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

# Registration

In [5]:
from ashlar import fileseries, thumbnail,reg
import matplotlib.pyplot as plt
from ashlar.scripts.ashlar import process_axis_flip

In [6]:
# Condition name kept for testing. NOTE(review): not referenced by any cell visible in this notebook.
condition_test = 'HCC827Osim'

In [7]:
# Loop all images
# Loop all images
# For every per-z folder in `raw_norm`: stitch the cycle-1 tiles with ashlar,
# then align the remaining cycles to it and write one mosaic per cycle/channel.
thumb_dir = data_dir / '9PPI Cell Culture' / '3D_Whole' / 'thumbnails'
imgs_dir = data_dir / '9PPI Cell Culture' / '3D_Whole' / 'imgs' / 'raw_norm'
save_dir = data_dir / '9PPI Cell Culture' / '3D_Whole' / 'imgs' / 'registered_norm'

save_dir .mkdir(parents=True, exist_ok=True)
imgs_dir_list = os.listdir(imgs_dir)
# NOTE(review): `thumb_dir` is created but nothing in this cell writes to it.
thumb_dir.mkdir(parents=True, exist_ok=True)

for dir_path in tqdm(imgs_dir_list):
        
    # Create reader for each cycle
    # Cycles 1..12 are hard-coded; files are matched as "<series>_cycle<i>.tif".
    # Tiles are declared as a 6 x 3 grid acquired in horizontal snake order.
    readers = []
    for i in range(1, 13):
        reader = fileseries.FileSeriesReader(
            str(imgs_dir / dir_path),
            pattern='{series}_cycle'+f'{i}.tif',
            overlap=0.29,
            width=6,
            height=3,
            layout='snake',
            direction='horizontal',
            pixel_size=0.18872, 
        )
        readers.append(reader)
    reader_1 = readers[0]
    
    # Run stitching
    # Cycle 1 is aligned on channel 0 and serves as the reference for the other cycles.
    aligner0 = reg.EdgeAligner(reader_1, channel=0, filter_sigma=2, verbose=False,)
    aligner0.run()
    
    # Generate merge image for 1 cycle
    # Parameter
    mosaic_args = {}
    mosaic_args['verbose'] = False

    mosaic = reg.Mosaic(
            aligner0,aligner0.mosaic_shape,**mosaic_args
        )
    # The `{channel}` placeholder is left in the file name for the writer;
    # presumably it is filled in per channel -- confirm against ashlar.
    writer_class = reg.TiffListWriter
    writer = writer_class(
            [mosaic], str(save_dir / (dir_path + '_cycle1_ch{channel}.ome.tif'))
    )
    writer.run()
    
    # Loop through rest of cycles
    # Each later cycle is aligned against the cycle-1 aligner and rendered
    # with the cycle-1 mosaic shape.
    aligners = list()
    aligners.append(aligner0)

    for j in range(1, 12):
        aligners.append(
            reg.LayerAligner(readers[j], aligners[0], channel=0, filter_sigma=2, verbose=False)
        )
        aligners[j].run()
        mosaic = reg.Mosaic(
            aligners[j], aligners[0].mosaic_shape,**mosaic_args
        )
        # readers[j] holds cycle j+1, hence the j+1 in the output name.
        writer = writer_class(
                [mosaic], str(save_dir / (dir_path +'_cycle'+str(j+1)+'_ch{channel}.ome.tif'))
        )
        writer.run()
    

  0%|          | 0/96 [00:00<?, ?it/s]

    assembling thumbnail 18/18
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-4. 45.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-16.  35.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-6. 40.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [ 4. 41.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [ 5. 41.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [14.  8.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-17.  14.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [ 5. 19.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-29.  44.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-32.  25.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-20.  23.]
    assembling thumbnail 18/18
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [-4. 45.]
    assembling t

# Save data

In [7]:
# Marker lookup for the registered mosaics: cycle name -> {channel tag -> marker label}.
# Channel tags are the 'chN' strings parsed from the registered file names.
# NOTE: this rebinds `markers_map`, replacing the integer-keyed map used for the raw data.
markers_map = {
    'cycle1': {
        'ch0': 'Hoeschst', 
        'ch1': 'Cyclin D1 & CDK2',
        'ch2': 'NF-Kb & p-P90rsk'
    },
    'cycle2': {
        'ch0': 'Hoeschst', 
        'ch1': 'Sox2 & Oct4',
        'ch2': 'Bim & Tom20'
    },
    'cycle3': {
        'ch0': 'Hoeschst', 
        'ch1': 'Mcl-1 & BAK'
    },
    'cycle4': {
        'ch0': 'Hoeschst', 
        'ch1': 'p-ERK & c-MYC'
    },
    'cycle5': {
        'ch0': 'Hoeschst', 
        'ch1': 'TEAD1 & YAP'
    },
    'cycle6': {
        'ch0': 'Hoeschst',
        'ch1': 'Cyclin E & CDK4',
    },
    'cycle7': {
        'ch0': 'Hoeschst',
        'ch1': 'P-AKT & mTOR',
    },
    'cycle8': {
        'ch0': 'Hoeschst',
        'ch1': 'p-EGFR',
        'ch2': 'Phalloidin',
        'ch3': 'Ki67'
    },
    'cycle9': {
        'ch0': 'Hoeschst',
        'ch1': 'Cyclin D1 & CDK2 - re',
        'ch2': 'NF-Kb & p-P90rsk - re'
    },
    'cycle10': {
        'ch0': 'Hoeschst',
        'ch1': 'NBD-C6',
        'ch2': 'Golph4',
        'ch3': 'COX IV'
    },
    'cycle11': {
        'ch0': 'Hoeschst',
        'ch1': 'Pan-cytokeratin',
    },
    'cycle12': {
        'ch0': 'Hoeschst',
        'ch1': 'Concanavalin A',
        'ch2': 'WGA'
    },
}

def get_info(data_raw, marker_dict = markers_map):
    """Walk ``data_raw`` and tabulate the registered TIFF mosaics found there.

    Every file whose name contains ``"tif"`` is split on ``_``:
    field 0 -> Timepoint, field 1 -> FOV, field 3 -> Z, field 4 -> Cycle and
    the first three characters of field 5 -> channel tag.

    Files with too few ``_``-separated fields, or whose (cycle, channel)
    pair is absent from ``marker_dict``, are skipped.

    Parameters
    ----------
    data_raw : str or path-like
        Root directory to walk.
    marker_dict : dict
        Mapping ``cycle name -> {channel tag -> marker label}``.

    Returns
    -------
    pandas.DataFrame
        One row per accepted file with columns Timepoint, FOV, Z, Cycle,
        Channels, Markers and Path (all parsed values are strings).
    """
    columns = ["Timepoint", "FOV", "Z", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for (dirpath, dirnames, filenames) in os.walk(data_raw):
        for name in sorted(filenames):
            if "tif" not in name:
                continue

            # Get information from image name
            n_split = name.split('_')
            try:
                time = n_split[0]
                fov = n_split[1]
                z = n_split[3]
                cycle = n_split[4]
                ch = n_split[5][:3]
                marker = marker_dict[cycle][ch]
            except (IndexError, KeyError):
                # Not a registered-mosaic file name, or no marker assigned
                # to this cycle/channel: skip it instead of aborting the walk.
                continue

            records.append({
                "Timepoint": time,
                "FOV": fov,
                "Z": z,
                "Cycle": cycle,
                "Channels": ch,
                "Markers": marker,
                "Path": os.path.join(dirpath, name),
            })

    # Explicit columns keep the schema stable even when nothing matched.
    return pd.DataFrame(records, columns=columns)


def get_min(imgs):
    """Return the element-wise minimum of the shapes of ``imgs``.

    The result is an array with one entry per axis, holding the smallest
    extent of that axis over all images.
    """
    all_shapes = np.array([np.array(img.shape) for img in imgs])
    return all_shapes.min(axis=0)

In [8]:
# Folder holding the registered mosaics written by the registration cell.
save_dir = data_dir / '9PPI Cell Culture'/ '3D_Whole' / 'imgs' / 'registered_norm'

In [9]:
# The registered mosaics are now the input; `data_raw` is rebound accordingly.
data_raw = save_dir
df_meta_path = data_dir / '9PPI Cell Culture' / '3D_Whole' / 'metadata' / 'info_sti_norm.csv'

# Create the metadata folder, reporting when it is already present.
if df_meta_path.parent.exists():
    print("Folder is already there")
else:
    df_meta_path.parent.mkdir(parents=True)

df_exist = df_meta_path.is_file()

if df_exist:
    # Reuse the cached table instead of re-walking the mosaics.
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw)
    df.to_csv(df_meta_path, index=False)

Folder is already there
Loaded df


In [10]:
# CSV listing one HDF5 file per (Timepoint, FOV, Z) group of registered mosaics.
df_imgs_path = data_dir / '9PPI Cell Culture' / '3D_Whole' / 'metadata' / 'imgs_reg_norm.csv'

# Folder receiving the registered HDF5 files.
temp_path =data_dir / '9PPI Cell Culture' / '3D_Whole' / 'hdf5' / 'registered_norm'
if temp_path.exists():
    print("Folder is already there")
else:
    temp_path.mkdir(parents=True)

df_exist = df_imgs_path.is_file()

if df_exist:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)
else:
    print('Created df')
    group = df.groupby(['Timepoint','FOV', 'Z'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append([*name, file_path])

        # Natural sort so cycle10 comes after cycle9, then by channel.
        df_group = df_group.sort_values(
            by=["Cycle", "Channels", 'Z'],
            key=natsort_keygen()
        )

        # A file left by an earlier run is kept as is.
        if file_path.exists():
            continue

        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()

        # Read all mosaics in parallel and crop them to their common minimum shape.
        imgs = joblib_loop(read_img, paths)
        min_shape = get_min(imgs)
        imgs_cropped = np.array([img[:min_shape[0], :min_shape[1]] for img in imgs])
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # One dataset 'imgs' per file; the attributes give cycle, channel and
        # marker for each plane along the first axis.
        save_hdf5(file_path, 'imgs', imgs_cropped, info)
    df_imgs = pd.DataFrame(rows, columns=['Timepoint', 'FOV', 'Z', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)

Folder is already there
Loaded df


# Create segmentation training set

In [39]:
import tensorflow as tf 
from skimage import exposure, util

def random_crop(image, NEW_IMG_HEIGHT, NEW_IMG_WIDTH):
    """Return a random crop of ``image`` via ``tf.image.random_crop``.

    The requested crop size is ``[3, NEW_IMG_HEIGHT, NEW_IMG_WIDTH]``; the
    leading 3 is fixed.
    """
    crop_size = [3, NEW_IMG_HEIGHT, NEW_IMG_WIDTH]
    return tf.image.random_crop(image, size=crop_size)

def contrast_str(img, n_min=0.01, n_max=99.95):
    """Percentile-based contrast stretch followed by conversion to 8-bit.

    The ``n_min``-th and ``n_max``-th percentiles of ``img`` are used as the
    input range for ``skimage.exposure.rescale_intensity``; the result is
    then passed through ``skimage.util.img_as_ubyte``.

    Note: this redefines the earlier ``contrast_str`` of the notebook, which
    did not perform the 8-bit conversion.
    """
    low, high = np.percentile(img, [n_min, n_max])
    stretched = exposure.rescale_intensity(img, in_range=(low, high))
    return util.img_as_ubyte(stretched)

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    Parameters
    ----------
    task : callable
        Function taking a single item.
    pics : iterable
        Items to process.
    n_jobs : int
        Number of joblib workers (default 20, the value previously hard-coded).

    Returns
    -------
    list
        The results collected by ``joblib.Parallel``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

In [45]:
# Marker labels whose images are merged into the cytoplasm channel of the segmentation input.
cyto_markers = ['p-EGFR', 'Pan-cytokeratin', 'Golph4']
# cyto_markers = ['p-EGFR']

In [46]:
# print(markers)

In [47]:
# napari.view_image(imgs, name=markers, channel_axis=0, visible=False, contrast_limits=[0,1])

In [54]:
# Cast every column to str so row fields can be joined into file names below.
# NOTE(review): rebinds `df_imgs` in place; re-running is harmless but the typed table is lost.
df_imgs = df_imgs.astype(str)

In [56]:
# Output folders: full-size segmentation inputs and random training crops.
whole_seg_path = data_dir / '9PPI Cell Culture'/ '3D_Whole' / 'imgs' / 'segmentation'
crop_seg_path =  data_dir / '9PPI Cell Culture'/ '3D_Whole' / 'imgs' / 'training_seg'

whole_seg_path.mkdir(parents=True, exist_ok=True)
crop_seg_path.mkdir(parents=True, exist_ok=True)

# Number of random 1000 x 1000 crops written per image.
N_crop = 10

for row in df_imgs.itertuples():
    # Read image
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']
    
    # Get dapi and cyto images
    indices = np.isin(markers, cyto_markers)
    # NOTE(review): plane 3 is used as the nuclear image; the index is a magic
    # number tied to the plane order stored in the HDF5 file -- confirm.
    img_dapi = imgs[3]
    imgs_cyto = imgs[indices,:]
    if len(imgs_cyto) == 0:
        raise ValueError(f"No plane of {path} matches cyto_markers={cyto_markers}")
    
    # Contrast stretching and combine to rgb image
    img_dapi = contrast_str(img_dapi, n_max=99.9)
    # Stretch every matched cytoplasm plane (however many `cyto_markers`
    # selects) and merge them by pixel-wise maximum.
    imgs_cyto_scaled = [contrast_str(img, n_max=99.9) for img in imgs_cyto]
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)
    # Channel order: empty, cytoplasm, nucleus.
    img_rgb = np.stack([np.zeros(img_dapi.shape),img_cyto, img_dapi], axis=0).astype(np.uint8)
    
    # Crop and save
    # row[1:4] are the first three columns after the index, joined into the file stem.
    file_name = f'{"_".join(row[1:4])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)
    
    # NOTE(review): crops are random and unseeded, so re-running overwrites
    # the training set with different crops.
    for i in range(N_crop):
        img_cropped = random_crop(img_rgb, 1000, 1000).numpy().astype(np.uint8)
        
        file_name = f'{"_".join(row[1:4])}_{i}.tif'
        file_path = crop_seg_path / file_name
        tiff.imwrite(file_path, img_cropped)


In [28]:
# img_cropped = random_crop(img_rgb, 1000, 1000).numpy()
# plt.imshow(np.transpose(img_cropped, (1,2,0)))

In [29]:
# napari.view_image(img_rgb, contrast_limits=[0,1])

# Show

In [30]:
# Display the table of HDF5 files.
df_imgs

Unnamed: 0,Timepoint,FOV,Path
0,HCC827Ctrl,FW1,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
1,HCC827Ctrl,FW2,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
2,HCC827Osim,FW1,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
3,HCC827Osim,FW2,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...


In [31]:
# Pick the HDF5 file of the first row for inspection.
path = df_imgs.iloc[0].Path

In [32]:
# Load the 'imgs' dataset of the selected file and its per-plane marker labels.
with h5py.File(path, "r") as f:
    imgs = f['imgs'][:]
    markers = f['imgs'].attrs['Marker']

In [33]:
import napari

# Open the planes in napari, one layer per marker (split along axis 0), all hidden initially.
napari.view_image(imgs, channel_axis=0, name=markers, visible=False, contrast_limits=[0,1])

Viewer(axes=Axes(visible=False, labels=True, colored=True, dashed=False, arrows=True), camera=Camera(center=(0.0, 1720.5, 4301.0), zoom=0.09618156457049866, angles=(0.0, 0.0, 90.0), perspective=0, interactive=True), cursor=Cursor(position=(1, 1), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 3442.0, 1.0), (0.0, 8603.0, 1.0)), current_step=(1721, 4301), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'Hoeschst' at 0x24036f0cfa0>, <Image layer 'Cyclin D1 & CDK2' at 0x24036f31130>, <Image layer 'NF-Kb & p-P90rsk' at 0x240370ab4c0>, <Image layer 'Hoeschst [1]' at 0x2403828aa60>, <Image layer 'Sox2 & Oct4' at 0x2403832e040>, <Image layer 'Bim & Tom20' at 0x240383c9490>, <Image layer 'Hoeschst [2]' at 0x24038466970>, <Image layer 'Mcl-1 & BAK' at 0x24038502e80>, <Image layer 'Hoeschst [3]' at 0x240385a63a0>, <Image layer 'p-ERK & c-MYC' at 0x24038642

# Save per z

In [12]:
# Display the per-z table of HDF5 files.
df_imgs

Unnamed: 0,Timepoint,FOV,Z,Path
0,HCC827Ctrl,FW1,0,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
1,HCC827Ctrl,FW1,1,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
2,HCC827Ctrl,FW1,2,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
3,HCC827Ctrl,FW1,3,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
4,HCC827Ctrl,FW1,4,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
...,...,...,...,...
91,HCC827Osim,FW2,19,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
92,HCC827Osim,FW2,20,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
93,HCC827Osim,FW2,21,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...
94,HCC827Osim,FW2,22,Y:\coskun-lab\Thomas\15_PLA\data\9PPI Cell Cul...


In [None]:
# NOTE(review): this cell was never executed (In [None]) and is a copy of the
# segmentation training-set cell. `whole_z_path` is created but never used:
# all outputs below still go to `whole_seg_path` / `crop_seg_path`, so running
# it would overwrite the segmentation images and training crops. Confirm the
# intended output before running.
whole_z_path = data_dir / '9PPI Cell Culture'/ '3D_Whole' / 'imgs' / 'registered_z'

whole_z_path.mkdir(parents=True, exist_ok=True)

for row in df_imgs.itertuples():
    # Read image
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']
    
    # Get dapi and cyto images
    indices = np.isin(markers, cyto_markers)
    # NOTE(review): plane 3 is a hard-coded index for the nuclear image -- confirm.
    img_dapi = imgs[3]
    imgs_cyto = imgs[indices,:]
    
    # Contrast stretching and combine to rgb image
    img_dapi = contrast_str(img_dapi, n_max=99.9)
    # img_cyto = contrast_str(imgs_cyto[0], n_max=99)
    # Exactly three cytoplasm planes are indexed, so `cyto_markers` must match three planes.
    imgs_cyto_scaled = [contrast_str(imgs_cyto[0], n_max=99.9), contrast_str(imgs_cyto[1], n_max=99.9),
                       contrast_str(imgs_cyto[2], n_max=99.9)]
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)
    # Channel order: empty, cytoplasm, nucleus.
    img_rgb = np.stack([np.zeros(img_dapi.shape),img_cyto, img_dapi], axis=0).astype(np.uint8)
    
    # Crop and save
    file_name = f'{"_".join(row[1:4])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)
    
    for i in range(N_crop):
        img_cropped = random_crop(img_rgb, 1000, 1000).numpy().astype(np.uint8)
        
        file_name = f'{"_".join(row[1:4])}_{i}.tif'
        file_path = crop_seg_path / file_name
        tiff.imwrite(file_path, img_cropped)
