In [2]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.notebook import trange, tqdm, tqdm_notebook
from joblib import Parallel, delayed
import re
import h5py
import tifffile as tiff
from natsort import natsort_keygen, natsorted

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
# Project data folder: the `data` directory that sits next to the notebook's parent folder.
_project_root = Path.cwd().parents[0]
data_dir = _project_root.joinpath('data').absolute()

# Sub-folder for processed outputs.
data_processed = data_dir.joinpath('processed')

# Root of the raw acquisition that get_info() walks (hard-coded network-share path).
data_raw = (
    r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\PLA'
    r'\HCC827 cell culture 13 PPIs with nuclease P1'
)


In [5]:
import os
 
# Set JDK_HOME / JAVA_HOME to the local JDK 17 install path.
# Raw strings keep the Windows backslashes literal. The previous "C:\Program ..." form only
# produced the right value because "\P", "\J" and "\j" are unrecognised escape sequences,
# which Python reports as a warning and has announced will become an error.
os.environ['JDK_HOME'] = r"C:\Program Files\Java\jdk-17"
os.environ['JAVA_HOME'] = r"C:\Program Files\Java\jdk-17"

# Append the JDK locations to PATH (value unchanged from the original cell).
# NOTE(review): the last entry is a file (jar.exe), not a directory — confirm it is intended.
os.environ['PATH'] += ';C:\\Program Files\\Java\\jdk-17\\jre\\bin\\server\\;C:\\Program Files\\Java\\jdk-17\\bin\\;C:\\Program Files\\Java\\jdk-17\\bin\\jar.exe'

# Get info

In [132]:
# Channel -> marker lookup for each of the 13 imaging cycles.
# Channel 1 is Hoechst in every cycle; only the remaining channels differ, so they are
# listed per cycle (in cycle order) and merged with the shared Hoechst entry below.
_per_cycle_markers = (
    {3: 'Sox2/Oct4', 4: 'NF-Kb/p-P90rsk'},      # cycle1
    {3: 'SIRT1/P53', 4: 'TRAIL/DR5'},           # cycle2
    {3: 'Cyclin D1/CDK4', 4: 'Bim/Tom20'},      # cycle3
    {3: 'EGFR/GRB2', 4: 'FoxO1/AKT'},           # cycle4
    {2: 'p-ERK/c-MYC'},                         # cycle5
    {4: 'Mcl-1/BAK'},                           # cycle6
    {4: 'Cyclin E/CDK2'},                       # cycle7
    {4: 'AKT/Mtor'},                            # cycle8
    {4: 'TEAD1/YAP'},                           # cycle9
    {2: 'p-EGFR', 3: 'Phalloidin', 4: 'Ki67'},  # cycle10
    {2: 'NBD-C6', 4: 'COX IV'},                 # cycle11
    {2: 'Pan-cytokeratin'},                     # cycle12
    {2: 'Concanavalin A', 4: 'WGA'},            # cycle13
)

markers_map = {
    f'cycle{number}': {1: 'Hoechst', **extra}
    for number, extra in enumerate(_per_cycle_markers, start=1)
}

def get_info(data_raw, marker_dict):
    """Scan ``data_raw`` recursively and tabulate the image files that map to a known marker.

    Directories whose path contains 'after nuclease', 'Test', 'wrong' or 'Afcyc10_' are
    skipped. In the remaining directories a file is considered when its name contains
    "tif", does not contain "sti", and does not contain 'overlay' or 'composite'
    (case-insensitive).

    Metadata is parsed positionally from the names:

    * Condition -- last space-separated word of the parent folder name.
    * FOV       -- last '_'-separated field of the containing folder name.
    * Cycle     -- second '_' field of the containing folder name with its first three
      characters dropped, prefixed with 'cycle'.
    * Channels  -- fifth character from the end of the file name, as int.
    * ROI       -- second '_' field of the file name, as int.
    * Z         -- third '_' field of the file name without its first character, as int.

    Parameters
    ----------
    data_raw : str or path-like
        Root folder handed to ``os.walk``.
    marker_dict : dict
        ``{cycle: {channel: marker}}`` lookup; files whose (cycle, channel) pair is not
        present are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per accepted file with columns Condition, FOV, Cycle, Channels,
        Markers, ROI, Z and Path.

    Raises
    ------
    IndexError, ValueError
        Propagated when a considered file or folder name does not follow the layout above.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "ROI", "Z", "Path"]
    skipped_dir_tokens = ('after nuclease', 'Test', 'wrong', 'Afcyc10_')
    skipped_name_tokens = ('overlay', 'composite')
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        # Don't get the nuclease after bleach channel (nor test / wrong acquisitions)
        if any(token in dirpath for token in skipped_dir_tokens):
            continue

        for name in sorted(filenames):
            if "tif" not in name or "sti" in name:
                continue
            lowered = name.lower()
            if any(token in lowered for token in skipped_name_tokens):
                continue

            # Folder names via os.path so the parsing does not depend on the separator
            # being a backslash.
            fov_folder = os.path.basename(dirpath)
            condition_folder = os.path.basename(os.path.dirname(dirpath))
            condition = condition_folder.split(' ')[-1]
            folder_fields = fov_folder.split('_')
            fov = folder_fields[-1]
            cycle = 'cycle' + folder_fields[1][3:]

            # File name fields
            n_split = name.split('_')
            ch = int(n_split[-1][-5])
            roi = int(n_split[1])
            z = int(n_split[2][1:])

            # Only channels listed for this cycle are kept; anything else is not a marker
            # of interest. Catch just the missing-key case so real errors still surface.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append(
                {
                    "Condition": condition,
                    "FOV": fov,
                    "Cycle": cycle,
                    "Channels": ch,
                    "Markers": marker,
                    "ROI": roi,
                    "Z": z,
                    "Path": os.path.join(dirpath, name),
                }
            )

    # Explicit columns keep the schema stable even when nothing was found.
    return pd.DataFrame(records, columns=columns)

In [37]:
# Location of the cached metadata table (one row per image file accepted by get_info).
df_meta_path = data_dir / '13cyc_3D' / 'metadata' / 'info.csv'
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

if not df_exist:
    # No cache yet: scan the raw folder, sort in natural order and persist to CSV.
    print('Created df')
    df = get_info(data_raw, markers_map)
    df = df.sort_values(
        by=["Condition", "FOV", "Cycle", "Channels"],
        key=natsort_keygen()
    )
    df.to_csv(df_meta_path, index=False)
else:
    # Cache present: reuse it instead of walking the raw folder again.
    print('Loaded df')
    df = pd.read_csv(df_meta_path)

Created df


In [38]:
df

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,ROI,Z,Path
11988,100nM,FW1,cycle1,1,Hoechst,1,1,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
11991,100nM,FW1,cycle1,1,Hoechst,1,2,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
11994,100nM,FW1,cycle1,1,Hoechst,1,3,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
11997,100nM,FW1,cycle1,1,Hoechst,1,4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
12000,100nM,FW1,cycle1,1,Hoechst,1,5,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
...,...,...,...,...,...,...,...,...
46391,control,FW2,cycle13,4,WGA,18,24,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
46394,control,FW2,cycle13,4,WGA,18,25,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
46397,control,FW2,cycle13,4,WGA,18,26,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."
46400,control,FW2,cycle13,4,WGA,18,27,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\P..."


In [39]:
# df[(df.Condition == 'control') & (df.FOV == 'FW1')].groupby(['Condition', 'FOV', 'ROI', 'Cycle']).size().to_csv('test.csv')

In [40]:
# df.groupby(['Condition', 'FOV', 'ROI']).size().to_csv('3D_z_count.csv')

In [41]:
# Sanity check: print the cycles and markers found for every (Condition, FOV) pair.
group = df.groupby(['Condition','FOV'])
for name, df_group in tqdm(group, total=len(group)):
    # Natural sort so that e.g. cycle10 is listed after cycle9.
    df_group = df_group.sort_values(
        by=["Cycle", "Channels"],
        key=natsort_keygen()
    )
    print(name)
    print(df_group.Cycle.unique())
    print(df_group.Markers.unique())

  0%|          | 0/4 [00:00<?, ?it/s]

('100nM', 'FW1')
['cycle1' 'cycle2' 'cycle3' 'cycle4' 'cycle5' 'cycle6' 'cycle7' 'cycle8'
 'cycle9' 'cycle10' 'cycle11' 'cycle12' 'cycle13']
['Hoechst' 'Sox2/Oct4' 'NF-Kb/p-P90rsk' 'SIRT1/P53' 'TRAIL/DR5'
 'Cyclin D1/CDK4' 'Bim/Tom20' 'EGFR/GRB2' 'FoxO1/AKT' 'p-ERK/c-MYC'
 'Mcl-1/BAK' 'Cyclin E/CDK2' 'AKT/Mtor' 'TEAD1/YAP' 'p-EGFR' 'Phalloidin'
 'Ki67' 'NBD-C6' 'COX IV' 'Pan-cytokeratin' 'Concanavalin A' 'WGA']
('100nM', 'FW2')
['cycle1' 'cycle2' 'cycle3' 'cycle4' 'cycle5' 'cycle6' 'cycle7' 'cycle8'
 'cycle9' 'cycle10' 'cycle11' 'cycle12' 'cycle13']
['Hoechst' 'Sox2/Oct4' 'NF-Kb/p-P90rsk' 'SIRT1/P53' 'TRAIL/DR5'
 'Cyclin D1/CDK4' 'Bim/Tom20' 'EGFR/GRB2' 'FoxO1/AKT' 'p-ERK/c-MYC'
 'Mcl-1/BAK' 'Cyclin E/CDK2' 'AKT/Mtor' 'TEAD1/YAP' 'p-EGFR' 'Phalloidin'
 'Ki67' 'NBD-C6' 'COX IV' 'Pan-cytokeratin' 'Concanavalin A' 'WGA']
('control', 'FW1')
['cycle1' 'cycle2' 'cycle3' 'cycle4' 'cycle5' 'cycle6' 'cycle7' 'cycle8'
 'cycle9' 'cycle10' 'cycle11' 'cycle12' 'cycle13']
['Hoechst' 'Sox2/Oct4' 'NF-

# Save hdf5

In [140]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed dataset called ``name`` into an HDF5 file.

    If a dataset or group named ``name`` already exists in the file, nothing is written
    and a message is printed instead.

    Parameters
    ----------
    path : str or path-like
        HDF5 file to open.
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store; the dataset takes its shape and dtype.
    attr_dict : dict, optional
        Key/value pairs stored as attributes on the newly created dataset.
    mode : str, default "a"
        File mode passed to ``h5py.File``.
    """
    # The context manager releases the file handle even if the write raises; the previous
    # explicit close() was skipped on any exception, leaving the file open.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,  # fixed size: identical to the data shape
            chunks=True,          # let h5py choose the chunk layout
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def test_data_exist(file_path, name):
    """Return True when ``name`` is present in the HDF5 file at ``file_path``, else False.

    The file is opened read-only, so opening a missing file raises.
    """
    with h5py.File(file_path, "r") as handle:
        return name in handle
    
def read_img(path):
    """Load the image at ``path`` via ``skimage.io.imread`` with ``as_gray=True``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every element of ``pics`` in parallel with joblib.

    Parameters
    ----------
    task : callable
        Function called with one element of ``pics``.
    pics : iterable
        Inputs to process (image paths in this notebook).
    n_jobs : int, default 20
        Worker count forwarded to ``joblib.Parallel``. The default keeps the previously
        hard-coded value; lower it on machines with fewer cores.

    Returns
    -------
    list
        Results in the order of ``pics``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)

In [152]:
# Table mapping every (Condition, FOV, ROI) to the HDF5 file that holds its stacks.
df_imgs_path = data_dir / '13cyc_3D' / 'metadata' / 'imgs.csv'
df_imgs_path.parent.mkdir(parents=True, exist_ok=True)

# Folder receiving one HDF5 file per (Condition, FOV, ROI).
temp_path = data_dir / '13cyc_3D' / 'hdf5' / 'raw'
temp_path.mkdir(parents=True, exist_ok=True)

df_exist = df_imgs_path.is_file()

if not df_exist:
    print('Created df')

    group = df.groupby(['Condition','FOV', 'ROI'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        # Natural sort by cycle, channel and z so planes are stacked in acquisition order
        df_group = df_group.sort_values(
            by=["Cycle", "Channels", 'Z'],
            key=natsort_keygen()
        )
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append(list(name)+[file_path])

        group_channel = df_group.groupby(['Cycle', 'Channels'])
        for n, df_channel in group_channel:
            # Dataset key "<cycle>_<channel>", computed once and reused for check and write
            dataset_name = '_'.join(np.array(n).astype(str))

            # Skip stacks that were already written (allows resuming an interrupted run).
            # Only file-access errors (e.g. the file does not exist yet) are tolerated here;
            # anything else should surface instead of being silently swallowed.
            try:
                if test_data_exist(file_path, dataset_name):
                    continue
            except OSError:
                pass

            marker = df_channel.iloc[0].Markers
            paths = df_channel.Path.to_numpy()

            # Read all z planes of this cycle/channel in parallel and stack them
            imgs = joblib_loop(read_img, paths)
            imgs = np.array(imgs)
            info = {"Cycle": n[0], "Channel": n[1], "Marker": marker, "Z": df_channel.Z.to_numpy()}

            # hdf5 as Channel -> Z mapping
            save_hdf5(file_path, dataset_name, imgs, info)
    df_imgs = pd.DataFrame(rows, columns=['Condition','FOV', 'ROI', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)

Loaded df


# Calculate best focus

The best-focus plane varies between the ROIs of a single FOV because the lens heats up during acquisition. Therefore, the best focus has to be determined separately for each ROI's z-stack.

In [5]:
from joblib import Parallel, delayed
import cv2 
import scipy 
from collections import defaultdict 

# compute laplacian for each z plane in parallel
def computeLapVar(plane):
    """Return the variance of the Laplacian (``ksize=31``, float64 output) of one image plane."""
    laplacian = cv2.Laplacian(plane, cv2.CV_64F, ksize=31)
    return np.var(laplacian)

# find focus plane via Laplacian variance
def findFocusLapVar(subStack):
    """Estimate the focus plane of a z-stack from the per-plane Laplacian variance.

    Parameters
    ----------
    subStack : np.ndarray
        Stack indexed as ``subStack[z, :, :]``.

    Returns
    -------
    idxFocus : int
        Index of the first peak of the squared gradient of the variance profile, or the
        middle plane when no peak is found; capped at ``len(lapVar) - 2``.
    peaks : np.ndarray
        Indices of all detected peaks.
    lapVar : list
        Laplacian variance of every plane, in z order.
    """
    # Imported explicitly: `import scipy` alone does not expose `scipy.signal` on SciPy
    # versions without lazy submodule loading.
    from scipy.signal import find_peaks

    # One variance value per z plane, computed in parallel threads
    lapVar = Parallel(n_jobs=-1, prefer='threads', verbose=0)(
        delayed(computeLapVar)(subStack[ii, :, :]) for ii in range(subStack.shape[0])
    )

    # Squared gradient of the variance profile: large where sharpness changes fastest
    grad = np.square(np.gradient(lapVar))

    # Keep peaks at or above the median of the squared gradient
    thresh = np.percentile(grad, 50)
    peaks, _props = find_peaks(x=grad, height=thresh, distance=1)

    if len(peaks) == 0:
        idxFocus = len(lapVar) // 2  # middle
    else:
        # NOTE(review): this is the first peak in z order; an earlier comment called it the
        # "tallest peak" — confirm which one is intended.
        idxFocus = peaks[0]

    if idxFocus > len(lapVar) - 2:  # keep at least one plane after the focus index
        idxFocus = len(lapVar) - 2

    return idxFocus, peaks, lapVar

# register cycles across Z by matching focus planes
def registerAlongZ(fixed, moving): # each ZYX
    """Return the z offset that aligns the sharpest plane of ``moving`` with that of ``fixed``.

    Only the Laplacian-variance profiles returned by ``findFocusLapVar`` are used: the
    focus plane of each stack is taken as the plane with maximum variance, and the
    result is ``focus(fixed) - focus(moving)``.
    """
    # The focus index and peak list from findFocusLapVar are intentionally ignored here
    _, _, sharpnessFixed = findFocusLapVar(fixed)
    _, _, sharpnessMoving = findFocusLapVar(moving)

    planeFixed = np.argmax(sharpnessFixed)
    planeMoving = np.argmax(sharpnessMoving)

    # z shift applied to moving to match fixed
    return planeFixed - planeMoving

In [6]:
# Reload the (Condition, FOV, ROI) -> HDF5 path table from its CSV and preview it.
df_imgs_path = data_dir / '13cyc_3D' / 'metadata' / 'imgs.csv'
df_imgs = pd.read_csv(df_imgs_path)
df_imgs.head()

Unnamed: 0,Condition,FOV,ROI,Path
0,100nM,FW1,1,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1,100nM,FW1,2,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
2,100nM,FW1,3,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
3,100nM,FW1,4,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
4,100nM,FW1,5,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...


In [7]:
from collections import defaultdict

# z shift of every cycle relative to cycle1, stored as
# z_shifts_all["<Condition>_<FOV>"][ROI][cycle] = shift
z_shifts_all = defaultdict(dict)
group = df_imgs.groupby(['Condition', 'FOV'])

for name, df_group in group:
    for row in df_group.itertuples():
        path = row.Path

        # Plain dict: every cycle maps to a single integer shift
        z_shifts = {}
        # Reset per ROI so a reference stack from the previous file can never be reused
        fixed = None

        with h5py.File(path, "r") as f:
            keys = natsorted(f.keys())
            for k in tqdm(keys, total=len(keys), leave=False):
                cycle = k.split('_')[0]
                channel = f[k].attrs['Channel']

                # Only channel 1 is used to estimate the focus plane
                if channel != 1:
                    continue

                imgs = f[k][:]

                if cycle == 'cycle1':
                    # cycle1 is the reference every other cycle is matched to
                    fixed = imgs
                    z_shifts['cycle1'] = 0
                    continue

                if fixed is None:
                    raise ValueError(
                        f"{path}: no cycle1 channel 1 stack was read before {cycle}; "
                        "cannot compute a z shift without the reference"
                    )

                z_shifts[cycle] = registerAlongZ(fixed, imgs)

        z_shifts_all['_'.join(name)][row.ROI] = z_shifts

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

In [8]:
z_shifts_all

defaultdict(dict,
            {'100nM_FW1': {1: defaultdict(list,
                          {'cycle1': 0,
                           'cycle2': 2,
                           'cycle3': 3,
                           'cycle4': 2,
                           'cycle5': -1,
                           'cycle6': 0,
                           'cycle7': 2,
                           'cycle8': 0,
                           'cycle9': 2,
                           'cycle10': 2,
                           'cycle11': 17,
                           'cycle12': 15,
                           'cycle13': 0}),
              2: defaultdict(list,
                          {'cycle1': 0,
                           'cycle2': 2,
                           'cycle3': 3,
                           'cycle4': 3,
                           'cycle5': 0,
                           'cycle6': 1,
                           'cycle7': 2,
                           'cycle8': 2,
                           'cycle9': 4,
          

In [11]:
import pickle 

# Persist the computed z shifts so later cells can reload them without recomputing.
z_shift_path = data_dir / '13cyc_3D' / 'metadata' / '13cyc_3D_zshift.pickle'

with open(z_shift_path, 'wb') as f:
    pickle.dump(z_shifts_all, f)

In [19]:
# # Read images
# cycles = []
# imgs_all = []
# channels = []

# with h5py.File(path, "r") as f:
#     for k in tqdm(natsorted(f.keys()), total=len(f.keys()), leave=False):
#         cycle = k.split('_')[0]
#         channel = f[k].attrs['Channel']

#         imgs = f[k][:]
#         if channel == 1:
#             imgs = contrast_str(imgs)
#         cycles.append(cycle)
#         channels.append(channel)
#         imgs_all.append(imgs)

#     cycles = np.array(cycles)
#     channels = np.array(channels)
    
# # Get imgs_stacked
# imgs_matched_z = []
# shift_min = np.min(list(z_shifts.values()))
# shift_max = np.max(list(z_shifts.values()))

# for i, img in enumerate(imgs_all):
#     z = z_shifts[cycles[i]]
#     imgs_matched_z.append(img[shift_max-z:])

# # Get imgs_stacked
# z_min = np.min([i.shape[0] for i in imgs_matched_z])
# imgs_stacked = np.stack([i[:z_min] for i in imgs_matched_z])

In [20]:
# import napari

# napari.view_image(imgs_stacked)

# Save Tiffile

In [103]:
import tifffile as tiff
from skimage import exposure, util
import pickle 

def contrast_str(img, n_min=0.01, n_max=99.9):
    """Stretch the contrast of ``img`` between two percentiles onto the uint16 range.

    ``n_min`` and ``n_max`` are the lower and upper percentiles (in percent) that define
    the input intensity window passed to ``exposure.rescale_intensity``.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    return exposure.rescale_intensity(img, in_range=(lower, upper), out_range=np.uint16)

def make_imgs_same_dim(imgs):
    """Crop all stacks to the smallest common size and contrast-stretch their first plane.

    Parameters
    ----------
    imgs : sequence of np.ndarray
        3-D arrays; axes 1 and 2 are cropped, axis 0 is left untouched.

    Returns
    -------
    list of np.ndarray
        Independent copies cropped to the minimum extent found along axes 1 and 2, with
        index 0 along the first axis replaced by its ``contrast_str`` result. The input
        arrays are not modified.
    """
    # Smallest extent along each of the two trailing axes
    shapes = np.array([img.shape[1:] for img in imgs])
    min_x, min_y = shapes.min(axis=0)

    # Slicing returns views; copy so the assignment below does not write through into
    # the caller's arrays.
    imgs_cropped = [img[:, :min_x, :min_y].copy() for img in imgs]
    for cropped in imgs_cropped:
        cropped[0, ...] = contrast_str(cropped[0, ...])
    return imgs_cropped

In [104]:
# Output folder for the per-cycle TIFF files written by the export loop in this section.
save_path = data_dir / '13cyc_3D' / 'imgs' / 'raw_per_cycle'
save_path.mkdir(parents=True, exist_ok=True)

# Reload the full (Condition, FOV, ROI) -> HDF5 path table.
df_imgs_path = data_dir / '13cyc_3D' / 'metadata' / 'imgs.csv'
df_imgs = pd.read_csv(df_imgs_path)

In [105]:

# group = df_imgs.groupby(['Condition','FOV'])
# for name, df_group in group:
    
#     z_shifts = z_shifts_all['_'.join(name)]
        
#     for i, row in tqdm(enumerate(df_group.itertuples()), total=len(df_group)):
#         path = row.Path

#         # Read images
#         cycles = []
#         imgs_all = []
#         channels = []
#         with h5py.File(path, "r") as f:
#             for k in tqdm(natsorted(f.keys()), total=len(f.keys()), leave=False):
#                 cycle = k.split('_')[0]
#                 channel = f[k].attrs['Channel']

#                 imgs = f[k][:]
#                 if channel == 1:
#                     imgs = contrast_str(imgs)
#                 cycles.append(cycle)
#                 channels.append(channel)
#                 imgs_all.append(imgs)
                       
#         cycles = np.array(cycles)
#         channels = np.array(channels)

#         # Register by z focus
#         imgs_matched_z = []
#         shift_min = np.min(list(z_shifts.values()))
#         shift_max = np.max(list(z_shifts.values()))

#         for i, img in enumerate(imgs_all):
#             z = z_shifts[cycles[i]]
#             imgs_matched_z.append(img[shift_max-z:])

#         # Get imgs_stacked
#         z_min = np.min([i.shape[0] for i in imgs_matched_z])
#         imgs_stacked = np.stack([i[:z_min] for i in imgs_matched_z])

#         for cycle in np.unique(cycles):
#             indices = np.where(cycles == cycle)[0]
#             imgs = imgs_stacked[indices,...]
            
#             for z in range(imgs.shape[1]):
#                  # Create temp path
#                 name = [row[1], row[2], z]
#                 temp_path =  save_path / '_'.join(np.array(name).astype(str))
#                 temp_path.mkdir(parents=True, exist_ok=True)

#                 file_name = '_'.join(np.array(["{:03d}".format(row[3]), cycle]).astype(str)) + '.tif'
#                 file_path = temp_path / file_name

#                 # if os.path.exists(file_path):
#                 #     continue
                
#                 # Write image
#                 tiff.imwrite(file_path, imgs[:,z,...])

In [106]:
import pickle 

# Reload the z shifts saved earlier in this notebook.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook produced.
z_shift_path = data_dir / '13cyc_3D' / 'metadata' / '13cyc_3D_zshift.pickle'

with open(z_shift_path, 'rb') as f:
    z_shifts_all = pickle.load(f)

In [107]:
# Restrict this run to a single FOV.
df_imgs = df_imgs[(df_imgs.Condition == 'control') & (df_imgs.FOV == 'FW1')]

group = df_imgs.groupby(['Condition','FOV'])
for name, df_group in group:

    z_shifts_cond = z_shifts_all['_'.join(name)]

    for row in tqdm(df_group.itertuples(), total=len(df_group)):
        path = row.Path
        # ROIs above 15 reuse the shifts of ROI 15.
        # NOTE(review): the reason for this cut-off is not visible here — confirm.
        if row.ROI > 15:
            z_shifts = z_shifts_cond[15]
        else:
            z_shifts = z_shifts_cond[row.ROI]

        # Read every cycle/channel stack of this ROI; channel 1 is contrast-stretched
        cycles = []
        imgs_all = []
        with h5py.File(path, "r") as f:
            keys = natsorted(f.keys())
            for k in tqdm(keys, total=len(keys), leave=False):
                imgs = f[k][:]
                if f[k].attrs['Channel'] == 1:
                    imgs = contrast_str(imgs)
                cycles.append(k.split('_')[0])
                imgs_all.append(imgs)

        cycles = np.array(cycles)

        # Z focus matching: drop leading planes so the focus planes of all cycles line up
        shift_max = np.max(list(z_shifts.values()))
        imgs_matched_z = [
            stack[shift_max - z_shifts[stack_cycle]:]
            for stack_cycle, stack in zip(cycles, imgs_all)
        ]

        # Truncate to the shortest stack so everything can be combined into one array
        z_min = min(stack.shape[0] for stack in imgs_matched_z)
        imgs_stacked = np.stack([stack[:z_min] for stack in imgs_matched_z])

        for cycle in np.unique(cycles):
            indices = np.where(cycles == cycle)[0]
            imgs = imgs_stacked[indices,...]  # (channels of this cycle, z, ...)

            # One folder per Condition_FOV_cycle, created once rather than per z plane
            cycle_dir = save_path / f'{row.Condition}_{row.FOV}_{cycle}'
            cycle_dir.mkdir(parents=True, exist_ok=True)

            for z in range(imgs.shape[1]):
                # "<ROI, 3 digits>_Z<z>.tif": one multi-channel image per z plane
                file_name = f'{row.ROI:03d}_Z{z}.tif'
                tiff.imwrite(cycle_dir / file_name, imgs[:,z,...])

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

## Copy files into a new folder, matching the maximum and minimum z-stacks

In [119]:
import shutil
img_dir = data_dir / '13cyc_3D' / 'imgs' / 'raw_per_cycle'
save_dir = data_dir /  '13cyc_3D' / 'imgs' / 'raw_matched_z'
save_dir.mkdir(parents=True, exist_ok=True)

for condition in os.listdir(img_dir):
    print(condition)
    if 'control_FW1' not in condition:
        continue
    # Get files
    files = [f for f in os.listdir(img_dir / condition) if 'tif' in f]

    # Destination folder for this Condition_FOV_cycle. A dedicated name is used so the
    # `save_path` defined for the per-cycle export is not overwritten by this cell.
    dest_dir = save_dir / condition
    dest_dir.mkdir(parents=True, exist_ok=True)

    # File names look like "<ROI>_Z<z>.tif"
    df_temp = pd.DataFrame({'ROI': [f.split('_')[0] for f in files],
                            'Z': [f.split('_')[1].split('.')[0][1:] for f in files],
                            'Path': files})
    df_temp = df_temp.sort_values(by=['ROI', 'Z'], key=natsort_keygen())
    # Plain signed integers: uint8 would overflow above 255 planes and makes the negative
    # slice bound below fail under NumPy 2 promotion rules.
    df_temp['Z'] = df_temp['Z'].astype(int)
    # Smallest "highest z index" over all ROIs
    min_z = df_temp.groupby('ROI')['Z'].max().min()

    # Iterate over ROI and trim the surplus planes symmetrically from both ends
    for _roi, df_group in df_temp.groupby('ROI'):
        z_diff = df_group.Z.max() - min_z
        div = z_diff // 2
        rest = z_diff % 2
        # NOTE(review): the `-1-div` bound always drops the last plane, so every ROI keeps
        # `min_z` planes (not `min_z + 1`). Kept as-is because it is consistent across
        # ROIs — confirm whether the extra plane should be retained.
        df_group = df_group.iloc[div+rest:-1-div]
        df_group = df_group.reset_index(drop=True)
        for row in df_group.itertuples():
            source = img_dir / condition / row.Path
            # The new 0-based plane index goes into the "cycle" slot of the file name
            destination = dest_dir / f'{row.ROI}_cycle{row.Index}.tif'
            # if os.path.exists(destination):
            #     continue
            shutil.copy(source, destination)


100nM_FW1_cycle1
100nM_FW1_cycle10
100nM_FW1_cycle11
100nM_FW1_cycle12
100nM_FW1_cycle13
100nM_FW1_cycle2
100nM_FW1_cycle3
100nM_FW1_cycle4
100nM_FW1_cycle5
100nM_FW1_cycle6
100nM_FW1_cycle7
100nM_FW1_cycle8
100nM_FW1_cycle9
100nM_FW2_cycle1
100nM_FW2_cycle10
100nM_FW2_cycle11
100nM_FW2_cycle12
100nM_FW2_cycle13
100nM_FW2_cycle2
100nM_FW2_cycle3
100nM_FW2_cycle4
100nM_FW2_cycle5
100nM_FW2_cycle6
100nM_FW2_cycle7
100nM_FW2_cycle8
100nM_FW2_cycle9
control_FW1_cycle1
control_FW1_cycle10
control_FW1_cycle11
control_FW1_cycle12
control_FW1_cycle13
control_FW1_cycle2
control_FW1_cycle3
control_FW1_cycle4
control_FW1_cycle5
control_FW1_cycle6
control_FW1_cycle7
control_FW1_cycle8
control_FW1_cycle9
control_FW2_cycle1
control_FW2_cycle10
control_FW2_cycle11
control_FW2_cycle12
control_FW2_cycle13
control_FW2_cycle2
control_FW2_cycle3
control_FW2_cycle4
control_FW2_cycle5
control_FW2_cycle6
control_FW2_cycle7
control_FW2_cycle8
control_FW2_cycle9


## Register

In [120]:
from ashlar import fileseries, thumbnail,reg
import matplotlib.pyplot as plt
from ashlar.scripts.ashlar import process_axis_flip

In [121]:
# # Loop all images
# thumb_dir = data_dir / '13cyc_3D' / 'thumbnails'
# imgs_dir = data_dir / '13cyc_3D' / 'imgs' / 'raw_norm'
# save_dir = data_dir / '13cyc_3D' / 'imgs' / 'registered_norm'

# save_dir .mkdir(parents=True, exist_ok=True)
# imgs_dir_list = os.listdir(imgs_dir)
# thumb_dir.mkdir(parents=True, exist_ok=True)

# for dir_path in tqdm(imgs_dir_list):
        
#     # Create reader for each cycle
#     readers = []
#     for i in range(1, 14):
#         reader = fileseries.FileSeriesReader(
#             str(imgs_dir / dir_path),
#             pattern='{series}_cycle'+f'{i}.tif',
#             overlap=0.29,
#             width=6,
#             height=3,
#             layout='snake',
#             direction='horizontal',
#             pixel_size=0.18872, 
#         )
#         readers.append(reader)
#     reader_1 = readers[0]
    
#     # Run stitching
#     aligner0 = reg.EdgeAligner(reader_1, channel=0, filter_sigma=1, max_shift=50, verbose=False,)
#     aligner0.run()
    
#     # Generate merge image for 1 cycle
#     # Parramter
#     mosaic_args = {}
#     mosaic_args['verbose'] = False

#     mosaic = reg.Mosaic(
#             aligner0,aligner0.mosaic_shape,**mosaic_args
#         )
#     writer_class = reg.TiffListWriter
#     writer = writer_class(
#             [mosaic], str(save_dir / (dir_path + '_cycle1_ch{channel}.ome.tif'))
#     )
#     writer.run()
    
#     # Loop through rest of cycles
#     aligners = list()
#     aligners.append(aligner0)

#     for j in range(1, len(readers)):
#         aligners.append(
#             reg.LayerAligner(readers[j], aligners[0], channel=0, filter_sigma=1,  max_shift=50, verbose=False)
#         )
#         aligners[j].run()
#         mosaic = reg.Mosaic(
#             aligners[j], aligners[0].mosaic_shape,**mosaic_args
#         )
#         writer = writer_class(
#                 [mosaic], str(save_dir / (dir_path +'_cycle'+str(j+1)+'_ch{channel}.ome.tif'))
#         )
#         writer.run()
#     break

In [124]:
# Loop all images
# Input: one folder per Condition_FOV_cycle containing "<ROI>_cycle<z>.tif" files.
# Output: stitched mosaics written as "<folder>_z<k>_ch<channel>.ome.tif".
thumb_dir = data_dir / '13cyc_3D' / 'thumbnails'
imgs_dir = data_dir / '13cyc_3D' / 'imgs' / 'raw_matched_z'
save_dir = data_dir / '13cyc_3D' / 'imgs' / 'registered_per_cycle'

save_dir .mkdir(parents=True, exist_ok=True)
imgs_dir_list = os.listdir(imgs_dir)
thumb_dir.mkdir(parents=True, exist_ok=True)

for dir_path in tqdm(imgs_dir_list):
    # The number after "cycle" in these file names is the z-plane index; n is the plane count.
    l =  [f for f in os.listdir(imgs_dir / dir_path) if 'tif' in f]
    z = [int(f.split('cycle')[1].split('.')[0]) for f in l]
    n = np.max(z)+1
    
    # Skip folders whose first output already exists (only the first plane/channel is checked).
    if os.path.exists(save_dir / (dir_path + '_z1_ch0.ome.tif')):
        continue

    # Create reader for each cycle
    # (one reader per z-plane index i; {series} is the per-file field of the pattern)
    readers = []
    for i in range(0, n):
        reader = fileseries.FileSeriesReader(
            str(imgs_dir / dir_path),
            pattern='{series}_cycle'+f'{i}.tif',
            overlap=0.29,
            width=6,
            height=3,
            layout='snake',
            direction='horizontal',
            pixel_size=0.18872, 
        )
        readers.append(reader)
    reader_1 = readers[0]
    
    # Run stitching
    # (the first plane is aligned on its own, using channel 0)
    aligner0 = reg.EdgeAligner(reader_1, channel=0, filter_sigma=2, verbose=False,)
    aligner0.run()
    
    # Generate merge image for 1 cycle
    # Parameters
    mosaic_args = {}
    mosaic_args['verbose'] = False

    mosaic = reg.Mosaic(
            aligner0,aligner0.mosaic_shape,**mosaic_args
        )
    writer_class = reg.TiffListWriter
    writer = writer_class(
            [mosaic], str(save_dir / (dir_path + '_z1_ch{channel}.ome.tif'))
    )
    writer.run()
    
    # Loop through rest of cycles
    # (every remaining plane is aligned against the first aligner and written with the
    # first plane's mosaic shape; output z numbering is 1-based)
    aligners = list()
    aligners.append(aligner0)

    for j in range(1, len(readers)):
        aligners.append(
            reg.LayerAligner(readers[j], aligners[0], channel=0, filter_sigma=2, verbose=False)
        )
        aligners[j].run()
        mosaic = reg.Mosaic(
            aligners[j], aligners[0].mosaic_shape,**mosaic_args
        )
        writer = writer_class(
                [mosaic], str(save_dir / (dir_path +'_z'+str(j+1)+'_ch{channel}.ome.tif'))
        )
        writer.run()


  0%|          | 0/52 [00:00<?, ?it/s]

    assembling thumbnail 18/18
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle offset [y x] = [0. 0.]
    assembling thumbnail 18/18
    estimated cycle

# Rescale data for FOV 1 and save in corresponding folder for registration

The images have been stitched per cycle; we can now proceed with inter-cycle registration and save the data in a clean folder structure.

In [125]:
from skimage.transform import resize
from skimage import util

# Both folders live under the same image root of the 13-cycle 3D dataset
imgs_root = data_dir / '13cyc_3D' / 'imgs'

# Input: list the files currently present in the per-cycle registration folder
imgs_dir = imgs_root / 'registered_per_cycle'
imgs_dir_list = os.listdir(imgs_dir)

# Output: folder that will receive the rescaled stacks (created if missing)
save_dir = imgs_root / 'raw_stitched'
save_dir.mkdir(parents=True, exist_ok=True)

In [126]:
# File names are split on '_' into five fields:
# <Condition>_<FOV>_<Cycle>_<Z>_<Channel>.ome.tif
name_fields = ['Condition', 'FOV', 'Cycle', 'Z', 'Channel']

df_dir = pd.DataFrame([file_name.split('_') for file_name in imgs_dir_list])
df_dir.columns = name_fields

# Keep the original file name and strip everything after the first '.'
# from the channel token (e.g. 'ch0.ome.tif' -> 'ch0')
df_dir = df_dir.assign(
    Path=imgs_dir_list,
    Channel=df_dir['Channel'].map(lambda token: token.split('.')[0]),
)

# Natural sort so that e.g. cycle2 comes before cycle10
df_dir = df_dir.sort_values(
    by=name_fields,
    key=natsort_keygen(),
)
df_dir.head()

Unnamed: 0,Condition,FOV,Cycle,Z,Channel,Path
108,100nM,FW1,cycle1,z1,ch0,100nM_FW1_cycle1_z1_ch0.ome.tif
109,100nM,FW1,cycle1,z1,ch1,100nM_FW1_cycle1_z1_ch1.ome.tif
110,100nM,FW1,cycle1,z1,ch2,100nM_FW1_cycle1_z1_ch2.ome.tif
111,100nM,FW1,cycle1,z2,ch0,100nM_FW1_cycle1_z2_ch0.ome.tif
112,100nM,FW1,cycle1,z2,ch1,100nM_FW1_cycle1_z2_ch1.ome.tif


In [127]:

# One output folder per (Condition, FOV, Z); each folder receives one
# multi-channel TIFF per cycle.
group = df_dir.groupby(['Condition', 'FOV', 'Z'])

for name, df_group in group:
    name = '_'.join(name)

    # Define temp save path
    temp_path = save_dir / name
    temp_path.mkdir(parents=True, exist_ok=True)

    # Reference dimensions: shape of the cycle 2 / ch0 image of this group.
    # .item() raises unless exactly one row matches.
    path = df_group[(df_group.Cycle == 'cycle2') & (df_group.Channel == 'ch0')].Path.item()
    dim = skimage.io.imread(imgs_dir / path).shape

    # Group by the scalar column name (not the one-element list ['Cycle']):
    # with a list, pandas >= 2 yields 1-tuples such as ('cycle1',) as keys,
    # which would break both the comparison and the file name below.
    for cycle, df_cycle in df_group.groupby('Cycle'):
        # Stack all channels of this cycle into one (channel, y, x) array
        imgs = np.stack([skimage.io.imread(imgs_dir / f) for f in df_cycle.Path.tolist()])

        if cycle == 'cycle1':
            # Cycle 1 is resampled to the reference dimensions
            # (presumably it was acquired at a different size - TODO confirm).
            # resize returns floats, hence the conversion back to uint16.
            imgs = resize(imgs, (len(imgs), dim[0], dim[1]))
            imgs = util.img_as_uint(imgs)
        else:
            # All other cycles are only cropped to the reference dimensions
            imgs = imgs[:, :dim[0], :dim[1]]

        # Save image; the '001_<cycle>.tif' name is what the ashlar
        # FileSeriesReader pattern '{series}_cycle<N>.tif' expects
        file_name = '_'.join(['001', cycle]) + '.tif'
        file_path = temp_path / file_name

        tiff.imwrite(file_path, imgs)

## Register again

In [128]:
from ashlar import fileseries, thumbnail,reg
import matplotlib.pyplot as plt
from ashlar.scripts.ashlar import process_axis_flip

In [129]:
# Inter-cycle registration: every sub-folder of raw_stitched holds one
# '001_cycle<N>.tif' per cycle; cycle 1 is the reference and cycles 2-13
# are aligned onto it.
imgs_dir = data_dir / '13cyc_3D' / 'imgs' / 'raw_stitched'
save_dir = data_dir / '13cyc_3D' / 'imgs' / 'registered_final'

save_dir.mkdir(parents=True, exist_ok=True)
imgs_dir_list = os.listdir(imgs_dir)

for dir_path in tqdm(imgs_dir_list):
    sample_dir = str(imgs_dir / dir_path)

    # One single-tile reader (width=1, height=1) per cycle, cycles 1..13
    readers = [
        fileseries.FileSeriesReader(
            sample_dir,
            pattern='{series}_cycle' + f'{cycle_number}.tif',
            overlap=0.29,
            width=1,
            height=1,
            layout='snake',
            direction='horizontal',
            pixel_size=0.18872,
        )
        for cycle_number in range(1, 14)
    ]
    reader_1 = readers[0]

    # Reference alignment on the first cycle, channel 0
    aligner0 = reg.EdgeAligner(reader_1, channel=0, filter_sigma=2, verbose=False)
    aligner0.run()

    # Shared mosaic options and writer for all cycles
    mosaic_args = {'verbose': False}
    writer_class = reg.TiffListWriter

    # Write the reference cycle, one file per channel
    mosaic = reg.Mosaic(aligner0, aligner0.mosaic_shape, **mosaic_args)
    writer = writer_class(
        [mosaic], str(save_dir / (dir_path + '_cycle1_ch{channel}.ome.tif'))
    )
    writer.run()

    # Align every remaining cycle to the reference and write it with the
    # reference mosaic shape
    aligners = [aligner0]
    for j in range(1, len(readers)):
        layer_aligner = reg.LayerAligner(
            readers[j], aligners[0], channel=0, filter_sigma=2, verbose=False
        )
        aligners.append(layer_aligner)
        layer_aligner.run()

        mosaic = reg.Mosaic(layer_aligner, aligners[0].mosaic_shape, **mosaic_args)
        out_pattern = dir_path + '_cycle' + str(j + 1) + '_ch{channel}.ome.tif'
        writer = writer_class([mosaic], str(save_dir / out_pattern))
        writer.run()


  0%|          | 0/36 [00:00<?, ?it/s]

    assembling thumbnail 1/1
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-20.   6.]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-12.999999  22.      ]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-14.  20.]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-16.   2.]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-16.   2.]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-20.        25.999998]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-12.999999   5.      ]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-1.  0.]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-16.        12.999999]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-20.999998  17.      ]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-5. 23.]
    assembling thumbnail 1/1
    estimated cycle offset [y x] = [-12.999999  20

# Save data

In [133]:
def get_info(data_raw, marker_dict):
    """Collect per-image metadata for every TIFF found below ``data_raw``.

    File names are split on ``'_'`` and read as
    ``<Condition>_<FOV>_<Z>_<Cycle>_<Channel>...``; the channel is the first
    three characters of the fifth token (e.g. ``'ch0'``).

    Parameters
    ----------
    data_raw : path-like
        Root folder that is walked recursively.
    marker_dict : mapping
        ``marker_dict[cycle][channel]`` gives the marker name. Images whose
        cycle/channel pair is not present are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns ``Condition, FOV, Z, Cycle, Channels, Markers, Path``,
        one row per retained image.
    """
    columns = ["Condition", "FOV", "Z", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        for name in natsorted(filenames):
            # Match on the extension; a plain substring test would also
            # accept unrelated files that merely contain "tif" in their name
            if not name.lower().endswith(('.tif', '.tiff')):
                continue

            # Get information from image name; skip files that do not
            # follow the five-field naming scheme instead of crashing
            n_split = name.split('_')
            if len(n_split) < 5:
                continue

            cond, fov, z, cycle = n_split[:4]
            ch = n_split[4][:3]

            # Membership tests instead of try/bare-except: nothing but a
            # missing key is silenced, and a defaultdict passed as
            # marker_dict does not get empty entries inserted by the lookup
            if cycle not in marker_dict or ch not in marker_dict[cycle]:
                continue
            marker = marker_dict[cycle][ch]

            records.append(
                (cond, fov, z, cycle, ch, marker, os.path.join(dirpath, name))
            )

    return pd.DataFrame(records, columns=columns)

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every element of ``pics`` using joblib workers.

    Parameters
    ----------
    task : callable
        Function called with one element of ``pics``.
    pics : iterable
        Inputs, one per call.
    n_jobs : int, optional
        Number of joblib workers (default 20, the previously hard-coded
        value).

    Returns
    -------
    The value returned by ``joblib.Parallel`` for these calls.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)

def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread(..., as_gray=True)``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def get_min(imgs):
    """Return, axis by axis, the smallest extent found among ``imgs``.

    ``imgs`` is a sequence of arrays with the same number of dimensions;
    the result is a 1-D array holding the minimum size along each axis.
    """
    shape_table = np.array([np.array(img.shape) for img in imgs])
    return shape_table.min(axis=0)

In [134]:
from collections import defaultdict

# Re-key every cycle's markers by their position in that cycle's dict
# ('ch0', 'ch1', ...) instead of the original channel numbers, matching the
# 'ch<N>' tokens used in the registered file names.
markers_map_new = defaultdict(dict)
for cycle_name, channel_to_marker in markers_map.items():
    for position, marker_name in enumerate(channel_to_marker.values()):
        markers_map_new[cycle_name][f'ch{position}'] = marker_name
        

In [135]:
# Registered images to index, and the CSV that caches the resulting table
data_raw = data_dir / '13cyc_3D' / 'imgs' / 'registered_final'
df_meta_path = data_dir / '13cyc_3D' / 'metadata' / 'info_sti.csv'

# exist_ok=False on purpose: an already existing folder is reported
try:
    df_meta_path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_meta_path.is_file()

# Reuse the cached table when the CSV exists, otherwise build and store it
if df_exist:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw, markers_map_new)
    df.to_csv(df_meta_path, index=False)

Folder is already there
Created df


In [136]:
df

Unnamed: 0,Condition,FOV,Z,Cycle,Channels,Markers,Path
0,100nM,FW1,z1,cycle1,ch0,Hoechst,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1,100nM,FW1,z1,cycle1,ch1,Sox2/Oct4,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
2,100nM,FW1,z1,cycle1,ch2,NF-Kb/p-P90rsk,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
3,100nM,FW1,z1,cycle2,ch0,Hoechst,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
4,100nM,FW1,z1,cycle2,ch1,SIRT1/P53,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
...,...,...,...,...,...,...,...
1219,control,FW2,z9,cycle12,ch0,Hoechst,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1220,control,FW2,z9,cycle12,ch1,Pan-cytokeratin,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1221,control,FW2,z9,cycle13,ch0,Hoechst,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1222,control,FW2,z9,cycle13,ch1,Concanavalin A,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...


In [137]:
# Smallest image dimensions per (Condition, FOV).
# Only the cycle1 / ch1 images of each group are measured (presumably one
# per Z plane - verify against df); the result is keyed '<Condition>_<FOV>'.
group = df.groupby(['Condition', 'FOV'])
min_dim = {}

for name, df_group in tqdm(group, total=len(group)):
    # No sorting needed: a minimum over shapes does not depend on order
    reference_rows = df_group[(df_group.Cycle == 'cycle1') & (df_group.Channels == 'ch1')]
    paths = reference_rows.Path.to_numpy()

    imgs = joblib_loop(read_img, paths)
    min_dim['_'.join(name)] = get_min(imgs)

  0%|          | 0/4 [00:00<?, ?it/s]

In [138]:
min_dim

{'100nM_FW1': array([3439, 8589]),
 '100nM_FW2': array([3447, 8577]),
 'control_FW1': array([3446, 8595]),
 'control_FW2': array([3445, 8597])}

In [141]:
# CSV index of the HDF5 files and the folder that holds those files
df_imgs_path = data_dir / '13cyc_3D' / 'metadata' / 'imgs_reg.csv'
temp_path = data_dir / '13cyc_3D' / 'hdf5' / 'registered'

# exist_ok=False on purpose: an already existing folder is reported
try:
    temp_path.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_imgs_path.is_file()

if df_exist:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)
else:
    print('Created df')
    group = df.groupby(['Condition', 'FOV', 'Z'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        # One HDF5 file per (Condition, FOV, Z); it is listed in the index
        # whether or not it has to be written in this run
        file_path = temp_path / ('_'.join(map(str, name)) + '.hdf5')
        rows.append([*name, file_path])

        # Sort by cycle and channels
        df_group = df_group.sort_values(
            by=["Cycle", "Channels", 'Z'],
            key=natsort_keygen(),
        )

        # Files written by an earlier run are not regenerated
        if file_path.exists():
            continue

        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()

        # Read all images of this plane and crop them to the minimum
        # dimensions computed for their '<Condition>_<FOV>'
        imgs = joblib_loop(read_img, paths)
        min_shape = min_dim['_'.join(name[:2])]
        max_row, max_col = min_shape[0], min_shape[1]
        imgs_cropped = np.array([img[:max_row, :max_col] for img in imgs])
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # save_hdf5 is defined outside this cell; called as
        # (path, dataset name, image stack, attribute dict)
        save_hdf5(file_path, 'imgs', imgs_cropped, info)

    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Z', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)

Folder is already there
Created df


  0%|          | 0/36 [00:00<?, ?it/s]

# Generate images

In [142]:
from skimage.exposure import rescale_intensity
from skimage import exposure, util, filters, restoration

def contrast_str(img, n_min=10, n_max=100):
    """Percentile-based contrast stretch returning an 8-bit image.

    The ``n_min``-th and ``n_max``-th percentiles of ``img`` are used as the
    input range of ``rescale_intensity``; the rescaled image is then
    converted with ``util.img_as_ubyte``.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

In [143]:
# Load the index of registered stacks (columns Condition, FOV, Z, Path)
# from imgs_reg.csv
df_imgs_path = data_dir / '13cyc_3D'/'metadata' / 'imgs_reg.csv'
df_imgs = pd.read_csv(df_imgs_path)
df_imgs.head()

Unnamed: 0,Condition,FOV,Z,Path
0,100nM,FW1,z1,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
1,100nM,FW1,z2,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
2,100nM,FW1,z3,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
3,100nM,FW1,z4,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...
4,100nM,FW1,z5,y:\coskun-lab\Thomas\23_PLA_revision\data\13cy...


In [144]:
# Markers whose images are selected as cytoplasm signal when the
# segmentation input images are built
cyto_markers = ['p-EGFR', 'Phalloidin']

In [145]:
# Z is joined into file names with str.join later on, so make sure it is a string
df_imgs['Z'] = df_imgs['Z'].astype(str)

In [146]:
whole_seg_path = data_dir / '13cyc_3D' / 'imgs' / 'segmentation'
whole_seg_path.mkdir(parents=True, exist_ok=True)

# Save combined images: one 3-plane uint8 TIFF per (Condition, FOV, Z)
for row in df_imgs.itertuples():
    # Read the image stack and its marker names from the HDF5 file
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']

    # Nuclear image: first image of the stack (presumably Hoechst of
    # cycle 1 given how the stack was ordered - verify).
    # Cytoplasm images: every image whose marker is in cyto_markers.
    indices = np.isin(markers, cyto_markers)
    img_dapi = imgs[0]
    imgs_cyto = imgs[indices, :]

    # Contrast stretching; all selected cytoplasm channels are used, so the
    # code no longer assumes that exactly two markers were matched
    img_dapi = contrast_str(img_dapi, n_max=99.9)
    imgs_cyto_scaled = [contrast_str(img, n_max=99.9) for img in imgs_cyto]
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)

    # Plane order: empty, cytoplasm, nucleus. zeros_like keeps the empty
    # plane in the 8-bit dtype returned by contrast_str instead of float64
    img_rgb = np.stack(
        [np.zeros_like(img_dapi), img_cyto, img_dapi], axis=0
    ).astype(np.uint8)

    # File name '<Condition>_<FOV>_<Z>.tif' (fields 1-3 of the row tuple)
    file_name = f'{"_".join(row[1:4])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)

# Segmentation using cellpose

In [9]:
from skimage import exposure, util
from cellpose import core, utils, io, models, metrics

# Ask cellpose whether a GPU can be used and print the answer;
# the result is used as an index into yn (falsy -> 'NO', truthy -> 'YES')
use_GPU = core.use_gpu()
yn = ['NO', 'YES']
print(f'>>> GPU activated? {yn[use_GPU]}')

def contrast_str(img, n_min=0.1, n_max=99.95):
    """Stretch the contrast of ``img`` between two percentiles, as 8-bit.

    The ``n_min``/``n_max`` percentiles define the input range passed to
    ``exposure.rescale_intensity``; the result is converted with
    ``util.img_as_ubyte``.
    """
    low_cut, high_cut = np.percentile(img, (n_min, n_max))
    return util.img_as_ubyte(
        exposure.rescale_intensity(img, in_range=(low_cut, high_cut))
    )

>>> GPU activated? YES


In [10]:
# Folder with the images to segment
whole_seg_path = data_dir /  '13cyc_3D' / 'imgs' / 'segmentation'

# Output folder for the masks, created if it does not exist yet
mask_path = data_dir  / '13cyc_3D'/ 'imgs' / 'masks'
mask_path.mkdir(parents=True, exist_ok=True)

In [11]:
# 2D cytoplasm and nuclei segmentation of every image in whole_seg_path.
# The two models do not depend on the image, so they are created once
# instead of once per file.
model_cyto = models.CellposeModel(gpu=True, model_type='CP')
model_nuclei = models.CellposeModel(gpu=True, model_type='nuclei')

for p in os.listdir(whole_seg_path):
    if 'tif' not in p:
        continue

    # Rearrange the image as read by skimage so that its last axis comes first
    img = skimage.io.imread(whole_seg_path / p).transpose((2, 0, 1))

    # Cyto segmentation; the mask is saved under the input file name
    mask_cyto, flows, styles = model_cyto.eval(
        img,
        channels=[2, 3],
        diameter=150,
        flow_threshold=0.8,
        cellprob_threshold=-3,
    )
    file_path = mask_path / p
    tiff.imwrite(file_path, mask_cyto)

    # Nuclei segmentation; the mask is saved with a 'Nuclei_' prefix
    mask_nuclei, flows, styles = model_nuclei.eval(
        img,
        channels=[3, 3],
        diameter=100,
        flow_threshold=0.6,
    )
    file_path = mask_path / f'Nuclei_{p}'
    tiff.imwrite(file_path, mask_nuclei)


: 

# 3D segmentation

In [16]:
from natsort import natsorted
import tifffile as tiff
import glob 

def make_imgs_same_dim(imgs):
    """Crop 3-D images to a common size along their last two axes.

    The smallest extent found along axis 1 and along axis 2 over all
    ``imgs`` is used as the crop size; axis 0 is left untouched.
    Returns a list with one cropped image per input image.
    """
    # Minimum (not maximum) extent per axis over all images
    limit_1, limit_2 = np.array([img.shape[1:] for img in imgs]).min(axis=0)

    cropped = []
    for img in imgs:
        cropped.append(img[:, :limit_1, :limit_2])
    return cropped

In [9]:
from skimage import exposure, util
from cellpose import core, utils, io, models, metrics

# Ask cellpose whether a GPU can be used and print the answer;
# the result is used as an index into yn (falsy -> 'NO', truthy -> 'YES')
use_GPU = core.use_gpu()
yn = ['NO', 'YES']
print(f'>>> GPU activated? {yn[use_GPU]}')

def contrast_str(img, n_min=0.1, n_max=99.95):
    """Rescale ``img`` between its ``n_min`` and ``n_max`` percentiles.

    The rescaling is done by ``exposure.rescale_intensity`` and the result
    is returned after conversion with ``util.img_as_ubyte``.
    """
    bounds = np.percentile(img, (n_min, n_max))
    floor_value, ceil_value = bounds
    rescaled = exposure.rescale_intensity(img, in_range=(floor_value, ceil_value))
    rescaled = util.img_as_ubyte(rescaled)
    return rescaled

>>> GPU activated? YES


In [11]:
# Folder with the images to segment
whole_seg_path = data_dir /  '13cyc_3D' / 'imgs' / 'segmentation'

# Output folder for the 3D masks, created if it does not exist yet
mask_path = data_dir  / '13cyc_3D'/ 'imgs' / 'masks_3D'
mask_path.mkdir(parents=True, exist_ok=True)

In [12]:
# Load the index of registered stacks from imgs_reg.csv
# (note: this reuses the name df_meta_path for the imgs_reg.csv path)
df_meta_path = data_dir /'13cyc_3D'  / 'metadata' / 'imgs_reg.csv'
df_imgs = pd.read_csv(df_meta_path)

In [17]:
# One 3D segmentation per (Condition, FOV)
group = df_imgs.groupby(['Condition', 'FOV'])
# Number of images dropped from each end of the stack before segmentation
n_limit = 1

In [152]:
import glob 

# The models do not depend on the sample: build them once, not per group
model_cyto = models.CellposeModel(gpu=True, model_type='CP')
model_nuclei = models.CellposeModel(gpu=True, model_type='nuclei')

for name, df_group in group:
    exp = '_'.join(name)

    # All segmentation images of this sample, in natural order.
    # The pattern is built with pathlib so the separator is not
    # hard-coded to the Windows backslash.
    f_names = natsorted(glob.glob(str(whole_seg_path / f'{exp}*')))

    # Read imgs, crop them to a common size and stack them along a new
    # first axis
    imgs = [skimage.io.imread(f).transpose(2, 0, 1) for f in f_names]
    imgs = np.stack(make_imgs_same_dim(imgs))

    # Drop n_limit images at each end. An explicit end index is used
    # because imgs[n_limit:-n_limit] would be empty for n_limit == 0.
    imgs_middle = imgs[n_limit:len(imgs) - n_limit]

    # 3D cytoplasm segmentation
    masks, flows, styles = model_cyto.eval(
        imgs_middle,
        channels=[2, 3],
        diameter=150,
        flow_threshold=0.3,
        do_3D=True,
        anisotropy=2.1,
    )
    file_path = mask_path / f'{exp}.tif'
    tiff.imwrite(file_path, masks)

    # 3D nuclei segmentation, saved with a 'Nuclei_' prefix
    masks, flows, styles = model_nuclei.eval(
        imgs_middle,
        channels=[3, 0],
        diameter=100,
        do_3D=True,
        anisotropy=2.1,
    )
    file_path = mask_path / f'Nuclei_{exp}.tif'
    tiff.imwrite(file_path, masks)