In [1]:
import itertools
import os
import re
import sys
from collections import defaultdict
from pathlib import Path

import cv2
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io
import skimage.measure
import tifffile as tiff
from joblib import Parallel, delayed
from tqdm.notebook import trange, tqdm, tqdm_notebook


  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Project data folders are resolved relative to the parent of the current
# working directory; the raw acquisition lives on a mapped network drive.
data_dir = Path.cwd().parents[0].joinpath('data').absolute()
data_processed = data_dir.joinpath('processed')
data_raw = r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\Specificity experiments\PLUS-MINUS specificity'


In [4]:
import os
 
# Point the Java tooling at the local JDK install.
# A raw string is used because "\P", "\J" and "\j" are not valid escape
# sequences; in a normal string literal they only work by accident and
# trigger a warning on recent Python versions.
java_home = r"C:\Program Files\Java\jdk-17"
os.environ['JDK_HOME'] = java_home
os.environ['JAVA_HOME'] = java_home

# Append the JDK entries to PATH only once so that re-running this cell does
# not keep growing the variable.
java_path_entries = ';C:\\Program Files\\Java\\jdk-17\\jre\\bin\\server\\;C:\\Program Files\\Java\\jdk-17\\bin\\;C:\\Program Files\\Java\\jdk-17\\bin\\jar.exe'
current_path = os.environ.get('PATH', '')
if java_path_entries not in current_path:
    os.environ['PATH'] = current_path + java_path_entries

# Get info

In [5]:
# Lookup table used by get_info(): cycle name -> {channel number -> marker name}.
# A (cycle, channel) pair that is not listed here has no marker, and get_info()
# skips the corresponding image file.
markers_map = {
    'cycle1': {
        1: 'DNA', 
        4: 'PLUS Perk',
    },
    'cycle2': {
        1: 'DNA', 
        4: 'MINUS Yap',
    },
    'cycle3': {
        1: 'DNA', 
        4: 'Cyclin E/CDK2',
    },
    # cycle4 is the only cycle with a marker on every one of the four channels.
    'cycle4': {
        1: 'DNA',
        2: 'Concanavalin A',
        3: 'Phalloidin',  
        4: 'WGA',
    },
}

def get_info(data_raw, marker_dict):
    """Collect metadata for the stitched raw images found under ``data_raw``.

    The directory tree is walked recursively. Directories whose path contains
    'after nuclease', 'Test' or 'dont use' are skipped. A file is considered
    when its name contains both "tif" and "sti" and neither 'overlay' nor
    'Composite'.

    For each accepted file:
      * the channel is the digit located five characters from the end of the
        file name (i.e. right before a 4-character extension such as ".tif");
      * the cycle is 'cycle' + the last character of the 4th '_'-separated
        field of the containing folder name;
      * the FOV is the last '_'-separated field of the containing folder name;
      * the condition is always 'Control'.

    Files whose (cycle, channel) pair is not present in ``marker_dict`` are
    skipped.

    Parameters
    ----------
    data_raw : str or path-like
        Root folder to scan.
    marker_dict : dict
        Mapping ``cycle name -> {channel number -> marker name}``.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns Condition, FOV, Cycle, Channels,
        Markers and Path.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    excluded_dir_tags = ('after nuclease', 'Test', 'dont use')
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        if any(tag in dirpath for tag in excluded_dir_tags):
            continue

        for name in sorted(filenames):
            if "tif" not in name or "sti" not in name:
                continue
            if 'overlay' in name or 'Composite' in name:
                continue

            # Use the OS path helpers instead of splitting on '\\' so the
            # folder name is found whatever separator the path uses.
            folder_fields = os.path.basename(os.path.normpath(dirpath)).split('_')

            ch = int(name.split('_')[-1][-5])
            cycle = 'cycle' + folder_fields[3][-1]

            # Only a missing (cycle, channel) entry means "no marker, skip";
            # any other error should surface instead of being swallowed.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append({
                "Condition": 'Control',
                "FOV": folder_fields[-1],
                "Cycle": cycle,
                "Channels": ch,
                "Markers": marker,
                "Path": os.path.join(dirpath, name),
            })

    return pd.DataFrame(records, columns=columns)

In [66]:
# Build the raw-image metadata table once and cache it as CSV; later runs
# reload the cached file instead of walking the image folder again.
df_meta_path = data_dir.joinpath('specificity_PlusMinus', 'metadata', 'info.csv')
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

if df_exist:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)

Created df


In [67]:
# Display the metadata table that was just created or loaded.
df

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,Path
0,Control,FW1,cycle1,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
1,Control,FW1,cycle1,4,PLUS Perk,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
2,Control,FW1,cycle2,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
3,Control,FW1,cycle2,4,MINUS Yap,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
4,Control,FW1,cycle3,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
5,Control,FW1,cycle3,4,Cyclin E/CDK2,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
6,Control,FW1,cycle4,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
7,Control,FW1,cycle4,2,Concanavalin A,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
8,Control,FW1,cycle4,3,Phalloidin,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
9,Control,FW1,cycle4,4,WGA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...


# Registration ImageJ

## Registration

In [8]:
import tifffile as tf
from PIL import Image
import PIL.Image
PIL.Image.MAX_IMAGE_PIXELS = 933120000
import shutil
from datetime import date, datetime
import skimage.io 
from skimage import util
from skimage.transform import resize

In [9]:
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=100):
    """Percentile-based contrast stretch, returned as an unsigned 8-bit image.

    The intensities between the ``n_min``-th and ``n_max``-th percentiles of
    ``img`` are used as the input range for the rescale, and the rescaled
    image is then converted with ``img_as_ubyte``.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

def make_imgs_same_dim(imgs):
    """Crop 3-D arrays to the smallest extent shared on their last two axes.

    The first axis of every array is left untouched; axes 1 and 2 are cut
    (from the origin) to the minimum size found across ``imgs``.
    Returns a new list of cropped views in the same order as the input.
    """
    # Smallest size on axis 1 and axis 2 over all inputs
    n_rows, n_cols = np.min([img.shape[1:] for img in imgs], axis=0)
    return [img[:, :n_rows, :n_cols] for img in imgs]

In [10]:
# Export, for every (Condition, FOV), one folder per channel that contains one
# TIFF per cycle:
#   <regSavePath>/<Condition>_<FOV>/Original/CH<ch>/CH<ch>_Cycle<NN>.tif
# Channels that were not acquired in a cycle get an all-zero placeholder image
# so every channel folder holds the same number of slices.
regSavePath = data_dir /'specificity_PlusMinus'  /'imgs' / 'registered_imagej'
regSavePath.mkdir(parents=True, exist_ok=True)

chs = [1, 2, 3, 4]
group = df.groupby(['Condition', 'FOV'])
for name, df_group in group:
    # Group by the column label (not a one-element list): with a list, newer
    # pandas versions yield 1-tuples as keys, which would turn the cycle
    # number below into the whole 'cycleN' string.
    for cycle, df_cycle in df_group.groupby('Cycle'):
        cycle = cycle[len('cycle'):]  # 'cycle3' -> '3'
        channel = df_cycle.Channels.tolist()

        # Work out which outputs are still missing before touching the
        # (large) input images; existing files are never overwritten.
        pending = {}
        for ch in chs:
            # Save path per Channel
            folderPath = os.path.join(regSavePath, '_'.join(name), 'Original', 'CH' + str(ch)) # 1 index
            os.makedirs(folderPath, exist_ok = True)

            fileOut = 'CH' + str(ch) + '_Cycle' + str(cycle).zfill(2) + '.tif'
            fileOut = os.path.join(folderPath, fileOut)
            if not os.path.exists(fileOut):
                pending[ch] = fileOut

        if not pending:
            continue

        imgs = []
        for f in df_cycle.Path.tolist():
            img = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None instead of raising
            if img is None:
                raise FileNotFoundError(f'Could not read image: {f}')
            imgs.append(img)

        for ch, fileOut in pending.items():
            if ch in channel:
                img = imgs[channel.index(ch)]
                if ch == 1:
                    # Channel 1 is the registration reference: stretch its contrast
                    img = contrast_str(img, n_min=0.1, n_max=99.9)
            else:
                # Channel not acquired in this cycle: write an empty placeholder
                img = np.zeros(imgs[0].shape, np.uint8)
            tf.imwrite(fileOut, img, photometric = 'minisblack', bigtiff = True)



  for cycle, df_cycle in df_group.groupby(['Cycle']):


In [11]:
# For every (Condition, FOV) write an ImageJ macro that
#   1. registers the cycles on channel 1 ("Register Virtual Stack Slices"), and
#   2. applies the recorded transforms to every channel
#      ("Transform Virtual Stack Slices").
# The macro is saved in <regSavePath>/<name>/Original/CH1 and the runMacro(...)
# command needed to launch it from ImageJ is printed.
group = df.groupby(['Condition', 'FOV'])
chs = [1, 2, 3, 4]

for name, channels in group:
    name = '_'.join(name)

    # folder to save registered images separated by channel to apply transforms
    # create all folder
    for ch in chs: # all channels
        os.makedirs(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)), exist_ok = True)
        os.makedirs(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)), exist_ok = True)

    # Build an absolute macro path instead of calling os.chdir(): changing the
    # kernel's working directory would silently alter every later cwd-relative
    # path (data_dir itself is derived from the working directory).
    date_time = datetime.now().strftime("%d%b%Y")
    macro_name = date_time + '_register_transforms.ijm'
    macro_path = os.path.join(regSavePath, name, 'Original', 'CH1', macro_name)

    # The context manager guarantees the macro file is flushed and closed even
    # if one of the writes fails.
    with open(macro_path, 'w') as macro:
        # register cycles on CH1
        macro.write('run("Register Virtual Stack Slices", "source=[')
        # original files
        macro.write(os.path.join(regSavePath, name, 'Original', 'CH1').replace('\\', '/'))
        macro.write('] output=[')
        # registered output files
        macro.write(os.path.join(regSavePath, name, 'Registered', 'CH1').replace('\\', '/'))

        # Rigid registration: translation + rotation
        # NOTE(review): nothing is written between "save_dir=[" and "]", so the
        # macro contains an empty save_dir; the transform step below reads the
        # transforms from Original/CH1. Confirm the plugin stores them there.
        macro.write('] feature=Rigid registration=[Rigid                -- translate + rotate                  ] advanced shrinkage save save_dir=[')
        # folder to save recorded transformations 
        macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Rigid registration_model=[Rigid                -- translate + rotate                  ] interpolate"); \n')

        # # bigwrap registration
        # macro.write('] feature=Similarity registration=[Elastic              -- bUnwarpJ splines                    ] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[[Elastic              -- bUnwarpJ splines                    ] interpolate registration=Mono image_subsample_factor=0 initial_deformation=[Very Coarse] final_deformation=Fine divergence_weight=0.1 curl_weight=0.1 landmark_weight=1 image_weight=0 consistency_weight=0 stop_threshold=0.01 shear=0.95 scale=0.95 isotropy=1"); \n')

        # Or use similarity: translation + rotation + isotropic scale
        # macro.write('] feature=Similarity registration=[Similarity           -- translate + rotate + isotropic scale] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=25 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=50 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[Similarity           -- translate + rotate + isotropic scale] interpolate"); \n')

        macro.write('run("Close All"); \n\n')

        # now apply transform to every channel (including the reference CH1)
        for ch in chs:
            macro.write('run("Transform Virtual Stack Slices", "source=[')
            # unregistered folder
            macro.write(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)).replace('\\', '/'))
            macro.write('] output=[')
            # registered folder
            macro.write(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)).replace('\\', '/'))
            macro.write('] transforms=[')
            macro.write(os.path.join(regSavePath, name, 'Original', 'CH1').replace('\\', '/')) # stored in original registration folder
            macro.write('] interpolate"); \n')
            macro.write('run("Close All"); \n\n')

    # print command to run macro
    print('runMacro("' + macro_path.replace('\\', '/') + '");')

runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/specificity_PlusMinus/imgs/registered_imagej/Control_FW1/Original/CH1/18Dec2023_register_transforms.ijm");


## Combine all registered images into a single folder

In [32]:
# Folder layout used by the post-registration steps:
#   registered_imagej        per-channel ImageJ input/output folders
#   registered_imagej_final  registered images gathered in one folder per FOV
#   registered_crop          the same images cropped to their common valid area
regSavePath = data_dir.joinpath('specificity_PlusMinus', 'imgs', 'registered_imagej')

regSaveFinalPath = data_dir.joinpath('specificity_PlusMinus', 'imgs', 'registered_imagej_final')
regSaveFinalPath.mkdir(parents=True, exist_ok=True)

regSaveCropPath = data_dir.joinpath('specificity_PlusMinus', 'imgs', 'registered_crop')
regSaveCropPath.mkdir(parents=True, exist_ok=True)

In [33]:
# Gather the registered images of every (Condition, FOV) into a single folder:
#   <regSavePath>/<name>/Registered/CH<ch>/CH<ch>_Cycle<NN>.tif
#     -> <regSaveFinalPath>/<name>/Cycle<NN>_CH<ch>.tif
# Only (cycle, channel) pairs listed in the metadata table are copied, so the
# empty placeholder images are left behind.
group = df.groupby(['Condition', 'FOV'])

missing_files = []
for name, channels in group:
    name = '_'.join(name)

    # One output folder per FOV; create it once instead of once per file
    folder = regSaveFinalPath / name
    folder.mkdir(parents=True, exist_ok=True)

    for cycle in tqdm(channels['Cycle'].unique()): # each cycle
        # reset_index returns a fresh frame; doing it in place on a .loc slice
        # would operate on a temporary copy and can raise SettingWithCopyWarning
        dfCycle = channels.loc[channels['Cycle'] == cycle].reset_index(drop = True)
        cycle = cycle[5:]  # strip the 'cycle' prefix
        for ch in dfCycle.Channels: # each channel

            # find registered file
            tifPath = os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch), 'CH' + str(ch)+ '_Cycle' + str(cycle).zfill(2) + '.tif')

            # File out
            fileOut = 'Cycle' + str(cycle).zfill(2) + \
            '_' + 'CH' + str(ch) + '.tif'
            fileOut = os.path.join(regSaveFinalPath, name, fileOut)

            # Copy; remember anything that is missing so it can be reported
            if os.path.exists(tifPath):
                shutil.copyfile(tifPath, fileOut)
            else:
                missing_files.append(tifPath)

# A missing file is still skipped (as before) but no longer silently.
if missing_files:
    print(f'{len(missing_files)} registered file(s) not found and skipped:')
    for tifPath in missing_files:
        print('  ' + tifPath)

In [34]:
#### Crop every image of a FOV to the region that is non-black in all of its
#### channel-1 (reference) images

# Crop
for fov_dir in sorted(os.listdir(regSaveFinalPath)):

    # Read imgs (sorted so the order does not depend on the file system)
    paths = sorted(file for file in os.listdir(regSaveFinalPath / fov_dir) if 'tif' in file)
    imgs = [tiff.imread(regSaveFinalPath / fov_dir / file) for file in paths]

    # Get bboxs of the non-black region of every channel-1 image.
    # The channel is taken from the file name (Cycle<NN>_CH<ch>.tif) rather
    # than from the position in the directory listing, which is only correct
    # when the listing order happens to match the metadata table and no file
    # is missing.
    bboxs = []
    for file, img in zip(paths, imgs):
        if '_CH1.' not in file:
            continue
        regions = skimage.measure.regionprops((img>0).astype(np.uint8))
        if not regions:
            raise ValueError(f'{fov_dir}/{file} is completely black; cannot compute its bounding box')
        bboxs.append(np.array(regions[0]['bbox']))
    if not bboxs:
        raise ValueError(f'No channel-1 image found in {regSaveFinalPath / fov_dir}')
    bboxs = np.stack(bboxs)

    # Intersection of all boxes: largest minimum corner, smallest maximum corner
    bbox_final = [np.max(bboxs[:,0]),
                np.max(bboxs[:,1]),
                np.min(bboxs[:,2]),
                np.min(bboxs[:,3])]

    min_row, min_col, max_row, max_col = bbox_final

    # Save cropped images
    save_dir = regSaveCropPath / fov_dir
    save_dir.mkdir(parents=True, exist_ok=True)
    for file, img in zip(paths, imgs):
        save_path = save_dir / file
        tiff.imwrite(save_path, img[min_row:max_row, min_col:max_col], bigtiff = True)

# Save data

In [68]:
from skimage import util
import h5py

def get_info(data_raw, marker_dict):
    """Collect metadata for the registered images found under ``data_raw``.

    Every file whose name contains "tif" is parsed as follows:
      * the containing folder name is split on '_': field 0 is the condition,
        field 1 is the FOV;
      * the file name is split on '_': the last two characters of field 0
        give the cycle number, the third character of field 1 gives the
        channel (e.g. ``Cycle01_CH1.tif`` -> 'cycle1', channel 1).

    Files whose (cycle, channel) pair is not present in ``marker_dict`` are
    skipped.

    Parameters
    ----------
    data_raw : str or path-like
        Root folder to scan recursively.
    marker_dict : dict
        Mapping ``cycle name -> {channel number -> marker name}``.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns Condition, FOV, Cycle, Channels,
        Markers and Path.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        for name in sorted(filenames):
            if "tif" not in name:
                continue

            # Get information from image name
            n_split = name.split('_')
            # Use the OS path helpers instead of splitting on '\\' so the
            # folder name is found whatever separator the path uses.
            folder_fields = os.path.basename(os.path.normpath(dirpath)).split('_')

            cond = folder_fields[0]
            fov = folder_fields[1]
            cycle = 'cycle' + str(int(n_split[0][-2:]))
            ch = int(n_split[1][2])

            # Only a missing (cycle, channel) entry means "no marker, skip";
            # any other error should surface instead of being swallowed.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append({
                "Condition": cond,
                "FOV": fov,
                "Cycle": cycle,
                "Channels": ch,
                "Markers": marker,
                "Path": os.path.join(dirpath, name),
            })

    return pd.DataFrame(records, columns=columns)

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Store ``data`` as a gzip-compressed, chunked dataset in an HDF5 file.

    If a dataset called ``name`` already exists in the file, nothing is
    written and a message is printed instead.

    Parameters
    ----------
    path : str or path-like
        HDF5 file to open.
    name : str
        Name of the dataset inside the file.
    data : np.ndarray
        Array to store; the dataset takes its shape and dtype.
    attr_dict : dict, optional
        Key/value pairs saved as attributes of the newly created dataset.
    mode : str
        File mode passed to ``h5py.File`` (default "a").
    """
    # The context manager closes the file even when dataset creation or the
    # write raises, so a failed run does not leave the file handle open.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,  # same as the data shape, as before
            chunks=True,
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def read_img(path):
    """Read the image at ``path`` with ``skimage.io.imread(..., as_gray=True)``."""
    return skimage.io.imread(path, as_gray=True)

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` in parallel with joblib.

    Parameters
    ----------
    task : callable
        Function called with a single item of ``pics``.
    pics : iterable
        Items to process.
    n_jobs : int
        Number of joblib workers (default 20, the previously hard-coded value).

    Returns
    -------
    list
        The results of ``task`` as returned by ``joblib.Parallel``.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

def get_min(imgs):
    """Return, per axis, the smallest size found among the shapes of ``imgs``."""
    all_shapes = np.array([img.shape for img in imgs])
    return all_shapes.min(axis=0)

In [69]:
# From here on the registered and cropped images are the input data.
# Their metadata table is built once and cached as CSV.
data_raw = data_dir.joinpath('specificity_PlusMinus', 'imgs', 'registered_crop')
df_meta_path = data_dir.joinpath('specificity_PlusMinus', 'metadata', 'info_sti.csv')

try:
    df_meta_path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_meta_path.is_file()

if df_exist:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)

Folder is already there
Loaded df


In [70]:
# Display the metadata table that was just created or loaded.
df

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,Path
0,Control,FW1,cycle1,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
1,Control,FW1,cycle1,4,PLUS Perk,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
2,Control,FW1,cycle2,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
3,Control,FW1,cycle2,4,MINUS Yap,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
4,Control,FW1,cycle3,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
5,Control,FW1,cycle3,4,Cyclin E/CDK2,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
6,Control,FW1,cycle4,1,DNA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
7,Control,FW1,cycle4,2,Concanavalin A,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
8,Control,FW1,cycle4,3,Phalloidin,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
9,Control,FW1,cycle4,4,WGA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...


In [73]:
# Write one HDF5 file per (Condition, FOV) holding all of its images as a
# single stacked array, and cache the list of written files as CSV.
df_imgs_path = data_dir.joinpath('specificity_PlusMinus', 'metadata', 'imgs_reg.csv')
temp_path = data_dir.joinpath('specificity_PlusMinus', 'hdf5', 'registered')
try:
    temp_path.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_imgs_path.is_file()

if df_exist:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)
else:
    print('Created df')
    group = df.groupby(['Condition','FOV'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append([*name, file_path])

        # Per-image metadata, stored as attributes next to the image stack
        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # Load every image of the group in parallel
        paths = df_group.Path.to_numpy()
        imgs = joblib_loop(read_img, paths)

        # Cut all images to the smallest common height/width so they stack
        min_shape = get_min(imgs)
        height, width = min_shape[0], min_shape[1]
        imgs_cropped = util.img_as_ubyte(np.array([img[:height, :width] for img in imgs]))

        # hdf5 as Channel -> Z mapping
        save_hdf5(file_path, 'imgs', imgs_cropped, info)

    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)

Folder is already there
Created df


  0%|          | 0/1 [00:00<?, ?it/s]

# Segmentation

## Generate images

In [5]:
import napari 
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=99.9):
    """Percentile-based contrast stretch, returned as an unsigned 8-bit image.

    The intensities between the ``n_min``-th and ``n_max``-th percentiles of
    ``img`` are used as the input range for the rescale, and the rescaled
    image is then converted with ``img_as_ubyte``.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

In [6]:
# Reload the table listing one registered HDF5 file per (Condition, FOV).
df_imgs_path = data_dir.joinpath('specificity_PlusMinus', 'metadata', 'imgs_reg.csv')
df_imgs = pd.read_csv(df_imgs_path)

In [8]:
# Marker names matched against each stack's 'Marker' attribute to select the
# cytoplasm image(s) used for the segmentation input.
cyto_markers = ['Phalloidin']

In [10]:
# Open every registered stack in a napari viewer, one layer per marker
for row in df_imgs.itertuples():
    path = row.Path

    # Load the image stack together with its marker names
    with h5py.File(path, "r") as f:
        dset = f['imgs']
        imgs = dset[:]
        markers = dset.attrs['Marker']

    # channel_axis=0 splits the first axis into separate layers
    napari.view_image(imgs, name=markers, channel_axis=0)

In [9]:
# Build, for every registered stack, the 3-plane image used as segmentation
# input: plane 0 is empty, plane 1 is the cytoplasm signal, plane 2 is DNA.
whole_seg_path = data_dir /  'specificity_PlusMinus' / 'imgs' / 'segmentation'
whole_seg_path.mkdir(parents=True, exist_ok=True)

# Save combined images
for row in df_imgs.itertuples():
    # Read image
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']

    # Get dapi and cyto images
    indices = np.isin(markers, cyto_markers)
    if not indices.any():
        raise ValueError(f'None of the cyto markers {cyto_markers} found in {path}')
    # NOTE(review): the DNA image is picked by the hard-coded position 4 in the
    # stack rather than by marker name - confirm this is the intended image.
    img_dapi = imgs[4]
    imgs_cyto = imgs[indices]

    # Contrast stretching; every selected cyto marker is stretched and the
    # per-pixel maximum merges them into one plane (with a single marker this
    # is just that marker's stretched image).
    img_dapi = contrast_str(img_dapi, n_max=99.9)
    imgs_cyto_scaled = [contrast_str(img, n_max=99.9) for img in imgs_cyto]
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)
    # zeros_like keeps the empty plane in the same dtype as the DNA plane
    # instead of allocating a float64 array that is cast afterwards
    img_rgb = np.stack([np.zeros_like(img_dapi), img_cyto, img_dapi], axis=0).astype(np.uint8)

    # Save as <Condition>_<FOV>.tif
    file_name = f'{"_".join(row[1:3])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)