In [1]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.notebook import trange, tqdm, tqdm_notebook
from joblib import Parallel, delayed
import re
import h5py
import tifffile as tiff


c:\Users\thu71\AppData\Local\anaconda3\envs\cellpose\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\thu71\AppData\Local\anaconda3\envs\cellpose\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Project data folders, resolved from the parent of the current working directory.
data_dir = Path.cwd().parents[0].joinpath('data').absolute()
data_processed = data_dir.joinpath('processed')
# Root folder of the raw images (hard-coded drive path).
data_raw = r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\Specificity experiments'


In [4]:
import os
 
# Point JDK_HOME / JAVA_HOME at the local JDK 17 install.
# A raw string keeps the Windows backslashes literal; the previous plain
# strings relied on invalid escape sequences (\P, \J, \j).
JAVA_DIR = r"C:\Program Files\Java\jdk-17"
os.environ['JDK_HOME'] = JAVA_DIR
os.environ['JAVA_HOME'] = JAVA_DIR

# Entries appended to PATH (';' separated, same order as before).
java_path_entries = [
    JAVA_DIR + '\\jre\\bin\\server\\',
    JAVA_DIR + '\\bin\\',
    JAVA_DIR + '\\bin\\jar.exe',
]

# Only append what is missing so re-running this cell does not keep
# growing PATH with duplicate entries.
existing_entries = os.environ['PATH'].split(';')
for entry in java_path_entries:
    if entry not in existing_entries:
        os.environ['PATH'] += ';' + entry

# Get info

In [5]:
# Marker lookup used by get_info: cycle name -> {channel number -> marker name}.
markers_map = {
    'cycle1': {1: 'DNA', 3: 'cyclin D1/CDK4', 4: 'Bim/Tom20'},
}

def get_info(data_raw, marker_dict):
    """Walk ``data_raw`` and tabulate one row per recorded image file.

    Directories whose path contains 'after nuclease', 'Test' or 'wrong' are
    skipped. A file is considered when its name contains both "tif" and
    "sti" and neither 'overlay' nor 'Composite'. The directory path is split
    on backslashes, so the layout parsing assumes Windows-style paths.

    Parameters
    ----------
    data_raw : str or path-like
        Root folder handed to ``os.walk``.
    marker_dict : dict
        Mapping ``cycle -> {channel number -> marker name}``. Files whose
        cycle/channel pair is missing from the mapping are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns ``Condition``, ``FOV``, ``Cycle``, ``Channels``, ``Markers``
        and ``Path`` (always present, even when no file matched).

    Notes
    -----
    Files whose condition code is neither '6' nor '8' are skipped with a
    warning. Previously they raised ``NameError`` (first file) or silently
    inherited the condition of the previously processed file.
    """
    import warnings

    condition_codes = {'6': 'Control', '8': 'KO'}
    excluded_dirs = ('after nuclease', 'Test', 'wrong')
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    rows = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        if any(tag in dirpath for tag in excluded_dirs):
            continue

        for name in sorted(filenames):
            if "tif" not in name or "sti" not in name:
                continue
            if 'overlay' in name or 'Composite' in name:
                continue

            # NOTE(review): the condition code is the last character of the
            # LAST entry of ``filenames`` (os.walk order), not of ``name`` or
            # of the folder name -- confirm this is the intended source.
            condition = condition_codes.get(filenames[-1][-1])

            # Folder two levels up is expected to look like
            # '<prefix>_<cycle> <suffix>_<date>'; its second '_' field is the FOV.
            d_split = dirpath.split('\\')
            fov = d_split[-2].split('_')[1]
            cycle = fov.split(' ')[0]
            # Channel number is the 5th character from the end of the last
            # '_' field (e.g. '1' in 'stitched_1.tif').
            ch = int(name.split('_')[-1][-5])

            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                # Cycle or channel not listed in marker_dict: not of interest.
                continue

            if condition is None:
                path_skipped = os.path.join(dirpath, name)
                warnings.warn(
                    f"Skipping {path_skipped}: unrecognised condition code "
                    f"{filenames[-1][-1]!r} (expected '6' or '8')"
                )
                continue

            rows.append(
                (condition, fov, cycle, ch, marker, os.path.join(dirpath, name))
            )

    return pd.DataFrame(rows, columns=columns)

In [6]:
# Metadata table is cached as CSV: reuse it when present, otherwise build it
# from the raw folder and write it out.
df_meta_path = data_dir.joinpath('specificity', 'metadata', 'info.csv')
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

if df_exist:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)

Loaded df


In [7]:
# Display the metadata table.
df

Unnamed: 0,Condition,FOV,Cycle,Channels,Markers,Path
0,Control,cycle1 PLA,cycle1,1,DNA,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\S..."
1,Control,cycle1 PLA,cycle1,3,cyclin D1/CDK4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\S..."
2,Control,cycle1 PLA,cycle1,4,Bim/Tom20,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\S..."
3,KO,cycle1 PLA,cycle1,1,DNA,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\S..."
4,KO,cycle1 PLA,cycle1,3,cyclin D1/CDK4,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\S..."
5,KO,cycle1 PLA,cycle1,4,Bim/Tom20,"Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\S..."


In [8]:
# Show the stored path of the first metadata row as a quick sanity check.
df.iloc[0].Path

'Y:\\coskun-lab\\Shuangyi\\ERK, YAP project_2022\\Specificity experiments\\HCC827_cycle1 PLA_26Oct23\\XY01\\stitched_1.tif'

In [9]:
# Number of metadata rows per condition.
df.groupby('Condition').size()

Condition
Control    3
KO         3
dtype: int64

# Save HDF5

In [29]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed, chunked dataset ``name`` in an HDF5 file.

    If a dataset called ``name`` already exists in the file, nothing is
    written and a message is printed instead.

    Parameters
    ----------
    path : str or path-like
        HDF5 file to open.
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store; the dataset takes its shape and dtype, and
        ``maxshape`` equals the shape.
    attr_dict : dict, optional
        Key/value pairs stored as attributes of the new dataset.
    mode : str
        Mode passed to ``h5py.File`` (default ``"a"``).
    """
    # The context manager closes the file even when creation or writing
    # raises; the previous explicit close() was skipped on errors.
    with h5py.File(path, mode) as hf:
        if name in hf:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,
            chunks=True,
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data

        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread(..., as_gray=True)``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` through ``joblib.Parallel``.

    Parameters
    ----------
    task : callable
        Function called with one item of ``pics``.
    pics : iterable
        Items to process.
    n_jobs : int, optional
        Worker count passed to ``Parallel``. Defaults to 20, the value that
        was previously hard-coded.

    Returns
    -------
    list
        What ``Parallel`` returns for the submitted calls.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)

In [30]:
# Output locations: the CSV index of image stacks and the folder holding the HDF5 files.
df_imgs_path = data_dir.joinpath('specificity', 'metadata', 'imgs.csv')
df_imgs_path.parent.mkdir(parents=True, exist_ok=True)

temp_path = data_dir.joinpath('specificity', 'hdf5', 'raw')
temp_path.mkdir(parents=True, exist_ok=True)

df_exist = df_imgs_path.is_file()

if df_exist:
    # Index already on disk: reuse it instead of re-reading the images.
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)
else:
    print('Created df')

    group = df.groupby(['Condition', 'FOV'])
    rows = []

    # One HDF5 file per (Condition, FOV) pair, one dataset per cycle inside it.
    for name, df_group in tqdm(group, total=len(group)):
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append([*name, file_path])

        group_cycle = df_group.groupby('Cycle')
        for cycle, df_cycle in group_cycle:
            channels = df_cycle['Channels'].tolist()
            markers = df_cycle['Markers'].tolist()
            paths = df_cycle['Path'].to_numpy()

            # Stack the per-channel images along the first axis.
            imgs = np.array(joblib_loop(read_img, paths))
            info = {"Channels": channels, "Markers": markers}

            save_hdf5(file_path, cycle, imgs, info)

    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)

Created df


  0%|          | 0/2 [00:00<?, ?it/s]

In [31]:
# Display the index of HDF5 files (one row per Condition/FOV pair).
df_imgs

Unnamed: 0,Condition,FOV,Path
0,Control,cycle1 PLA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...
1,KO,cycle1 PLA,y:\coskun-lab\Thomas\23_PLA_revision\data\spec...


# Segmentation

In [35]:
from skimage import exposure, util
from cellpose import core, utils, io, models, metrics

# Ask Cellpose whether a GPU is usable and report the answer.
use_GPU = core.use_gpu()
yn = ['NO', 'YES']
gpu_status = yn[use_GPU]
print(f'>>> GPU activated? {gpu_status}')

def contrast_str(img, n_min=0.1, n_max=99.95):
    """Percentile-based contrast stretch followed by conversion with ``img_as_ubyte``.

    The ``n_min``-th and ``n_max``-th percentiles of ``img`` are used as the
    input range for ``exposure.rescale_intensity``; the rescaled image is
    then passed through ``util.img_as_ubyte`` and returned.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

>>> GPU activated? YES


In [33]:
# Folder that receives the segmentation masks; created on demand.
mask_path = data_dir.joinpath('specificity', 'imgs', 'masks')
mask_path.mkdir(parents=True, exist_ok=True)

In [61]:
# Segment nuclei for every row of df_imgs and save one label mask per row.

# The model does not depend on the row, so build it once instead of
# re-creating it on every iteration; honour the GPU probe from above rather
# than forcing gpu=True.
model = models.CellposeModel(gpu=use_GPU, model_type='nuclei')

for row in df_imgs.itertuples():
    # Read the first plane of the 'cycle1' stack
    # (presumably the DNA channel, given the stored channel order -- TODO confirm).
    path = row.Path
    with h5py.File(path, "r") as f:
        img_dapi = f['cycle1'][0]
        markers = f['cycle1'].attrs['Markers']

    # Contrast stretching
    img_dapi = contrast_str(img_dapi, n_max=99.9)

    # Nuclei segmentation
    # NOTE(review): channels=[3,3] is passed for a single-plane image --
    # confirm against the Cellpose channel convention.
    mask_nuclei, flows, styles = model.eval(
        img_dapi,
        channels=[3, 3],
        diameter=80,
        flow_threshold=0.4,
    )

    # NOTE(review): the file name only carries the condition, so several FOVs
    # of the same condition would overwrite each other.
    file_path = mask_path / f'Nuclei_{row.Condition}.tiff'
    tiff.imwrite(file_path, mask_nuclei)

# Manual segmentation

In [62]:
# Load the full 'cycle1' stack of the first row after skipping row 0;
# the loop stops after that single row.
for row in df_imgs.iloc[1:].itertuples():
    path = row.Path
    with h5py.File(path, "r") as f:
        dset = f['cycle1']
        imgs = dset[:]
        markers = dset.attrs['Markers']
    break

In [65]:
import napari

# Move the first axis last so the stack can be shown as one RGB image.
imgs_rgb = np.transpose(imgs, (1, 2, 0))
napari.view_image(imgs_rgb, rgb=True, contrast_limits=[0,1])

Viewer(camera=Camera(center=(0.0, 1733.0, 2310.0), zoom=0.35193152391306415, angles=(0.0, 0.0, 90.0), perspective=0.0, mouse_pan=True, mouse_zoom=True), cursor=Cursor(position=(1.0, 1.0), scaled=True, size=1, style=<CursorStyle.STANDARD: 'standard'>), dims=Dims(ndim=2, ndisplay=2, last_used=0, range=((0.0, 3467.0, 1.0), (0.0, 4621.0, 1.0)), current_step=(1733, 2310), order=(0, 1), axis_labels=('0', '1')), grid=GridCanvas(stride=1, shape=(-1, -1), enabled=False), layers=[<Image layer 'Image' at 0x1ac9f3fd4f0>], help='use <2> for transform', status='Ready', tooltip=Tooltip(visible=False, text=''), theme='dark', title='napari', mouse_over_canvas=False, mouse_move_callbacks=[], mouse_drag_callbacks=[], mouse_double_click_callbacks=[], mouse_wheel_callbacks=[<function dims_scroll at 0x000001AC8B329790>], _persisted_mouse_event={}, _mouse_drag_gen={}, _mouse_wheel_gen={}, keymap={})