In [1]:
import itertools
import os
import sys
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io

from collections import defaultdict
from tqdm.notebook import trange, tqdm, tqdm_notebook
from joblib import Parallel, delayed
import re
import h5py
import tifffile as tiff


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Project data folder: "<parent of the current working directory>/data".
data_dir = Path.cwd().parents[0].joinpath('data').absolute()
data_processed = data_dir.joinpath('processed')
# Root folder of the raw images (absolute, machine-specific Windows path).
data_raw = r'Y:\coskun-lab\Shuangyi\ERK, YAP project_2022\VP drug effect on HCC827 cells'


In [4]:
import os

# Point Java-based tooling at the local JDK.
# A raw string is required here: "\P", "\J" and "\j" are invalid escape
# sequences in a normal string literal and trigger a warning on recent Python versions.
java_home = r"C:\Program Files\Java\jdk-17"
os.environ['JDK_HOME'] = java_home
os.environ['JAVA_HOME'] = java_home

# NOTE(review): the last PATH entry points at jar.exe (a file, not a folder); kept unchanged.
java_path_suffix = ';C:\\Program Files\\Java\\jdk-17\\jre\\bin\\server\\;C:\\Program Files\\Java\\jdk-17\\bin\\;C:\\Program Files\\Java\\jdk-17\\bin\\jar.exe'
# Only append once so that re-running this cell does not keep growing PATH.
if not os.environ['PATH'].endswith(java_path_suffix):
    os.environ['PATH'] += java_path_suffix

In [5]:
# Make "<parent of the current working directory>/src" importable from this notebook.
p_dir = Path.cwd().parents[0].absolute()
module_path = str(p_dir.joinpath("src"))

# Avoid adding the same entry twice when the cell is re-run.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Get info

In [10]:
# Marker lookup consumed by get_info: cycle name -> channel number -> marker name.
# get_info skips every (cycle, channel) combination that is not listed here.
markers_map = {
    'cycle1': {
        1: 'DNA', 
        4: 'TEAD1/YAP',
    },
    'cycle2': {
        1: 'DNA', 
        3: 'EGFR/Grb2',
        4: 'Trail/DR5',
    },
    'cycle3': {
        1: 'DNA', 
        4: 'Sox2/Oct4',
    },
    'cycle4': {
        1: 'DNA', 
        4: 'E-cadherin/b-catenin',
    },
    'cycle5': {
        1: 'DNA', 
        2: 'P-EGFR',
        3: 'Phalloidin', 
        4: 'KI67',
    },
}

def get_info(data_raw, marker_dict):
    """Build the metadata table of the raw stitched images found under ``data_raw``.

    Folders whose path contains 'after nuclease', 'Test' or 'wrong' are ignored.
    A file is kept when its name contains both "tif" and "sti" and neither
    'overlay' nor 'Composite'.

    Parsing rules (taken from the folder and file names):
      * condition: 2nd '_'-separated token of the containing folder name
      * cycle:     'cycle' + last character of the 3rd token of the folder name
      * channel:   5th character from the end of the file name (as int)

    Parameters
    ----------
    data_raw : path-like
        Root folder to walk.
    marker_dict : dict
        ``{cycle: {channel: marker}}``; files whose (cycle, channel) is not
        listed are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns: Condition, FOV (always 'FW1'), Cycle, Channels, Markers, Path.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    excluded_tokens = ('after nuclease', 'Test', 'wrong')
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        if any(token in dirpath for token in excluded_tokens):
            continue

        # Split on both separators so the folder name is found on Windows and POSIX alike
        # (the previous split('\\') returned the whole path on POSIX).
        folder_parts = re.split(r'[\\/]', dirpath)[-1].split('_')

        for name in sorted(filenames):
            if "tif" not in name or "sti" not in name:
                continue
            if 'overlay' in name or 'Composite' in name:
                continue

            # Get information from folder / image name
            condition = folder_parts[1]
            ch = int(name.split('_')[-1][-5])
            cycle = 'cycle' + folder_parts[2][-1]

            # Only a missing marker entry is an expected reason to skip a file;
            # any other error should surface instead of being swallowed.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append(
                (condition, 'FW1', cycle, ch, marker, os.path.join(dirpath, name))
            )

    return pd.DataFrame(records, columns=columns)

In [14]:
# Raw-image metadata is cached as CSV: it is rebuilt only when the file is missing.
df_meta_path = data_dir.joinpath('VP_drug2', 'metadata', 'info.csv')
df_meta_path.parent.mkdir(parents=True, exist_ok=True)

df_exist = df_meta_path.is_file()

if df_exist:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)

Created df


In [15]:
# Number of metadata rows (image files) per condition.
df.groupby('Condition').size()

Condition
10uM    13
1uM     13
ctrl    13
dtype: int64

# Save hdf5

In [16]:
import h5py

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed, chunked dataset ``name`` into an HDF5 file.

    If a dataset called ``name`` already exists in the file nothing is written
    and a message is printed instead.

    Parameters
    ----------
    path : path-like
        HDF5 file to open (created if needed, depending on ``mode``).
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store; the dataset gets the same shape and dtype.
    attr_dict : dict, optional
        Key/value pairs stored as attributes of the new dataset.
    mode : str
        File mode passed to ``h5py.File`` (default "a").
    """
    # The context manager closes the file even if creating or writing the dataset fails.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,  # fixed size: max shape equals the data shape
            chunks=True,
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every element of ``pics`` with joblib and return the results as a list.

    ``n_jobs`` is forwarded to ``joblib.Parallel``; the default (20) is the
    value that used to be hard-coded.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread(..., as_gray=True)``."""
    image = skimage.io.imread(path, as_gray=True)
    return image

def get_min(imgs):
    """Return, per axis, the smallest extent found among the shapes of ``imgs``."""
    shape_table = np.array([np.array(im.shape) for im in imgs])
    return shape_table.min(axis=0)

In [17]:
# Stack the raw images of each condition into one HDF5 file and record the file paths.
df_imgs_path = data_dir /  'VP_drug2' / 'metadata' / 'imgs.csv'
df_imgs_path.parent.mkdir(parents=True, exist_ok=True)

temp_path = data_dir  / 'VP_drug2' /  'hdf5' / 'raw'
temp_path.mkdir(parents=True, exist_ok=True)

df_exist = df_imgs_path.is_file()


if not df_exist:
    print('Created df')

    # Group by the scalar key, not a one-element list: with pandas >= 2.0 a list key
    # makes the iteration name a 1-tuple, and `name + '.hdf5'` then raises TypeError.
    group = df.groupby('Condition')
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        file_name = name + '.hdf5'
        file_path = temp_path / file_name
        rows.append([name, file_path])

        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()

        # Read all images of the condition and crop them to the smallest common shape
        imgs = joblib_loop(read_img, paths)
        min_shape = get_min(imgs)
        imgs_cropped = np.array([img[:min_shape[0], :min_shape[1]] for img in imgs])
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # hdf5 as Channel -> Z mapping
        save_hdf5(file_path, 'imgs', imgs_cropped, info)
    df_imgs = pd.DataFrame(rows, columns=['Condition', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)
else:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)

Created df


  0%|          | 0/3 [00:00<?, ?it/s]

# Registration ImageJ

## Registration

In [18]:
import tifffile as tf
from PIL import Image
import PIL.Image
# Override Pillow's MAX_IMAGE_PIXELS limit (presumably so the large stitched images can be opened — confirm).
PIL.Image.MAX_IMAGE_PIXELS = 933120000
import shutil
from datetime import date, datetime
import skimage.io 
from skimage import util
from skimage.transform import resize

In [19]:
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=100):
    """Contrast-stretch ``img`` between two percentiles and convert the result to uint8.

    ``n_min`` / ``n_max`` are the lower / upper percentiles (in percent) whose
    values are used as the input range of ``exposure.rescale_intensity``.
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

def make_imgs_same_dim(imgs):
    """Crop every stack in ``imgs`` to the smallest size found along axes 1 and 2.

    Each element must have exactly two axes after the first one; the first
    axis is left untouched. Returns a list of cropped views.
    """
    # Smallest (not largest) extent along the two trailing axes
    trailing_shapes = np.array([stack.shape[1:] for stack in imgs])
    n_rows, n_cols = trailing_shapes.min(axis=0)
    return [stack[:, :n_rows, :n_cols] for stack in imgs]

In [20]:
# Export one TIFF per (cycle, channel) into <regSavePath>/<Condition_FOV>/Original/CH<n>/
# so that every channel folder holds one image per cycle for the ImageJ registration.
regSavePath = data_dir / 'VP_drug2' /'imgs' / 'registered_imagej'
regSavePath.mkdir(parents=True, exist_ok=True)

chs = [1, 2, 3, 4]
group = df.groupby(['Condition', 'FOV'])
for name, df_group in group:

    # Group by the scalar key: with pandas >= 2.0 `groupby(['Cycle'])` yields 1-tuples,
    # so `cycle[-1]` would be the whole 'cycleN' string instead of the cycle digit.
    for cycle, df_cycle in df_group.groupby('Cycle'):
        cycle = cycle[-1]
        channel = df_cycle.Channels.tolist()
        imgs = [cv2.imread(f, cv2.IMREAD_GRAYSCALE) for f in df_cycle.Path.tolist()]

        for ch in chs:
            # Save path per Channel
            folderPath = os.path.join(regSavePath, '_'.join(name), 'Original', 'CH' + str(ch)) # 1 index
            os.makedirs(folderPath, exist_ok = True)

            fileOut = 'CH' + str(ch) + '_Cycle' + str(cycle).zfill(2) + '.tif'
            fileOut = os.path.join(folderPath, fileOut)
            if os.path.exists(fileOut):
                continue

            if ch in channel:
                img = imgs[channel.index(ch)]
                if ch == 1:
                    # Only channel 1 (the registration reference) is contrast-stretched
                    img = contrast_str(img, n_min=0.1, n_max=99.9)
            else:
                # Channel not acquired in this cycle: write an empty placeholder image
                img = np.zeros(imgs[0].shape, np.uint8)

            tf.imwrite(fileOut, img, photometric = 'minisblack', bigtiff = True)



In [21]:
# Write one ImageJ macro per (Condition, FOV): register all cycles on CH1, then apply the
# recorded transforms to every channel. The command to run each macro is printed.
group = df.groupby(['Condition', 'FOV'])
chs = [1, 2, 3, 4]

for name, channels in group:
    name = '_'.join(name)
    '''
    run("Register Virtual Stack Slices", "source=[Y:/coskun-lab/Nicky/07 Temp/register large stitch] output=[Y:/coskun-lab/Nicky/07 Temp/register output] feature=Rigid registration=[Rigid                -- translate + rotate                  ] advanced shrinkage save save_dir=[Y:/coskun-lab/Nicky/07 Temp/register output] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Rigid registration_model=[Rigid                -- translate + rotate                  ] interpolate");
    run("Transform Virtual Stack Slices", "source = [Y:/coskun-lab/Nicky/07 Temp/other channels/original] output = [Y:/coskun-lab/Nicky/07 Temp/other channels/original] transforms = [Y:/coskun-lab/Nicky/07 Temp/register output] interpolate");
    '''
    # folder to save registered images separated by channel to apply transforms
    # create all folder
    for ch in chs: # all channels
        os.makedirs(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)), exist_ok = True)
        os.makedirs(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)), exist_ok = True)

    # Build the macro path explicitly instead of os.chdir(): changing the working
    # directory would silently alter every cwd-relative path computed afterwards.
    macro_dir = os.path.join(regSavePath, name, 'Original', 'CH1')
    date_time = datetime.now().strftime("%d%b%Y") # current date
    macro_path = os.path.join(macro_dir, date_time + '_register_transforms.ijm')

    # `with` guarantees the macro file is flushed and closed even if a write fails
    with open(macro_path, 'w') as macro:

        # register cycles on CH1
        macro.write('run("Register Virtual Stack Slices", "source=[')
        # original files
        macro.write(os.path.join(regSavePath, name, 'Original', 'CH1').replace('\\', '/'))
        macro.write('] output=[')
        # registered output files
        macro.write(os.path.join(regSavePath, name, 'Registered', 'CH1').replace('\\', '/'))

        # Rigid registration: translation + rotation
        macro.write('] feature=Rigid registration=[Rigid                -- translate + rotate                  ] advanced shrinkage save save_dir=[')
        # NOTE(review): nothing is written between 'save_dir=[' and ']', so save_dir is empty
        # in the macro; the transform step below reads the transforms from Original/CH1 —
        # confirm that this is where the plugin stores them.
        macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Rigid registration_model=[Rigid                -- translate + rotate                  ] interpolate"); \n')

        # # bigwrap registration
        # macro.write('] feature=Similarity registration=[Elastic              -- bUnwarpJ splines                    ] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=8 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=25 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[[Elastic              -- bUnwarpJ splines                    ] interpolate registration=Mono image_subsample_factor=0 initial_deformation=[Very Coarse] final_deformation=Fine divergence_weight=0.1 curl_weight=0.1 landmark_weight=1 image_weight=0 consistency_weight=0 stop_threshold=0.01 shear=0.95 scale=0.95 isotropy=1"); \n')

        # Or use similarity: translation + rotation + isotropic scale
        # macro.write('] feature=Similarity registration=[Similarity           -- translate + rotate + isotropic scale] advanced shrinkage save save_dir=[')
        # # folder to save recorded transformations 
        # macro.write('] initial_gaussian_blur=1.60 steps_per_scale_octave=3 minimum_image_size=64 maximum_image_size=1024 feature_descriptor_size=25 feature_descriptor_orientation_bins=8 closest/next_closest_ratio=0.92 maximal_alignment_error=50 inlier_ratio=0.05 feature_extraction_model=Similarity registration_model=[Similarity           -- translate + rotate + isotropic scale] interpolate"); \n')

        macro.write('run("Close All"); \n\n')

        # now apply transform to every channel (CH1 included)
        for ch in chs:

            macro.write('run("Transform Virtual Stack Slices", "source=[')
            # unregistered folder
            macro.write(os.path.join(regSavePath, name, 'Original', 'CH' + str(ch)).replace('\\', '/'))
            macro.write('] output=[')
            # registered folder
            macro.write(os.path.join(regSavePath, name, 'Registered', 'CH' + str(ch)).replace('\\', '/'))
            macro.write('] transforms=[')
            macro.write(os.path.join(regSavePath, name, 'Original', 'CH1').replace('\\', '/')) # stored in original registration folder
            macro.write('] interpolate"); \n')
            macro.write('run("Close All"); \n\n')

    # print command to run macro
    print('runMacro("' + macro_path.replace('\\', '/') + '");')

runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/VP_drug2/imgs/registered_imagej/10uM_FW1/Original/CH1/20Jan2024_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/VP_drug2/imgs/registered_imagej/1uM_FW1/Original/CH1/20Jan2024_register_transforms.ijm");
runMacro("y:/coskun-lab/Thomas/23_PLA_revision/data/VP_drug2/imgs/registered_imagej/ctrl_FW1/Original/CH1/20Jan2024_register_transforms.ijm");


## Combine all registered images into single folder

In [22]:
# Folders used after the ImageJ registration:
#   registered_imagej        - per-channel output of the ImageJ macros
#   registered_imagej_final  - registered images gathered per (Condition, FOV)
#   registered_crop          - registered images after cropping
regSavePath = data_dir.joinpath('VP_drug2', 'imgs', 'registered_imagej')

regSaveFinalPath = data_dir.joinpath('VP_drug2', 'imgs', 'registered_imagej_final')
regSaveFinalPath.mkdir(parents=True, exist_ok=True)

regSaveCropPath = data_dir.joinpath('VP_drug2', 'imgs', 'registered_crop')
regSaveCropPath.mkdir(parents=True, exist_ok=True)

In [23]:
# Gather the registered per-channel TIFFs into one folder per (Condition, FOV),
# renaming CH<c>_Cycle<nn>.tif to Cycle<nn>_CH<c>.tif. Missing registered files are skipped.
group = df.groupby(['Condition', 'FOV'])

for name, channels in group:
    name = '_'.join(name)
    for cycle in tqdm(channels['Cycle'].unique()): # each cycle

        dfCycle = channels[channels['Cycle'] == cycle].reset_index(drop=True)
        cycle = cycle[5:]  # keep only what follows the 'cycle' prefix
        cycle_tag = 'Cycle' + str(cycle).zfill(2)

        for ch in dfCycle.Channels: # each channel
            ch_tag = 'CH' + str(ch)

            # registered file written by ImageJ
            tifPath = os.path.join(regSavePath, name, 'Registered', ch_tag, ch_tag + '_' + cycle_tag + '.tif')

            # destination inside the combined folder
            folder = regSaveFinalPath / name
            folder.mkdir(parents=True, exist_ok=True)
            fileOut = os.path.join(regSaveFinalPath, name, cycle_tag + '_' + ch_tag + '.tif')

            if not os.path.exists(tifPath):
                continue
            shutil.copyfile(tifPath, fileOut)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

In [24]:
#### Crop every registered image to the region that is non-black in all reference (CH1) images

# Local import: only skimage.io is imported at the top of the notebook.
from skimage import measure

for sample_dir in sorted(os.listdir(regSaveFinalPath)):

    # Read imgs (sorted so the processing order is deterministic)
    paths = [f for f in sorted(os.listdir(regSaveFinalPath / sample_dir)) if 'tif' in f]
    imgs = [tiff.imread(regSaveFinalPath / sample_dir / f) for f in paths]

    # Bounding box of the non-zero area of each CH1 image.
    # The reference channel is recognised from the file name ('Cycle<nn>_CH1.tif')
    # rather than by matching the directory-listing position against the metadata
    # table, because os.listdir does not guarantee any particular order.
    bboxs = []
    for file_name, img in zip(paths, imgs):
        if '_CH1.' not in file_name:
            continue
        regions = measure.regionprops((img > 0).astype(np.uint8))
        if not regions:
            raise ValueError(f'{sample_dir}/{file_name} is completely black; cannot compute a crop box')
        bboxs.append(np.array(regions[0]['bbox']))
    bboxs = np.stack(bboxs)

    # Intersection of all boxes: largest minimum corner, smallest maximum corner
    min_row, min_col = np.max(bboxs[:, 0]), np.max(bboxs[:, 1])
    max_row, max_col = np.min(bboxs[:, 2]), np.min(bboxs[:, 3])

    # Save cropped images
    save_dir = regSaveCropPath / sample_dir
    save_dir.mkdir(parents=True, exist_ok=True)
    for file_name, img in zip(paths, imgs):
        tiff.imwrite(save_dir / file_name, img[min_row:max_row, min_col:max_col], bigtiff = True)

## Save data

In [25]:
from skimage import util
import h5py

def get_info(data_raw, marker_dict):
    """Build the metadata table of the registered/cropped images under ``data_raw``.

    This redefines the ``get_info`` declared earlier in the notebook with
    parsing rules for the registered file layout:
      * condition, FOV: 1st and 2nd '_'-separated token of the containing folder name
      * cycle:          'cycle' + int(last two characters of the 1st file-name token)
      * channel:        3rd character of the 2nd file-name token (as int)

    Every file whose name contains "tif" is considered; files whose
    (cycle, channel) is not present in ``marker_dict`` are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns: Condition, FOV, Cycle, Channels, Markers, Path.
    """
    columns = ["Condition", "FOV", "Cycle", "Channels", "Markers", "Path"]
    records = []

    # Loop through image folder
    for dirpath, _dirnames, filenames in os.walk(data_raw):
        # Split on both separators so the folder name is found on Windows and POSIX alike
        # (the previous split('\\') returned the whole path on POSIX).
        folder_parts = re.split(r'[\\/]', dirpath)[-1].split('_')

        for name in sorted(filenames):
            if "tif" not in name:
                continue

            # Get information from folder / image name
            n_split = name.split('_')
            cond = folder_parts[0]
            fov = folder_parts[1]
            cycle = 'cycle' + str(int(n_split[0][-2:]))
            ch = int(n_split[1][2])

            # Only a missing marker entry is an expected reason to skip a file;
            # any other error should surface instead of being swallowed.
            try:
                marker = marker_dict[cycle][ch]
            except KeyError:
                continue

            records.append((cond, fov, cycle, ch, marker, os.path.join(dirpath, name)))

    return pd.DataFrame(records, columns=columns)

def save_hdf5(
    path: str, name: str, data: np.ndarray, attr_dict=None, mode: str = "a"
) -> None:
    """Write ``data`` as a gzip-compressed, chunked dataset ``name`` into an HDF5 file.

    If a dataset called ``name`` already exists in the file nothing is written
    and a message is printed instead.

    Parameters
    ----------
    path : path-like
        HDF5 file to open (created if needed, depending on ``mode``).
    name : str
        Dataset name inside the file.
    data : np.ndarray
        Array to store; the dataset gets the same shape and dtype.
    attr_dict : dict, optional
        Key/value pairs stored as attributes of the new dataset.
    mode : str
        File mode passed to ``h5py.File`` (default "a").
    """
    # The context manager closes the file even if creating or writing the dataset fails.
    with h5py.File(path, mode) as hf:
        if hf.get(name) is not None:
            print(f"Dataset {name} exists")
            return

        dset = hf.create_dataset(
            name,
            shape=data.shape,
            maxshape=data.shape,  # fixed size: max shape equals the data shape
            chunks=True,
            dtype=data.dtype,
            compression="gzip",
        )
        dset[:] = data
        if attr_dict is not None:
            for attr_key, attr_val in attr_dict.items():
                dset.attrs[attr_key] = attr_val

def read_img(path):
    """Load the image at ``path`` with ``skimage.io.imread(..., as_gray=True)``."""
    return skimage.io.imread(path, as_gray=True)

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every element of ``pics`` with joblib and return the results as a list.

    ``n_jobs`` is forwarded to ``joblib.Parallel``; the default (20) is the
    value that used to be hard-coded.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(i) for i in pics)

def get_min(imgs):
    """Return, per axis, the smallest extent found among the shapes of ``imgs``."""
    all_shapes = np.array([np.array(one.shape) for one in imgs])
    smallest = all_shapes.min(axis=0)
    return smallest

In [29]:
# Metadata of the registered + cropped images, cached as CSV.
# NOTE: this cell rebinds `data_raw` and `df`, which earlier cells used for the raw images.
data_raw = data_dir.joinpath('VP_drug2', 'imgs', 'registered_crop')
df_meta_path = data_dir.joinpath('VP_drug2', 'metadata', 'info_sti.csv')

try:
    df_meta_path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_meta_path.is_file()

if df_exist:
    print('Loaded df')
    df = pd.read_csv(df_meta_path)
else:
    print('Created df')
    df = get_info(data_raw, markers_map)
    df.to_csv(df_meta_path, index=False)

Folder is already there
Created df


In [30]:
# Number of registered-image metadata rows per condition.
df.groupby('Condition').size()

Condition
10uM    13
1uM     13
ctrl    13
dtype: int64

In [31]:
# Stack the registered images of each (Condition, FOV) into one uint8 HDF5 file
# and record the file paths in imgs_reg.csv (cached: rebuilt only if the CSV is missing).
df_imgs_path = data_dir.joinpath('VP_drug2', 'metadata', 'imgs_reg.csv')
temp_path = data_dir.joinpath('VP_drug2', 'hdf5', 'registered')
try:
    temp_path.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    print("Folder is already there")

df_exist = df_imgs_path.is_file()

if df_exist:
    print('Loaded df')
    df_imgs = pd.read_csv(df_imgs_path)
else:
    print('Created df')
    group = df.groupby(['Condition','FOV'])
    rows = []

    for name, df_group in tqdm(group, total=len(group)):
        file_name = '_'.join(np.array(name).astype(str)) + '.hdf5'
        file_path = temp_path / file_name
        rows.append([*name, file_path])

        # Per-image metadata stored next to the pixel data
        channels = df_group.Channels.to_list()
        cycles = df_group.Cycle.to_list()
        markers = df_group.Markers.to_list()
        paths = df_group.Path.to_numpy()
        info = {"Cycle": cycles, "Channel": channels, "Marker": markers}

        # Read, crop to the smallest common shape, convert to uint8
        imgs = joblib_loop(read_img, paths)
        min_shape = get_min(imgs)
        imgs_cropped = np.array([img[:min_shape[0], :min_shape[1]] for img in imgs])
        imgs_cropped = util.img_as_ubyte(imgs_cropped)

        # hdf5 as Channel -> Z mapping
        save_hdf5(file_path, 'imgs', imgs_cropped, info)

    df_imgs = pd.DataFrame(rows, columns=['Condition', 'FOV', 'Path'])
    df_imgs.to_csv(df_imgs_path, index=False)

Created df


  0%|          | 0/3 [00:00<?, ?it/s]

In [32]:
# Show the table of registered HDF5 files (one row per Condition/FOV).
df_imgs

Unnamed: 0,Condition,FOV,Path
0,10uM,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\VP_d...
1,1uM,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\VP_d...
2,ctrl,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\VP_d...


# Segmentation

## Generate images

In [8]:
import napari 
from skimage import exposure, util

def contrast_str(img, n_min=0.1, n_max=99.9):
    """Contrast-stretch ``img`` between two percentiles and convert the result to uint8.

    ``n_min`` / ``n_max`` are the lower / upper percentiles (in percent) whose
    values are used as the input range of ``exposure.rescale_intensity``.
    Note: this redefinition uses ``n_max=99.9`` as default, unlike the earlier
    definition in this notebook (``n_max=100``).
    """
    lower, upper = np.percentile(img, (n_min, n_max))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return util.img_as_ubyte(stretched)

In [9]:
# Reload the table of registered HDF5 files written earlier in the notebook.
df_imgs_path = data_dir.joinpath('VP_drug2', 'metadata', 'imgs_reg.csv')
df_imgs = pd.read_csv(df_imgs_path)

In [10]:
# Open the first registered stack in napari for visual inspection (only the first row is used).
for row in df_imgs.iloc[:1].itertuples():
    # Read image stack and its marker names
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']

    # One napari layer per image of the stack, named after its marker, hidden by default
    napari.view_image(imgs, name=markers, channel_axis=0, visible=False)

In [10]:
# Display the current value of `markers` (marker names read from the 'Marker' attribute).
markers

array(['DNA', 'TEAD1/YAP', 'DNA', 'EGFR/Grb2', 'Trail/DR5', 'DNA',
       'Sox2/Oct4', 'DNA', 'E-cadherin/b-catenin', 'DNA', 'P-EGFR',
       'Phalloidin', 'KI67'], dtype=object)

In [11]:
# Markers whose images are combined into the cytoplasm channel of the segmentation input.
cyto_markers = ['P-EGFR', 'Phalloidin']

In [12]:
whole_seg_path = data_dir.joinpath('VP_drug2', 'imgs', 'segmentation')
whole_seg_path.mkdir(parents=True, exist_ok=True)


# Write one 3-plane uint8 image (empty, cytoplasm, DNA) per registered stack
for row in df_imgs.itertuples():
    # Read image stack and marker names
    path = row.Path
    with h5py.File(path, "r") as f:
        imgs = f['imgs'][:]
        markers = f['imgs'].attrs['Marker']

    # Select the cytoplasm images; the first image of the stack is used as the DNA image
    indices = np.isin(markers, cyto_markers)
    imgs_cyto = imgs[indices,:]

    # Contrast stretching, then merge the two cytoplasm images by pixel-wise maximum
    img_dapi = contrast_str(imgs[0], n_max=99.9)
    imgs_cyto_scaled = [contrast_str(imgs_cyto[k], n_max=99.5) for k in (0, 1)]
    img_cyto = np.max(np.array(imgs_cyto_scaled), axis=0)
    empty_plane = np.zeros(img_dapi.shape)
    img_rgb = np.stack([empty_plane, img_cyto, img_dapi], axis=0).astype(np.uint8)

    # Save as <Condition>_<FOV>.tif
    file_name = f'{"_".join(row[1:3])}.tif'
    file_path = whole_seg_path / file_name
    tiff.imwrite(file_path, img_rgb)

# QC masks

In [13]:
from skimage import morphology, measure
from skimage.segmentation import clear_border
from collections import defaultdict
   
def count_pixel_label_mask(regionmask, intensity_image):
    """Count how often each value of ``intensity_image`` occurs where ``regionmask`` is true.

    Returns a dict mapping value -> number of pixels.
    """
    values, counts = np.unique(intensity_image[regionmask], return_counts=True)
    return {value: count for value, count in zip(values, counts)}
    
# Quality control of mask
def qc_nuclei(mask_cyto, mask_nuclei, small_size=10000, small_nuclei_size=2000):
    '''
    Filter cell and nuclei label masks and assign every kept nucleus to a cell.

    Steps:
      1. remove cell objects smaller than ``small_size`` and cells touching the border
      2. keep nuclei pixels only inside the remaining cells and drop nuclei
         objects smaller than ``small_nuclei_size``
      3. build a cytoplasm mask: cell labels with all nuclei pixels (from the
         unfiltered ``mask_nuclei``) set to 0, cast to uint16
      4. for each kept nucleus, pick the cell label covering most of its pixels

    Parameters
    ----------
    mask_cyto : label image of the cells
    mask_nuclei : label image of the nuclei (same shape as ``mask_cyto``)
    small_size : minimum cell object size (pixels)
    small_nuclei_size : minimum nucleus object size (pixels); default 2000 is the
        value that used to be hard-coded

    Returns
    -------
    (mask_cyto, mask_nuclei_filtered, cyto, df) where ``df`` has the columns
    'Nuclei' (nucleus label) and 'Cyto' (matched cell label).
    '''
    # Filter out small objects
    mask_cyto = morphology.remove_small_objects(mask_cyto,  min_size=small_size)

    # Filter out mask touching border
    mask_cyto = clear_border(mask_cyto)

    # Keep nuclei only inside the remaining cells
    cell_mask = np.where(mask_cyto > 0, 1, 0)
    mask_nuclei_filtered = mask_nuclei * cell_mask
    mask_nuclei_filtered = morphology.remove_small_objects(mask_nuclei_filtered,  min_size=small_nuclei_size)

    # Cytoplasm = cell area minus nuclei area
    nuclei_mask = np.where(mask_nuclei>0, 1, 0)
    cyto = (mask_cyto - mask_cyto*nuclei_mask).astype(np.uint16)

    # Count pixel cell label in each nuclei region to assign each nuclei to cell
    props = measure.regionprops(mask_nuclei_filtered, intensity_image=mask_cyto,
                    extra_properties=(count_pixel_label_mask,))
    nuclei_labels = []
    cell_labels = []
    for prop in props:
        counts = prop['count_pixel_label_mask']
        # Most frequent cell label inside the nucleus; on ties the first (smallest)
        # label wins because the counts are keyed in ascending label order.
        corresponding_label = max(counts, key=counts.get)
        nuclei_labels.append(prop.label)
        cell_labels.append(int(corresponding_label))

    df = pd.DataFrame({'Nuclei': nuclei_labels, 'Cyto': cell_labels})
    return mask_cyto, mask_nuclei_filtered, cyto, df

In [14]:
# Index the segmentation masks by sample name:
#   'Nuclei_<name>.*' -> masks_path[<name>]['nuclei'],  '<name>.*' -> masks_path[<name>]['cyto']
mask_dir = data_dir.joinpath('VP_drug2', 'imgs', 'masks')
mask_filt_dir = data_dir.joinpath('VP_drug2', 'imgs', 'masks_filtered')
mask_filt_dir.mkdir(parents=True, exist_ok=True)

masks_path = defaultdict(dict)
for path in os.listdir(mask_dir):
    name = path.split('.')[0]
    is_nuclei = 'Nuclei' in name
    # name[7:] drops the first 7 characters (the length of 'Nuclei_')
    sample = name[7:] if is_nuclei else name
    masks_path[sample]['nuclei' if is_nuclei else 'cyto'] = mask_dir / path


In [15]:
# Run the mask QC for every sample and save the filtered masks plus the nucleus -> cell table.
# NOTE: `df` is rebound to the QC table of the last sample here.
for row in df_imgs.itertuples():
    path = row.Path
    name = '_'.join([row.Condition, row.FOV])

    # Read masks
    sample_masks = masks_path[name]
    mask_cyto_path = sample_masks['cyto']
    mask_nuclei_path = sample_masks['nuclei']
    mask_cyto = skimage.io.imread(mask_cyto_path)
    mask_nuclei = skimage.io.imread(mask_nuclei_path)

    # Crop the nuclei mask to (at most) the extent of the cell mask
    n_rows, n_cols = mask_cyto.shape[0], mask_cyto.shape[1]
    mask_nuclei = mask_nuclei[:n_rows, :n_cols]
    cell, nuclei, cyto, df = qc_nuclei(mask_cyto, mask_nuclei)

    # Write outputs: Nuclei_/Cell_/Cyto_<name>.tif and df_<name>.csv
    file_path = mask_filt_dir / f'Nuclei_{name}.tif'
    tiff.imwrite(file_path, nuclei)
    file_path = mask_filt_dir / f'Cell_{name}.tif'
    tiff.imwrite(file_path, cell)
    file_path = mask_filt_dir / f'Cyto_{name}.tif'
    tiff.imwrite(file_path, cyto)
    file_path = mask_filt_dir / f'df_{name}.csv'
    df.to_csv(file_path, index=False)

# Quantification PPI

In [16]:
import PLA

# Output folders for the PPI detection: pickled detections and the image stacks.
PPI_save_path = data_dir.joinpath('VP_drug2', 'PPI')
PPI_save_path.mkdir(parents=True, exist_ok=True)

PPI_imgs_path = data_dir.joinpath('VP_drug2', 'PPI_imgs')
PPI_imgs_path.mkdir(parents=True, exist_ok=True)

In [17]:
# Print the marker names stored with every registered stack (sanity check of the order).
for row in df_imgs.itertuples():
    path = row.Path
    with h5py.File(path, 'r') as h5_file:
        markers = h5_file['imgs'].attrs['Marker']
    print(markers)


['DNA' 'TEAD1/YAP' 'DNA' 'EGFR/Grb2' 'Trail/DR5' 'DNA' 'Sox2/Oct4' 'DNA'
 'E-cadherin/b-catenin' 'DNA' 'P-EGFR' 'Phalloidin' 'KI67']
['DNA' 'TEAD1/YAP' 'DNA' 'EGFR/Grb2' 'Trail/DR5' 'DNA' 'Sox2/Oct4' 'DNA'
 'E-cadherin/b-catenin' 'DNA' 'P-EGFR' 'Phalloidin' 'KI67']
['DNA' 'TEAD1/YAP' 'DNA' 'EGFR/Grb2' 'Trail/DR5' 'DNA' 'Sox2/Oct4' 'DNA'
 'E-cadherin/b-catenin' 'DNA' 'P-EGFR' 'Phalloidin' 'KI67']


In [18]:
# Show the table of registered HDF5 files (one row per Condition/FOV).
df_imgs

Unnamed: 0,Condition,FOV,Path
0,10uM,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\VP_d...
1,1uM,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\VP_d...
2,ctrl,FW1,y:\coskun-lab\Thomas\23_PLA_revision\data\VP_d...


In [19]:
# Markers on which spot detection is run in the detection loop.
PPI_markers = ['TEAD1/YAP',  'EGFR/Grb2', 'Trail/DR5', 'Sox2/Oct4','E-cadherin/b-catenin']

In [21]:
# Detect PPI spots for every sample and save the image stacks plus the pickled detections.
# NOTE(review): outputs are named after row[1] (the Condition) only, so several FOVs of
# the same condition would overwrite each other — confirm this is intended.
for row in df_imgs.itertuples():
    # Detector bound to this sample's HDF5 stack
    path = row.Path
    pla_detect = PLA.PLA_detection(path, name='imgs', m='Marker')

    with h5py.File(path, 'r') as f:
        markers = f['imgs'].attrs['Marker']

    imgs_spots, imgs_wths, imgs_raw = [], [], []
    for RNA in PPI_markers:
        if RNA not in markers:
            continue
        img_spot, img_wth, _, img = pla_detect.detect_spot(RNA, thres=0.03, min_radius=2)
        imgs_spots.append(img_spot)
        imgs_wths.append(img_wth)
        imgs_raw.append(img)

    # Save imgs
    file_path = PPI_imgs_path / (row[1] + '_raw.tiff')
    tiff.imwrite(file_path, imgs_raw)
    file_path = PPI_imgs_path / (row[1] + '_processed.tiff')
    tiff.imwrite(file_path, imgs_wths)
    file_path = PPI_imgs_path / (row[1] + '_detected.tiff')
    tiff.imwrite(file_path, imgs_spots)

    # Save PPI dict
    name = row[1] +'.pkl'
    pla_detect.save_pickle(PPI_save_path / name)

Reading image TEAD1/YAP
Processing image TEAD1/YAP
(array([0, 1], dtype=uint8), array([30726959,  1639081], dtype=int64))
Reading image EGFR/Grb2
Processing image EGFR/Grb2
(array([0, 1], dtype=uint8), array([32218504,   147536], dtype=int64))
Reading image Trail/DR5
Processing image Trail/DR5
(array([0, 1], dtype=uint8), array([32180475,   185565], dtype=int64))
Reading image Sox2/Oct4
Processing image Sox2/Oct4
(array([0, 1], dtype=uint8), array([31747116,   618924], dtype=int64))
Reading image E-cadherin/b-catenin
Processing image E-cadherin/b-catenin
(array([0, 1], dtype=uint8), array([32198025,   168015], dtype=int64))
File exist. Deleted
Reading image TEAD1/YAP
Processing image TEAD1/YAP
(array([0, 1], dtype=uint8), array([30001546,  2098002], dtype=int64))
Reading image EGFR/Grb2
Processing image EGFR/Grb2
(array([0, 1], dtype=uint8), array([31902926,   196622], dtype=int64))
Reading image Trail/DR5
Processing image Trail/DR5
(array([0, 1], dtype=uint8), array([31846796,   25275

# Extract per cell PPI count

In [22]:
import pickle 

def read_PPI(path):
    """Load and return the pickled object stored at ``path``.

    Uses ``pickle.load``, so only call it on files you trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def create_PPI_df(PPI_labels, PPI_loc, name, cyto=True):
    """Build a table with one row per spot: mask label, the two location columns and the PPI name.

    The label column is called 'Cyto' when ``cyto`` is true and 'Nuclei'
    otherwise; the location columns are called 'x' and 'y'; ``name`` is
    written to the 'PPI' column of every row.
    """
    label_column = 'Cyto' if cyto else 'Nuclei'
    table = np.hstack([PPI_labels[:, None], PPI_loc])
    df = pd.DataFrame(table, columns=[label_column, 'x', 'y'])
    df['PPI'] = name
    return df

In [23]:
mask_filt_dir = data_dir.joinpath('VP_drug2', 'imgs', 'masks_filtered')
PPI_save_path = data_dir.joinpath('VP_drug2', 'PPI')

# (substring to look for, number of leading characters to drop, key in masks_path);
# rules are tried in this order and the first match wins.
prefix_rules = (
    ('Nuclei', 7, 'nuclei'),
    ('Cyto', 5, 'cyto'),
    ('Cell', 5, 'cell'),
    ('df', 3, 'df'),
)

# Index the QC outputs by sample name
masks_path = defaultdict(dict)
for path in os.listdir(mask_filt_dir):
    name = path.split('.')[0]
    for token, n_drop, key in prefix_rules:
        if token in name:
            masks_path[name[n_drop:]][key] = mask_filt_dir / path
            break

In [27]:
# For every sample: look up the cell / nucleus label under each detected PPI spot
# and save one table per sample as <Condition>_<FOV>.csv.
for row in df_imgs.itertuples():
    name = '_'.join(row[1:3])

    # Read masks (the whole-cell mask is used for the 'Cyto' label) and the nucleus -> cell table
    mask_cyto_path = masks_path[name]['cell']
    mask_nuclei_path = masks_path[name]['nuclei']
    df_path = masks_path[name]['df']

    mask_cyto = skimage.io.imread(mask_cyto_path)
    mask_nuclei = skimage.io.imread(mask_nuclei_path)
    df_cell_info = pd.read_csv(df_path)
    nuclei2cell = dict(zip(df_cell_info.iloc[:,0], df_cell_info.iloc[:,1]))

    # Read PPI detections (stored under the Condition name only)
    PPI_dict = read_PPI(PPI_save_path / f'{row[1]}.pkl')
    dfs_PPI_cyto = []
    dfs_PPI_nuclei = []
    for k in PPI_dict.keys():
        # Columns 1 and 2 of each detection array are used as (row, column) mask indices
        PPI_loc = PPI_dict[k][:, 1:3].astype(np.uint32)
        row_idx, col_idx = PPI_loc[:,0], PPI_loc[:,1]

        # Cyto
        PPI_labels = mask_cyto[row_idx, col_idx]
        dfs_PPI_cyto.append(create_PPI_df(PPI_labels, PPI_loc, k))

        # Nuclei
        PPI_labels = mask_nuclei[row_idx, col_idx]
        dfs_PPI_nuclei.append(create_PPI_df(PPI_labels, PPI_loc, k, cyto=False))

    # Combined DFs; nuclei labels without a matched cell are kept unchanged in 'Nuclei_Cell'
    df_PPI_cyto = pd.concat(dfs_PPI_cyto)
    df_PPI_nuclei = pd.concat(dfs_PPI_nuclei)
    df_PPI_nuclei['Nuclei_Cell'] = df_PPI_nuclei['Nuclei'].apply(lambda x: nuclei2cell.get(x,x))
    df_merged = df_PPI_cyto.merge(df_PPI_nuclei)
    df_merged['Condition'] = row[1]
    df_merged['FOV'] = row[2]

    # Save dataframe
    path = PPI_save_path / f'{name}.csv'
    df_merged.to_csv(path, index=False)

# Coexpression between IF and PPI markers

In [7]:
from functools import partial

def read_mean_pixels(x, y, window_size, path):
    """Return the per-channel mean of a window of the ``'imgs'`` dataset.

    The window spans ``[x - window_size, x + window_size)`` along the last
    axis and ``[y - window_size, y + window_size)`` along the middle axis;
    the lower bounds are clamped at 0. The result has one value per entry
    of the first axis.
    """
    col_window = slice(np.maximum(x - window_size, 0), x + window_size)
    row_window = slice(np.maximum(y - window_size, 0), y + window_size)

    # Only the requested window is read from the HDF5 file
    with h5py.File(path, "r") as f:
        pixels = f['imgs'][:, row_window, col_window]

    return np.mean(pixels, axis=(1, 2))
    
def extract(df, path, window_size=5, n_jobs=20):
    """Measure the local mean intensity of every marker around each point of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``'x'`` and ``'y'`` coordinate columns, one row per point.
    path : path-like
        HDF5 file holding an ``'imgs'`` dataset with a ``'Marker'`` attribute.
    window_size : int, optional
        Half-width of the window, forwarded to ``read_mean_pixels``.
    n_jobs : int, optional
        Number of joblib workers (defaults to 20).

    Returns
    -------
    pandas.DataFrame
        One row per point (same order as ``df``, fresh 0..n-1 index) and one
        column per unique marker name; for repeated marker names only the
        first channel is kept.
    """
    x = df['x'].to_numpy()
    y = df['y'].to_numpy()

    # Read markers name
    with h5py.File(path, "r") as f:
        markers = f['imgs'].attrs['Marker']

    # First occurrence of every marker name, kept in channel order
    _, indices = np.unique(markers, return_index=True)
    indices.sort()
    marker_unique = markers[indices]

    # np.stack cannot handle an empty list; return an empty, correctly
    # labelled table instead.
    if len(df) == 0:
        return pd.DataFrame(columns=marker_unique)

    # Each worker opens the file itself and reads only its own window.
    # NOTE(review): points are passed as (y, x), so df['y'] lands in the
    # `x` argument of read_mean_pixels -- confirm the axis convention of
    # the 'x'/'y' columns.
    read_partial = partial(read_mean_pixels, window_size=window_size, path=path)
    mean_expressions = Parallel(n_jobs=n_jobs)(
        delayed(read_partial)(i, j) for i, j in zip(y, x)
    )

    # create dataframe
    mean_expressions = np.stack(mean_expressions)
    df_exp = pd.DataFrame(mean_expressions[:, indices], columns=marker_unique)
    return df_exp

In [8]:
# Read PPI
PPI_save_path =  data_dir / 'VP_drug2' / 'PPI'

# Select files by their '.csv' extension (not by substring) and read them in
# sorted order: directory listing order is arbitrary, and the pooled row
# index is reused downstream as 'Original Index', so it must be reproducible.
csv_paths = sorted(p for p in PPI_save_path.iterdir() if p.suffix == '.csv')
dfs = [pd.read_csv(p) for p in csv_paths]

# ignore_index gives the pooled table a fresh 0..n-1 index
df = pd.concat(dfs, ignore_index=True)
# Keep only rows with a positive 'Cyto' value
df_fil = df[df.Cyto > 0]

In [9]:
# Image metadata table; the Condition, FOV and Path columns are used below.
df_imgs_path = data_dir.joinpath('VP_drug2', 'metadata', 'imgs_reg.csv')
df_imgs = pd.read_csv(df_imgs_path)

In [10]:
# Measure marker intensities around every PPI point, one CSV per image
PPI_exp_path = data_dir.joinpath('VP_drug2', 'PPI', 'expression')
PPI_exp_path.mkdir(parents=True, exist_ok=True)

group = df_fil.groupby(['Condition', 'FOV'])

for name, df_group in group:
    condition, fov = name

    # .item() requires exactly one matching image row and raises otherwise
    is_match = (df_imgs.Condition == condition) & (df_imgs.FOV == fov)
    path = df_imgs.loc[is_match, 'Path'].item()
    df_expression = extract(df_group, path)

    # Keep the pooled-table row labels as an explicit column, then place the
    # expression values next to the points they were measured for.
    df_points = df_group.reset_index().rename(columns={'index': 'Original Index'})
    df_merged = pd.concat([df_points, df_expression], axis=1)

    # Save coexpression
    file_name = '_'.join(name)
    save_path = PPI_exp_path / f'{file_name}.csv'
    df_merged.to_csv(save_path)

# Ki67-positive area quantification

In [11]:
from skimage.filters import threshold_local, gaussian
from skimage.filters import threshold_otsu, rank
from skimage.morphology import disk
from skimage import exposure, measure

def threshold(img, sigma=15, thresh=50, percentiles=(2, 99.9)):
    """Binarize an image: contrast-stretch, subtract a blurred background, threshold.

    Parameters
    ----------
    img : ndarray
        Single-channel intensity image.
    sigma : float, optional
        Standard deviation of the Gaussian blur used as background estimate.
    thresh : scalar, optional
        Pixels whose background-subtracted value is ``>= thresh`` are set to
        True. The value is on the scale of the rescaled image (the default
        of 50 presumably targets 8-bit data -- adjust for other dtypes).
    percentiles : tuple of float, optional
        Lower and upper percentiles of ``img`` used as the input range of
        the contrast stretch.

    Returns
    -------
    ndarray of bool
        Foreground mask with the same shape as ``img``.
    """
    p_low, p_high = np.percentile(img, percentiles)
    img_rescale = exposure.rescale_intensity(img, in_range=(p_low, p_high))

    # Cast the background back to the dtype of the rescaled image rather
    # than a hard-coded uint8: cv2.subtract needs both operands to share a
    # dtype, and a uint8 cast would wrap around for wider integer images.
    # For 8-bit input the result is unchanged.
    img_blur = gaussian(img_rescale, sigma=sigma, preserve_range=True).astype(img_rescale.dtype)
    # Saturating subtraction: values below the background clip instead of wrapping
    img_pro = cv2.subtract(img_rescale, img_blur)

    binary = img_pro >= thresh
    return binary

def custom(regionmask, intensity_image):
    """Sum the values of ``intensity_image`` where ``regionmask`` is True.

    The signature follows the ``(regionmask, intensity_image)`` convention
    of ``regionprops`` extra properties.
    """
    selected = intensity_image[regionmask]
    return selected.sum()

In [12]:
mask_filt_dir = data_dir.joinpath('VP_drug2', 'imgs', 'masks_filtered')
PPI_save_path = data_dir.joinpath('VP_drug2', 'PPI')

# Index the filtered segmentation outputs by image name.
# Each entry: (substring identifying the file kind, number of leading
# characters stripped from the file stem to obtain the image name, dict key).
mask_kinds = (
    ('Nuclei', 7, 'nuclei'),
    ('Cyto', 5, 'cyto'),
    ('Cell', 5, 'cell'),
    ('df', 3, 'df'),
)

masks_path = defaultdict(dict)
for path in os.listdir(mask_filt_dir):
    name = path.split('.')[0]
    # The first matching kind wins; files matching no kind are ignored.
    for token, n_prefix, kind in mask_kinds:
        if token in name:
            masks_path[name[n_prefix:]][kind] = mask_filt_dir / path
            break

In [13]:
# Image metadata table; the Condition and Path columns are used below.
df_imgs_path = data_dir.joinpath('VP_drug2', 'metadata', 'imgs_reg.csv')
df_imgs = pd.read_csv(df_imgs_path)

In [14]:
# Channel to quantify; matched against the 'Marker' attribute of each image file.
marker = 'KI67'

In [147]:
# Per-nucleus area and thresholded-marker area, one table per image.
# NOTE(review): no FOV column is stored, so labels coming from different
# FOVs of the same condition cannot be told apart once pooled -- confirm
# this is intended.
dfs = []

for row in df_imgs.itertuples():
    path = row.Path

    # Read markers name, then only the channel of interest instead of
    # loading the whole image stack into memory.
    with h5py.File(path, "r") as f:
        markers = f['imgs'].attrs['Marker']
        img = f['imgs'][list(markers).index(marker)]

    binary = threshold(img)

    name = '_'.join(row[1:3])

    # Read masks
    mask_nuclei_path = masks_path[name]['nuclei']
    df_path =  masks_path[name]['df']
    mask_nuclei = skimage.io.imread(mask_nuclei_path)
    df_cell_info = pd.read_csv(df_path)
    # Lookup from the first column of the info table to its second column
    nuclei2cell = dict(zip(df_cell_info.iloc[:,0], df_cell_info.iloc[:,1]))

    # Get values: `custom` sums the 0/1 mask inside each nucleus, i.e. it
    # counts the above-threshold pixels.
    props = measure.regionprops_table(mask_nuclei, intensity_image=binary.astype(np.uint8),
                                  properties=('label', 'area'),
                                  extra_properties=(custom, ))
    df = pd.DataFrame(props)
    df.columns = ['Nuclei', 'Nuclei Area', 'Ki67 Area']
    # Labels missing from the lookup are kept unchanged
    df['Nuclei_Cell'] = df['Nuclei'].apply(lambda x: nuclei2cell.get(x,x))
    df['Condition'] = row.Condition
    dfs.append(df)

In [None]:
# Pool the per-image tables and write them next to the expression data
PPI_exp_path = data_dir.joinpath('VP_drug2', 'PPI', 'expression')
PPI_exp_path.mkdir(parents=True, exist_ok=True)

df = pd.concat(dfs)
df.to_csv(PPI_exp_path.joinpath('Ki67.csv'))

# Ki67 mean intensity quantification

In [21]:
mask_filt_dir = data_dir.joinpath('VP_drug2', 'imgs', 'masks_filtered')
PPI_save_path = data_dir.joinpath('VP_drug2', 'PPI')

# Index the filtered segmentation outputs by image name.
# Each entry: (substring identifying the file kind, number of leading
# characters stripped from the file stem to obtain the image name, dict key).
mask_kinds = (
    ('Nuclei', 7, 'nuclei'),
    ('Cyto', 5, 'cyto'),
    ('Cell', 5, 'cell'),
    ('df', 3, 'df'),
)

masks_path = defaultdict(dict)
for path in os.listdir(mask_filt_dir):
    name = path.split('.')[0]
    # The first matching kind wins; files matching no kind are ignored.
    for token, n_prefix, kind in mask_kinds:
        if token in name:
            masks_path[name[n_prefix:]][kind] = mask_filt_dir / path
            break

In [22]:
# Image metadata table; the Condition and Path columns are used below.
df_imgs_path = data_dir.joinpath('VP_drug2', 'metadata', 'imgs_reg.csv')
df_imgs = pd.read_csv(df_imgs_path)

In [23]:
# Channel to quantify; matched against the 'Marker' attribute of each image file.
marker = 'KI67'

In [29]:
# Per-cell area and mean marker intensity, one table per image.
# NOTE(review): no FOV column is stored, so labels coming from different
# FOVs of the same condition cannot be told apart once pooled -- confirm
# this is intended.
dfs = []

for row in df_imgs.itertuples():
    path = row.Path

    # Read markers name, then only the channel of interest instead of
    # loading the whole image stack into memory.
    with h5py.File(path, "r") as f:
        markers = f['imgs'].attrs['Marker']
        img = f['imgs'][list(markers).index(marker)]

    name = '_'.join(row[1:3])

    # Read masks: only the 'cell' mask is needed here
    mask_cyto_path = masks_path[name]['cell']
    mask_cyto = skimage.io.imread(mask_cyto_path)

    # Get values
    props = measure.regionprops_table(mask_cyto, intensity_image=img,
                                  properties=('label', 'area', 'mean_intensity'))
    df = pd.DataFrame(props)
    df.columns = ['Cell', 'Area', 'Ki67']
    df['Condition'] = row.Condition
    dfs.append(df)

In [30]:
PPI_exp_path = data_dir.joinpath('VP_drug2', 'PPI', 'expression')
PPI_exp_path.mkdir(parents=True, exist_ok=True)

# Pool the per-image tables and keep only rows whose Area exceeds 7000
df = pd.concat(dfs).loc[lambda table: table.Area > 7000]
df.to_csv(PPI_exp_path.joinpath('Ki67_intensity.csv'))

In [31]:
# Show the area-filtered table that was written to Ki67_intensity.csv
df

Unnamed: 0,Cell,Area,Ki67,Condition
0,8,38907.0,12.138278,10uM
1,10,15708.0,7.348739,10uM
2,11,10365.0,15.865027,10uM
3,14,29638.0,3.150989,10uM
4,15,26603.0,7.771454,10uM
...,...,...,...,...
508,617,32728.0,2.413560,ctrl
509,618,15838.0,6.887549,ctrl
510,620,28077.0,11.681590,ctrl
511,621,33493.0,1.594333,ctrl


# B-catenin mean expression

In [19]:

from skimage import measure

mask_filt_dir = data_dir.joinpath('VP_drug2', 'imgs', 'masks_filtered')
PPI_save_path = data_dir.joinpath('VP_drug2', 'PPI')

# Index the filtered segmentation outputs by image name.
# Each entry: (substring identifying the file kind, number of leading
# characters stripped from the file stem to obtain the image name, dict key).
mask_kinds = (
    ('Nuclei', 7, 'nuclei'),
    ('Cyto', 5, 'cyto'),
    ('Cell', 5, 'cell'),
    ('df', 3, 'df'),
)

masks_path = defaultdict(dict)
for path in os.listdir(mask_filt_dir):
    name = path.split('.')[0]
    # The first matching kind wins; files matching no kind are ignored.
    for token, n_prefix, kind in mask_kinds:
        if token in name:
            masks_path[name[n_prefix:]][kind] = mask_filt_dir / path
            break

In [20]:
# Image metadata table; the Condition and Path columns are used below.
df_imgs_path = data_dir.joinpath('VP_drug2', 'metadata', 'imgs_reg.csv')
df_imgs = pd.read_csv(df_imgs_path)

In [21]:
# Channel to quantify; matched against the 'Marker' attribute of each image
# file and also used as the name of the intensity column.
marker = 'E-cadherin/b-catenin'

In [22]:
# Per-cell area and mean marker intensity, one table per image.
# NOTE(review): no FOV column is stored, so labels coming from different
# FOVs of the same condition cannot be told apart once pooled -- confirm
# this is intended.
dfs = []

for row in df_imgs.itertuples():
    path = row.Path

    # Read markers name, then only the channel of interest instead of
    # loading the whole image stack into memory.
    with h5py.File(path, "r") as f:
        markers = f['imgs'].attrs['Marker']
        img = f['imgs'][list(markers).index(marker)]

    name = '_'.join(row[1:3])

    # Read masks: only the 'cell' mask is needed here
    mask_cyto_path = masks_path[name]['cell']
    mask_cyto = skimage.io.imread(mask_cyto_path)

    # Get values
    props = measure.regionprops_table(mask_cyto, intensity_image=img,
                                  properties=('label', 'area', 'mean_intensity'))
    df = pd.DataFrame(props)
    df.columns = ['Cell', 'Area', marker]
    df['Condition'] = row.Condition
    dfs.append(df)

In [23]:
PPI_exp_path = data_dir.joinpath('VP_drug2', 'PPI', 'expression')
PPI_exp_path.mkdir(parents=True, exist_ok=True)

# Pool the per-image tables and keep only rows whose Area exceeds 7000
df = pd.concat(dfs).loc[lambda table: table.Area > 7000]

df.to_csv(PPI_exp_path.joinpath('bcatenin_intensity.csv'))

In [24]:
# Show the area-filtered table that was written to bcatenin_intensity.csv
df

Unnamed: 0,Cell,Area,E-cadherin/b-catenin,Condition
0,8,38907.0,52.069036,10uM
1,10,15708.0,41.231602,10uM
2,11,10365.0,47.662808,10uM
3,14,29638.0,11.676260,10uM
4,15,26603.0,29.028756,10uM
...,...,...,...,...
508,617,32728.0,8.806924,ctrl
509,618,15838.0,26.462116,ctrl
510,620,28077.0,34.124978,ctrl
511,621,33493.0,6.475592,ctrl
