In [22]:
from pathlib import Path
import PIL
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os

from loguru import logger as log
log.remove()
log.add(sys.stdout, level="ERROR")


from ufish.api import UFish
from cellpose import models

from utils import (
    get_signal_masks, assign_spots, segment_cells, extract_cells,
    plot_figs, plot_all_rois, plot_on_img
)
from skimage.measure import regionprops, label

In [23]:
# Cancer pathology samples : PML/RARA
data_dir = "/home/qzhang/dnafish_data/PML-RARA-Fusion"

if not os.path.exists(data_dir):
    raise FileNotFoundError(f"The directory {data_dir} does not exist.")

sample = os.listdir(data_dir)
print("Sample dirs:", sample)

img_paths = {}

for s in sample:
    sample_path = os.path.join(data_dir, s)
    if os.path.isdir(sample_path):
        for path in Path(sample_path).glob("*.TIF"):
            img_paths[path.stem] = path

print(img_paths)

images = {
    name: np.array(PIL.Image.open(path))
    for name, path in img_paths.items()
}


Sample dirs: ['F2410254A_POS_0.80', 'F2409132A_POS_0.13']
{'F2410254A.299': PosixPath('/home/qzhang/dnafish_data/PML-RARA-Fusion/F2410254A_POS_0.80/F2410254A.299.TIF'), 'F2410254A.1': PosixPath('/home/qzhang/dnafish_data/PML-RARA-Fusion/F2410254A_POS_0.80/F2410254A.1.TIF'), 'F2409132A.268': PosixPath('/home/qzhang/dnafish_data/PML-RARA-Fusion/F2409132A_POS_0.13/F2409132A.268.TIF'), 'F2409132A.1': PosixPath('/home/qzhang/dnafish_data/PML-RARA-Fusion/F2409132A_POS_0.13/F2409132A.1.TIF')}


In [24]:
# initialize cellpose model
cp = models.Cellpose(gpu=False, model_type="nuclei")

# initialize ufish model
uf = UFish()
uf.load_weights("./v1.0.1-DNAFISH_model.onnx")



<ufish.api.UFish at 0x7f14cdd9a1a0>

In [25]:
def filter_cell_by_prop(
        prop,
        img_size,
        area_threshold=1000,
        axis_ratio_threshold=0.5,
        border_threshold=1
        ):
    close_to_edge = False
    if prop.coords[:, 0].min() <= border_threshold or \
            prop.coords[:, 0].max() >= img_size[0] - border_threshold or \
            prop.coords[:, 1].min() <= border_threshold or \
            prop.coords[:, 1].max() >= img_size[1] - border_threshold:
        close_to_edge = True
    if close_to_edge:
        return False
    if prop.area < area_threshold:
        return False
    axis_ratio = prop.minor_axis_length / prop.major_axis_length
    if axis_ratio < axis_ratio_threshold:
        return False
    return True

def filter_cell_by_signals(
        signals, num_thresh_per_channel=3, num_thresh=5):
    count = 0
    for _, signal in signals.items():
        count += signal.shape[0]
        if signal.shape[0] > num_thresh_per_channel:
            return False
    if count < 2:
        return False
    if count > num_thresh:
        return False
    return True

In [26]:
from skimage.morphology import dilation, disk

# Define pipeline
def pipeline(img, signal_channels=[0, 1]):
    print("---------- Begin pipeline ----------")
    print(f"Image shape: {img.shape}")
    print("Step 1: segment cells")
    mask = segment_cells(cp, img)
    print(f"Number of cells: {mask.max()}")
    mask = dilation(mask, disk(3))

    print("Step 2: extract ROIs")
    cell_rois, cell_masks, cell_props = extract_cells(
        img, mask, target_size=128)
    
    print("Step 3: call signals and assign spots for each cell")
    table = []
    cell_signals = []
    new_cell_rois = []
    new_cell_masks = []
    new_cell_props = []
    filtered_by_prop = []
    filtered_by_signals = []
    for c, cell_roi in enumerate(cell_rois):
        cell_prop = cell_props[c]
        if not filter_cell_by_prop(cell_prop, mask.shape):
            filtered_by_prop.append(c)
            continue
        signal_mask_merge, signal_masks_sub = get_signal_masks(
            uf, cell_roi, signal_channels, quantile=5,
            mask_dilation_size=4, hard_threshold=30
        )
        signals = {}
        for ch in signal_channels:
            signals[f"ch{ch+1}"] = []
        name = "+".join([f"ch{ch+1}" for ch in signal_channels])
        signals[name] = []
        for i, ch in enumerate(signal_channels):
            single_ch = regionprops(label(signal_masks_sub[i]))
            spots = np.array([cc.centroid for cc in single_ch])
            signals[f"ch{ch+1}"] = spots
        merged = regionprops(label(signal_mask_merge))
        spots = np.array([cc.centroid for cc in merged])
        name = "+".join([f"ch{ch+1}" for ch in signal_channels])
        signals[name] = spots

        if not filter_cell_by_signals(signals):
            filtered_by_signals.append(c)
            continue

        assigns = {}
        for name, spots in signals.items():
            try:
                assigns[name] = assign_spots(spots, cell_masks[c], 30)
            except Exception:
                assigns[name] = []

        df = {
            key: sum(value) for key, value in assigns.items()
            if isinstance(value, np.ndarray)
        }
        df["cell_id"] = f'{c+1}'
        last_key = list(df.keys())[-1]
        last_value = df.pop(last_key)
        df = {last_key: last_value, **df}
        table.append(df)
        cell_signals.append(signals)
        new_cell_rois.append(cell_roi)
        new_cell_masks.append(cell_masks[c])
        new_cell_props.append(cell_props[c])
    print(f"Number of cells filtered by properties: {len(filtered_by_prop)}")
    print(f"Number of cells filtered by signals: {len(filtered_by_signals)}")
    return pd.DataFrame(table).fillna(0), new_cell_rois, new_cell_masks, cell_signals, new_cell_props


In [27]:
from shutil import rmtree
import os
main_dir = "PML-RARA_results"
os.makedirs(main_dir, exist_ok=True)
for name, img in images.items():
    print(name)
    res_dir = f"./{main_dir}/{name}"
    if os.path.exists(res_dir):
        rmtree(f"./{res_dir}")
    os.mkdir(f"./{res_dir}")

    if name.startswith("Trisomy"):
        signal_channels=[0]
    else:
        signal_channels=[0, 1]
    #img = img[:1000, :1000, :]  # crop image for testing
    table, rois, masks, signals, props = pipeline(img, signal_channels)
    plot_figs(rois, masks, signals, res_dir)

    fig = plot_all_rois(rois, masks, signals, colors={"ch1": "hotpink", "ch2": "lime", "ch1+ch2": "yellow"})
    fig.savefig(f"{res_dir}/{name}_all_cells.pdf")
    plt.close(fig)
    table.to_csv(f"{res_dir}/{name}_results.csv", index=False)

    fig = plot_on_img(img, props)
    fig.savefig(f"{res_dir}/{name}_cell_id.pdf")
    plt.close(fig)


F2410254A.299
---------- Begin pipeline ----------
Image shape: (3000, 4096, 3)
Step 1: segment cells
Number of cells: 512
Step 2: extract ROIs
Step 3: call signals and assign spots for each cell
Number of cells filtered by properties: 62
Number of cells filtered by signals: 71
F2410254A.1
---------- Begin pipeline ----------
Image shape: (3000, 4096, 3)
Step 1: segment cells
Number of cells: 509
Step 2: extract ROIs
Step 3: call signals and assign spots for each cell
Number of cells filtered by properties: 68
Number of cells filtered by signals: 101
F2409132A.268
---------- Begin pipeline ----------
Image shape: (3000, 4096, 3)
Step 1: segment cells
Number of cells: 426
Step 2: extract ROIs
Step 3: call signals and assign spots for each cell
Number of cells filtered by properties: 46
Number of cells filtered by signals: 81
F2409132A.1
---------- Begin pipeline ----------
Image shape: (3000, 4096, 3)
Step 1: segment cells
Number of cells: 504
Step 2: extract ROIs
Step 3: call signals a

In [28]:
import pprint

pprint.pprint(list(images.keys()))

['F2410254A.299', 'F2410254A.1', 'F2409132A.268', 'F2409132A.1']


In [29]:
# Results statistics

## Sample F2408244A positive rate: 0.78
## Sample F2409132A positive rate: 0.13
## Sample F2410254A positive rate: 0.80

res_dir = "PML-RARA_results"

csv_paths = {}

for s in images.keys():
    sample_path = os.path.join(res_dir, s)
    if os.path.isdir(sample_path):
        for path in Path(sample_path).glob("*.csv"):
            csv_paths[path.stem] = path
csv_paths

{'F2410254A.299_results': PosixPath('PML-RARA_results/F2410254A.299/F2410254A.299_results.csv'),
 'F2410254A.1_results': PosixPath('PML-RARA_results/F2410254A.1/F2410254A.1_results.csv'),
 'F2409132A.268_results': PosixPath('PML-RARA_results/F2409132A.268/F2409132A.268_results.csv'),
 'F2409132A.1_results': PosixPath('PML-RARA_results/F2409132A.1/F2409132A.1_results.csv')}

In [30]:
def cal_rate(file_path):

    data = pd.read_csv(file_path, header=0)
    # normal ch1+ch2=2
    #normal_cells = data[data['ch1'] + data['ch2'] == 2]

    # abnorbal 
    abnormal_cells = data[(data['ch1+ch2'] >=2)]
    
    # positive rate
    positive_rate = len(abnormal_cells) / len(data)
    
    return positive_rate

In [33]:

# calculate positive rates
for sample, csv_path in csv_paths.items():
    sample_ = sample.split(".")[0]
    positive_rate = cal_rate(csv_path)
    expected_rate = expected_rates[sample_]
    print(f"Sample {sample} positive rate: {positive_rate:.2f}")


Sample F2410254A.299_results positive rate: 0.47
Sample F2410254A.1_results positive rate: 0.54
Sample F2409132A.268_results positive rate: 0.28
Sample F2409132A.1_results positive rate: 0.26
