In [1]:
import h5py
import numpy as np
import imageio
from cellpose import models
from cellstitch.pipeline import *
from cellstitch.evaluation import *
import pandas as pd

# Pipeline Benchmark

Comparison of cellstitch (2D), 3D cellpose (2.5D), and plantseg (3D), using the same training set. 
### PlantSeg
- First, create a plantseg virtual environment: 
`conda create -n plant-seg -c pytorch -c conda-forge -c lcerrone -c awolny pytorch=1.9 pytorch-3dunet=1.3.7 plantseg` 
- activate the environment: `conda activate plant-seg` 
- download the ovules test dataset: https://osf.io/uzq3w/ to `../data/ovules/plantseg_test/`
- perform segmentation with the `confocal_unet_bce_dice_ds1x` model by running `plantseg --config plantseg.yaml`
- the predictions are saved to `../data/ovules/plantseg_test/PreProcessing/confocal_unet_bce_dice_ds1x/`

In [2]:
# Folder the PlantSeg "*_predictions_multicut.h5" files are read from below.
plantseg_results_folder = (
    "../data/ovules/plantseg_test/PreProcessing/"
    "confocal_unet_bce_dice_ds1x/MultiCut/"
)

In [2]:
# Base names (no extension) of the ovule test stacks processed in this notebook.
test_filenames = [
    "N_%s_final_crop_ds2" % stack_id
    for stack_id in ("294", "435", "441", "511", "522", "590", "593")
]

In [5]:
# Convert each PlantSeg multicut prediction (.h5) into a .npy label volume so
# that it can be benchmarked alongside the other pipelines.
for test_filename in test_filenames:
    print("Starting %s" % test_filename)

    with h5py.File("%s/%s_predictions_multicut.h5" % (plantseg_results_folder, test_filename), "r") as f:
        # Read the whole dataset into memory in one call instead of building a
        # Python list of slices and re-stacking it.
        plantseg = f['segmentation'][...]

    # Remap label 1 to 0.
    # NOTE(review): presumably PlantSeg marks the background with label 1 while
    # the benchmark cells expect 0 -- confirm against the PlantSeg output.
    plantseg[plantseg == 1] = 0
    np.save("./results/ovules/pipeline/plantseg/%s.npy" % test_filename, plantseg)

Starting N_294_final_crop_ds2
Starting N_435_final_crop_ds2
Starting N_441_final_crop_ds2
Starting N_511_final_crop_ds2
Starting N_522_final_crop_ds2
Starting N_590_final_crop_ds2
Starting N_593_final_crop_ds2


### Train cellpose model from scratch
First, we need to prepare the training data for cellpose.

In [11]:
# File names of the training stacks; the same name is used under both the
# raw/ and labels/ sub-folders of ovules_folder.
train_filenames = [
    "N_%s_ds2x.npy" % stack_id
    for stack_id in (
        "404", "405_A", "405_B", "416", "422", "425", "428", "440",
        "445", "449", "450", "451", "454", "457", "458", "487",
        "509", "512", "517", "534", "535", "536",
    )
]

# Root of the ovules dataset and the folder receiving the per-slice training TIFFs.
ovules_folder = "../data/ovules"
cellpose_folder = "%s/cellpose_train" % ovules_folder

In [12]:
# Split every training stack into per-slice TIFF pairs inside cellpose_folder:
# "<name>_<i>.tif" for the image and "<name>_<i>_masks.tif" for its labels.
# NOTE(review): the "_masks" suffix is presumably what cellpose's training
# loader uses to pair an image with its ground truth -- confirm.
for train_filename in train_filenames:
    img = np.load("%s/raw/%s" % (ovules_folder, train_filename))
    labels = np.load("%s/labels/%s" % (ovules_folder, train_filename))

    # Walk the stack along its first axis; labels are indexed with the same i.
    for i, img_slice in enumerate(img):
        imageio.imwrite("%s/%s_%s.tif" % (cellpose_folder, train_filename, i), img_slice)
        imageio.imwrite("%s/%s_%s_masks.tif" % (cellpose_folder, train_filename, i), labels[i])

`python -m cellpose --train --dir ../data/ovules/cellpose_train --pretrained_model None --n_epochs 100  --verbose` 

### Generate cellpose3d results

In [14]:
# Path of the cellpose weights loaded below.
# NOTE(review): presumably written by the training command above -- confirm.
model_dir = (
    '../data/ovules/cellpose_train/models/'
    'cellpose_residual_on_style_on_concatenation_off_cellpose_train_2022_09_19_14_23_42.578556'
)

In [15]:
# Test stacks to segment and the dataset root they are loaded from.
test_filenames = [
    "N_%s_final_crop_ds2" % stack_id
    for stack_id in ("294", "435", "441", "511", "522", "590", "593")
]
ovules_folder = "../data/ovules"

In [16]:
# Value passed as flow_threshold to every model.eval call below.
flow_threshold = 1

# Cellpose model initialised from the weights at model_dir, with GPU requested.
model = models.CellposeModel(pretrained_model=model_dir, gpu=True)

In [6]:
# Segment every test stack with cellpose in 3D mode and store the masks as .npy.
for test_filename in test_filenames:
    print("Starting %s" % test_filename)
    img = np.load("%s/raw/%s.npy" % (ovules_folder, test_filename))

    # Only the first of the three returned values (the masks) is kept.
    # NOTE(review): cellpose's documentation describes flow_threshold as unused
    # in 3D mode -- confirm for the installed version.
    masks, _, _ = model.eval(
        img,
        channels=[0, 0],
        do_3D=True,
        flow_threshold=flow_threshold,
    )
    np.save("./results/ovules/pipeline/cellpose3d/%s.npy" % test_filename, masks)

Starting N_294_final_crop_ds2
Starting N_435_final_crop_ds2
Starting N_441_final_crop_ds2
Starting N_511_final_crop_ds2
Starting N_522_final_crop_ds2
Starting N_590_final_crop_ds2
Starting N_593_final_crop_ds2


### Generate cellstitch results

In [17]:
# Segment every test stack slice by slice with cellpose, stitch the 2D masks
# with full_stitch, and store the result as .npy.
for test_filename in test_filenames:
    print("Starting %s" % test_filename)
    img = np.load("%s/raw/%s.npy" % (ovules_folder, test_filename))

    # Passing a list of slices makes cellpose evaluate each slice on its own.
    masks, _, _ = model.eval(
        list(img),
        channels=[0, 0],
        flow_threshold=flow_threshold,
    )

    # Stack the per-slice masks back into one array; full_stitch's return value
    # is not used, so the array it was given is what gets saved.
    cellstitch = np.array(masks)
    full_stitch(cellstitch)
    np.save("./results/ovules/pipeline/cellstitch/%s.npy" % test_filename, cellstitch)

Starting N_294_final_crop_ds2
Starting N_435_final_crop_ds2
Starting N_441_final_crop_ds2
Starting N_511_final_crop_ds2
Starting N_522_final_crop_ds2
Starting N_590_final_crop_ds2
Starting N_593_final_crop_ds2


# Benchmark Results

In [27]:
# Threshold handed to average_precision in all benchmark cells.
# NOTE(review): presumably an IoU cut-off -- confirm in cellstitch.evaluation.
ap_threshold = 0.5

# plantseg benchmark: one row of metrics per test stack, collected in `data`.
data = []
for filename in test_filenames:
    print("Starting %s" % filename)

    labels = np.load('../data/ovules/labels/%s.npy' % filename)
    masks = np.load("./results/ovules/pipeline/plantseg/%s.npy" % filename)

    # Distinct label values minus one (presumably discounting the background),
    # compared as a relative error against the ground truth.
    true_num_cells = np.unique(labels).size - 1
    num_cells = np.unique(masks).size - 1
    d_num_cells = abs(num_cells - true_num_cells) / true_num_cells

    # Relative error of the average volume reported by get_avg_vol.
    true_avg_vol = get_avg_vol(labels)
    avg_vol = get_avg_vol(masks)
    d_avg_vol = abs(true_avg_vol - avg_vol) / true_avg_vol

    ap, tp, fp, fn = average_precision(labels, masks, ap_threshold)

    row = [filename, d_num_cells, d_avg_vol, ap, tp, fp, fn]
    print(row)
    data.append(row)

Starting N_294_final_crop_ds2


  iou = overlap / (n_pixels_pred + n_pixels_true - overlap)


['N_294_final_crop_ds2', 0.6335078534031413, 0.8808456934525408, 0.03949483352468427, 344, 5272, 3094]
Starting N_435_final_crop_ds2
['N_435_final_crop_ds2', 1.213047068538398, 0.9454125580383421, 0.018853102906520033, 72, 2608, 1139]
Starting N_441_final_crop_ds2
['N_441_final_crop_ds2', 1.8594306049822065, 1.7830965202251394, 0.01854895515379197, 79, 3135, 1045]
Starting N_511_final_crop_ds2
['N_511_final_crop_ds2', 0.9482596425211665, 0.9207325019255267, 0.03671849156467086, 111, 1960, 952]
Starting N_522_final_crop_ds2
['N_522_final_crop_ds2', 1.7296538821328344, 0.9402124851547904, 0.02863777089783282, 111, 2807, 958]
Starting N_590_final_crop_ds2
['N_590_final_crop_ds2', 0.7425213675213675, 0.5982522063115269, 0.05681350349938246, 138, 1493, 798]
Starting N_593_final_crop_ds2
['N_593_final_crop_ds2', 0.4071246819338422, 0.902965031031866, 0.02798152675903287, 103, 2109, 1469]


In [28]:
# Tabulate the plantseg rows; column order matches the order of each row's fields.
plantseg_df = pd.DataFrame(
    data,
    columns=["filename", "d_num_cells", "d_avg_vol", "ap", "tp", "fp", "fn"],
)

In [29]:
# Persist the plantseg metrics table without the row index.
plantseg_df.to_csv(path_or_buf="./results/ovules/pipeline/plantseg.csv", index=False)

In [30]:
# cellpose3d benchmark: one row of metrics per test stack, collected in `data`.
data = []
for filename in test_filenames:
    print("Starting %s" % filename)

    # Ground truth first, then the cellpose3d prediction for the same stack.
    labels = np.load('../data/ovules/labels/%s.npy' % filename)
    true_num_cells = np.unique(labels).size - 1
    true_avg_vol = get_avg_vol(labels)

    masks = np.load("./results/ovules/pipeline/cellpose3d/%s.npy" % filename)
    num_cells = np.unique(masks).size - 1
    avg_vol = get_avg_vol(masks)

    # Relative errors with respect to the ground-truth values.
    d_num_cells = abs(num_cells - true_num_cells) / true_num_cells
    d_avg_vol = abs(true_avg_vol - avg_vol) / true_avg_vol

    ap, tp, fp, fn = average_precision(labels, masks, ap_threshold)

    row = [filename, d_num_cells, d_avg_vol, ap, tp, fp, fn]
    print(row)
    data.append(row)

Starting N_294_final_crop_ds2
['N_294_final_crop_ds2', 2.1896451425247236, 0.7671165168925141, 0.149561053471668, 1874, 9092, 1564]
Starting N_435_final_crop_ds2
['N_435_final_crop_ds2', 1.5672997522708505, 0.6233465940982975, 0.323935029114312, 1057, 2052, 154]
Starting N_441_final_crop_ds2
['N_441_final_crop_ds2', 3.9919928825622777, 0.7741039634307023, 0.16401659177324576, 949, 4662, 175]
Starting N_511_final_crop_ds2
['N_511_final_crop_ds2', 0.5569143932267169, 0.3720080296495624, 0.5116796440489433, 920, 735, 143]
Starting N_522_final_crop_ds2
['N_522_final_crop_ds2', 2.087932647333957, 0.6983797327368364, 0.23760974228263948, 839, 2462, 230]
Starting N_590_final_crop_ds2
['N_590_final_crop_ds2', 2.8632478632478633, 0.8232116399498604, 0.13714713964526606, 549, 3067, 387]
Starting N_593_final_crop_ds2
['N_593_final_crop_ds2', 3.9840966921119594, 0.8648220857908332, 0.08927744326076888, 771, 7064, 801]


In [31]:
# Tabulate the cellpose3d rows and write them to disk without the row index.
cp_df = pd.DataFrame(
    data,
    columns=["filename", "d_num_cells", "d_avg_vol", "ap", "tp", "fp", "fn"],
)

cp_df.to_csv(path_or_buf="./results/ovules/pipeline/cellpose3d.csv", index=False)

In [32]:
# cellstitch benchmark: one row of metrics per test stack, collected in `data`.
data = []
for filename in test_filenames:
    print("Starting %s" % filename)

    labels = np.load('../data/ovules/labels/%s.npy' % filename)
    masks = np.load("./results/ovules/pipeline/cellstitch/%s.npy" % filename)

    # Cell-count comparison: distinct label values minus one on each side.
    true_num_cells = np.unique(labels).size - 1
    num_cells = np.unique(masks).size - 1
    d_num_cells = abs(num_cells - true_num_cells) / true_num_cells

    # Average-volume comparison based on get_avg_vol.
    true_avg_vol = get_avg_vol(labels)
    avg_vol = get_avg_vol(masks)
    d_avg_vol = abs(true_avg_vol - avg_vol) / true_avg_vol

    ap, tp, fp, fn = average_precision(labels, masks, ap_threshold)

    row = [filename, d_num_cells, d_avg_vol, ap, tp, fp, fn]
    print(row)
    data.append(row)

Starting N_294_final_crop_ds2


  iou = overlap / (n_pixels_pred + n_pixels_true - overlap)


['N_294_final_crop_ds2', 0.060209424083769635, 0.05142940143641679, 0.23157894736842105, 1254, 1977, 2184]
Starting N_435_final_crop_ds2
['N_435_final_crop_ds2', 0.12303881090008258, 0.14001052168643457, 0.24866440019426905, 512, 848, 699]
Starting N_441_final_crop_ds2
['N_441_final_crop_ds2', 0.4653024911032028, 0.24038858441680125, 0.26298997265268914, 577, 1070, 547]
Starting N_511_final_crop_ds2
['N_511_final_crop_ds2', 0.08748824082784572, 0.05804676008885588, 0.32702349869451697, 501, 469, 562]
Starting N_522_final_crop_ds2
['N_522_final_crop_ds2', 0.29279700654817586, 0.2665386208489184, 0.26666666666666666, 516, 866, 553]
Starting N_590_final_crop_ds2
['N_590_final_crop_ds2', 0.24465811965811965, 0.0009902641088907341, 0.19752186588921283, 271, 436, 665]
Starting N_593_final_crop_ds2
['N_593_final_crop_ds2', 0.08078880407124682, 0.08844273153468665, 0.2175141242937853, 539, 906, 1033]


In [33]:
# Tabulate the cellstitch rows and write them to disk without the row index.
ct_df = pd.DataFrame(
    data,
    columns=["filename", "d_num_cells", "d_avg_vol", "ap", "tp", "fp", "fn"],
)

ct_df.to_csv(path_or_buf="./results/ovules/pipeline/cellstitch.csv", index=False)

### Analyze Results

In [37]:
# Per-metric mean over the test stacks. cp_df also holds the string "filename"
# column, so restrict the reduction to numeric columns explicitly; relying on
# the implicit drop warns on older pandas and raises TypeError on pandas >= 2.0.
cp_df.mean(numeric_only=True)

  cp_df.mean()


d_num_cells       2.463018
d_avg_vol         0.703284
ap                0.230461
tp              994.142857
fp             4162.000000
fn              493.428571
dtype: float64

In [38]:
# Per-metric standard deviation over the test stacks. cp_df also holds the
# string "filename" column, so restrict the reduction to numeric columns
# explicitly; relying on the implicit drop warns on older pandas and raises
# TypeError on pandas >= 2.0.
cp_df.std(numeric_only=True)

  cp_df.std()


d_num_cells       1.255826
d_avg_vol         0.166195
ap                0.145758
tp              419.850937
fp             2979.428301
fn              526.126365
dtype: float64