In this notebook we will match the mask numbers of the nuclei and the cell body. 
In order to do this, the segmentation notebook should have been run with struct = CELL and
with struct = NUCLEI.

23/04/25     Jacques Bourg @ Florian Muller lab. Institut Pasteur.

<img src="./HOX_pipeline.png" alt="LNP pipeline" width="1200" height="477">

In [4]:
import sys
import os
import numpy as np
import napari
from skimage import io
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display

In [5]:
%load_ext autoreload
%autoreload 2

base_dir = Path("../../src").resolve()
sys.path.append(str(base_dir))
sys.path.append(str(base_dir / "utils"))
sys.path.append(str(base_dir / "segmentation"))
sys.path.append(str(base_dir / "synthesis"))

from segmentation.refine_seg import Segmentation
from segmentation.thresh_rois_int_size_napari_v3 import ImageThresholding_v3
from synthesis.synthesize import Synthesis
from utils.parameters_tracking import Parameter_tracking as Track
from utils.file_handling import FileProcessor

sg  = Segmentation()
st  = Synthesis()
tk  = Track()
fp  = FileProcessor()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
var = str(Path('../Analysis'))
batch_folders = os.listdir(var)
dropdown = widgets.Dropdown(options=batch_folders, description='Select:', layout=widgets.Layout(width='auto', min_width='150px'))
display(dropdown)

Dropdown(description='Select:', layout=Layout(min_width='150px', width='auto'), options=('.gitkeep', 'test2'),…

In [7]:
n         = np.where(np.array(batch_folders) == dropdown.value)[0][0]
file_path = str(Path(var) / Path(batch_folders[n]) / Path(batch_folders[n] +'.json'))
constants = tk.load_json(file_path)
batch_name= constants['BATCH_NAME']; print(batch_name)

test2


In [8]:
modalities = constants['MODALITIES']
dropdown2 = widgets.Dropdown(options=modalities, description='Select:', layout=widgets.Layout(width='auto', min_width='150px'))
display(dropdown2)

Dropdown(description='Select:', layout=Layout(min_width='150px', width='auto'), options=('EXPERIMENT', 'CONTRO…

In [9]:
n2  = np.where(np.array(modalities) == dropdown2.value)[0][0] 
modality = modalities[n2]; print(modality)

EXPERIMENT


The cell statistics depend on the spot detection method, and therefore we will create a statistics folder for each method  

In [10]:
detection_methods = ['UFISH','BIGFISH']
dropdown8         = widgets.Dropdown(options=detection_methods, description='Select:', layout=widgets.Layout(width='auto', min_width='150px'))
display(dropdown8)

Dropdown(description='Select:', layout=Layout(min_width='150px', width='auto'), options=('UFISH', 'BIGFISH'), …

In [11]:
n8     = np.where(np.array(detection_methods) == dropdown8.value)[0][0]
meth   = detection_methods[n8]; print(meth)

BIGFISH


Model selection

In [12]:
folder_models          = fp.select_folder(initialdir = str(Path(f'../Analysis/{batch_name}/{modality}/DAPI/NUCLEI/train_2D/')), title="Select the path to the cell pose models folder (ex  .../FISH/.../train2D/models) ")     
batch_models           = os.listdir(folder_models)
batch_models_filtered  = [el for el in batch_models if Path(el).suffix == '']
dropdown_model         = widgets.Dropdown(options=batch_models_filtered, description='Select:', layout=widgets.Layout(width='auto', min_width='150px'))
display(dropdown_model)

Selected folder: /home/jacques/Documents/FISH/Data_analysis/pipeline_smfish_in_apifish/pipeline_HOX_apiFISH/HOX_pipeline/Analysis/test2/EXPERIMENT/DAPI/NUCLEI/train_2D/models


Dropdown(description='Select:', layout=Layout(min_width='150px', width='auto'), options=('CP_20250616_142322',…

In [13]:
n4    = np.where((np.array(batch_models) == dropdown_model.value))[0][0]
model = batch_models[n4]; print(model)

CP_20250616_142322


In [14]:
pretrained_model   = Path(folder_models) / Path(model)
batch_mod_chan_mip = constants[f'BATCH_{modality}_DAPI_NUCLEI_MIP']
max_px_thresh      = 200**2
batch              = [Path(el).resolve() for el in batch_mod_chan_mip]

In [15]:
mask_nuc_list_path = constants[f'MASK_{modality}_DAPI_NUCLEI_LIST_PATH']
mask_contours_path = constants[f'MASK_{modality}_DAPI_NUCLEI_CONTOURS_PATH']
dict_masks_nuc     = fp.load_masks_distributed_files(mask_nuc_list_path)

We will now clean the cell/ nuclei masks based on area an intensity criteria.

In [16]:
mask_nuclei_clean_folder_path   = Path(f"../Analysis/{batch_name}/{modality}/DAPI/NUCLEI/masks_cleaned") # create folder for clean nuclei masks
if not mask_nuclei_clean_folder_path.exists():
    mask_nuclei_clean_folder_path.mkdir(parents=True)

In [17]:
im_thr = ImageThresholding_v3(batch, pretrained_model, BATCH_NAME=batch_name, maw_px_thresh=max_px_thresh, MASK_NUC_LIST_PATH= mask_nuc_list_path, MASK_CONTOURS_LIST_PATH=mask_contours_path)
im_thr.run()

After closing the app start from here on.

In [18]:
mask_thresh_area_path    = mask_nuclei_clean_folder_path / Path(f"{batch_name}_masks_thresh_area.npy")
mask_removed_labels_path = mask_nuclei_clean_folder_path / Path(f"{batch_name}_masks_removed.npy")

np.save(mask_thresh_area_path, im_thr.dict_thresh_i_pixa)     
np.save(mask_removed_labels_path, im_thr.dict_labels_to_rem)    # store intermediary files.

In [19]:
mask_cell_clean_folder_path   = Path(f"../Analysis/{batch_name}/{modality}/DAPI/CELL/masks_cleaned")  # create folder for clean cell masks
if not mask_cell_clean_folder_path.exists():
    mask_cell_clean_folder_path.mkdir(parents=True)
    
mask_nuclei_cleaned_list_path = mask_nuclei_clean_folder_path / Path(f"{batch_name}_masks_{modality}_DAPI_NUCLEI.npy")
mask_cell_cleaned_list_path   = mask_cell_clean_folder_path   / Path(f"{batch_name}_masks_{modality}_DAPI_CELL.npy") 

In [20]:
# before starting: create stats for each gene channel:
  
stats_folder = Path(f"../Analysis/{batch_name}/{modality}/DAPI/CELL/stats")    # create the folder masks stats (for the cells)
if not stats_folder.exists():
    stats_folder.mkdir(parents=True)
print(stats_folder)     

stats_folder_meth = Path(f"../Analysis/{batch_name}/{modality}/DAPI/CELL/stats/{meth}")    # create the folder masks stats (for the cells)
if not stats_folder_meth.exists():
    stats_folder_meth.mkdir(parents=True)
print(stats_folder_meth)


for chan_f_t in constants['CHANNELS']:
    if chan_f_t != 'DAPI':
        stats_gene_folder_meth = Path(f"../Analysis/{batch_name}/{modality}/DAPI/CELL/stats/{meth}/{chan_f_t}")    # create the folder masks stats (for the cells)
        if not stats_gene_folder_meth.exists():
            stats_gene_folder_meth.mkdir(parents=True)
        print(stats_gene_folder_meth)

        mask_nuc_list_path = constants[f'MASK_{modality}_DAPI_NUCLEI_LIST_PATH']
        file_ref_mask_stats = stats_gene_folder_meth / f'masks_stats_refs.npy'
        fp.save_masks_stats_distributed_files_init(file_ref_mask_stats, mask_nuc_list_path, col_name='Cell_ID')  # numpy_file_add: str,  mask_path: str, col_name='Cell_ID'  
        
        exec(f"MASK_{modality}_CELL_STATS_{meth}_{chan_f_t}_LIST_PATH = file_ref_mask_stats", globals())

../Analysis/test2/EXPERIMENT/DAPI/CELL/stats
../Analysis/test2/EXPERIMENT/DAPI/CELL/stats/BIGFISH
../Analysis/test2/EXPERIMENT/DAPI/CELL/stats/BIGFISH/FISH0
../Analysis/test2/EXPERIMENT/DAPI/CELL/stats/BIGFISH/FISH1


In [22]:
exec(f"MASK_{modality}_DAPI_NUCLEI_CLEANED_LIST_PATH = mask_nuclei_cleaned_list_path", globals())
exec(f"MASK_{modality}_DAPI_CELL_CLEANED_LIST_PATH = mask_cell_cleaned_list_path", globals())
exec(f"MASK_{modality}_DAPI_NUCLEI_REMOVED_LABELS_LIST_PATH = mask_removed_labels_path", globals())

In [23]:
constants2 = tk.collect_constants()
tk.save_constants_and_commit_hash(constants2, batch_name, folder_path = Path(f"../Analysis/{batch_name}"))

In [24]:
constants = tk.load_json(file_path)

Run for each gene:

In [25]:
channels_fish = constants['CHANNELS'] #   choose a FISH channel, DAPI doesn't make sense
dropdown_f = widgets.Dropdown(options=channels_fish, description='Select:', layout=widgets.Layout(width='auto', min_width='150px'))
display(dropdown_f)

Dropdown(description='Select:', layout=Layout(min_width='150px', width='auto'), options=('DAPI', 'FISH0', 'FIS…

In [29]:
n_fish = np.where(np.array(channels_fish) == dropdown_f.value)[0][0]
chan_f = channels_fish[n_fish]; print(chan_f)

FISH1


In [30]:
file_ref_mask_stats           = constants[f'MASK_{modality}_CELL_STATS_{meth}_{chan_f}_LIST_PATH']
mask_cells_list_path          = constants[f'MASK_{modality}_DAPI_CELL_LIST_PATH']
dict_masks_cells              = fp.load_masks_distributed_files(mask_cells_list_path)
dict_mask_cells_to_rem        = np.load(mask_removed_labels_path, allow_pickle=True)[()]
dic_masks_cell_stats          = fp.load_pd_distributed_files(file_ref_mask_stats)

batch_mod_chan_mip            = constants[f'BATCH_{modality}_DAPI_NUCLEI_MIP']

dict_mask_cells_clean         = {}
dict_mask_nuc_clean           = {}
viewer1                       = napari.Viewer(title="Clean nuclei")

counter         = 0
for ind, file in enumerate(batch_mod_chan_mip):
    file        = Path(file)
    im          = io.imread(file)
    base_name   = '_'.join(file.stem.split('_')[:-3])
    
    masks_cells        = dict_masks_cells[base_name]                             #cleaning
    masks_cells_clean  = sg.remove_labels_from_masks(masks_cells, label_list=dict_mask_cells_to_rem[base_name])
    masks_nuc          = dict_masks_nuc[base_name]
    masks_nuc_clean    = sg.remove_labels_from_masks(masks_nuc, label_list=dict_mask_cells_to_rem[base_name])

    df_stats_cells     = dic_masks_cell_stats[base_name]
    df_stats_cells     = st.roi_selection_account(df_stats_cells, dict_mask_cells_to_rem[base_name])
    dic_masks_cell_stats[base_name] = df_stats_cells

    viewer1.add_image(im, rgb=False, name=f"DAPI  {base_name}")
    viewer1.add_labels(masks_nuc, name=f"Nuclei {base_name}", opacity=.2, blending='translucent')
    viewer1.add_labels(masks_nuc_clean, name=f"Nuclei cleaned{base_name}", opacity=.2, blending='translucent')    
    viewer1.add_labels(masks_cells, name=f"Cells {base_name}", opacity=.8, blending='translucent')
    viewer1.add_labels(masks_cells_clean, name=f"Cells cleaned {base_name}", opacity=.8, blending='translucent')
 
    if counter != 0:
        viewer1.layers[f"DAPI  {base_name}"].visible          = False
        viewer1.layers[f"Nuclei {base_name}"].visible         = False
        viewer1.layers[f"Nuclei cleaned{base_name}"].visible  = False 
        viewer1.layers[f"Cells {base_name}"].visible          = False 
        viewer1.layers[f"Cells cleaned {base_name}"].visible   = False 

    counter +=1    
    dict_mask_cells_clean[base_name] = masks_cells_clean
    dict_mask_nuc_clean[base_name]   = masks_nuc_clean

fp.save_masks_distributed_files(mask_nuclei_cleaned_list_path, dict_mask_nuc_clean)
fp.save_masks_distributed_files(mask_cell_cleaned_list_path, dict_mask_cells_clean)
fp.save_masks_stats_distributed_files_modif(file_ref_mask_stats,  dic_masks_cell_stats)

  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)
  return func(*args, **kwargs)


In [31]:
exec(f"MASK_{modality}_DAPI_NUCLEI_CLEANED_LIST_PATH = mask_nuclei_cleaned_list_path", globals())
exec(f"MASK_{modality}_DAPI_CELL_CLEANED_LIST_PATH = mask_cell_cleaned_list_path", globals())
exec(f"MASK_{modality}_DAPI_NUCLEI_REMOVED_LABELS_LIST_PATH = mask_removed_labels_path", globals())

end of run for each gene

In [32]:
constants2 = tk.collect_constants()
tk.save_constants_and_commit_hash(constants2, batch_name, folder_path = Path(f"../Analysis/{batch_name}"))