# Run Cellpose model

This notebook runs a Cellpose model on Vectra Polaris spectrally unmixed multichannel tif files.  Run it from an environment that includes cellpose.

Dependencies:
1. Use the cellpose GUI to train a model from an existing one. Tip: models that train well for Polaris images are CellPose (CP) and TissueNet2 (TN2), but will do better with some human-in-the-loop additional training.  The model is expected to be saved in the current dir within a models folder.    
2. Polaris *_component_data.tif files, pre-processed into 3-channel RGB 8-bit format (i.e. uint8, 0-255 range) with at least one channel for cyto and/or nuclei (channel 3 may optionally hold anything else you want to review). The files are expected to reside in a single directory in tif format.  See the prior notebook, Format_Polaris_for_cellpose, for the setup scripts.

Useful links:   
http://www.cellpose.org  
https://www.youtube.com/watch?v=5qANHWoubZU  
https://cellpose.readthedocs.io/en/latest/index.html  * for installation instructions.  
https://github.com/MouseLand/cellpose/tree/master  

### ToDo  
* Plot summary statistics of the masks generated

### Install Instructions  



In [None]:
import numpy as np
import time, os, sys
from urllib.parse import urlparse
import matplotlib.pyplot as plt
# %matplotlib inline 
plt.rcParams['figure.dpi'] = 300
from cellpose import models, utils, io, plot
import fnmatch, re, skimage


## Variables to set each time 

In [None]:

# Location of consolidated data file; every other path below is built from it
root_dir = '/Users/annmstrange/Documents/Projects/Tumor IF'
#rgb_input_dir = os.path.join(root_dir, 'Panel2/CellPose40x')

# Directory holding the pre-formatted '*_rgb.tif' files that this notebook reads
# (uncomment exactly one line to pick the panel/run)
#rgb_input_dir = os.path.join(root_dir, 'Panel2/RGB_CellPose40x_23')
rgb_input_dir = os.path.join(root_dir, 'Panel3/RGB_CellPose40x_23')
#rgb_input_dir = os.path.join(root_dir, 'Panel2/RGB_CellPose40x_21')

# Output directory for the MASK_*.tif files.
# Masks dir must not exist (remove it) before a full re-run: the setup cell
# only prints a warning when it already exists, it does not clear it.
#cellpose_masks_output_dir = os.path.join(root_dir, "Panel2/CellPose40x_23/Masks")
cellpose_masks_output_dir = os.path.join(root_dir, "Panel3/CellPose40x_23/Masks")
#cellpose_masks_output_dir = os.path.join(root_dir, "Panel2/CellPose40x_21/Masks")


# Path of the trained model file handed to models.CellposeModel(pretrained_model=...)
# NOTE(review): 'TN_20230_P28' has one more digit than the 'TN_2021_P28' alternative below;
# confirm it is the real file name and not a typo for 'TN_2023_P28'.
model_path = os.path.join(root_dir, 'Panel2/CellPose40x_P28_2023_train/models/TN_20230_P28')  # also for P68
#model_path = os.path.join(root_dir, 'Panel2/CellPose40x_P28_2023_train/models/TN_2021_P28')

# Cell diameter in pixels; copied into `diameter` and passed to model.eval
#diameter = 14.0 # 3x blur
DIAMETER = 30.0 # 40x full resolution
# CP_20230515_144527

In [None]:
def get_files_in_folder(src, pattern):
    '''
    Collect the full paths of all files under src whose names match pattern.

    Args:
        src: full path where to look, e.g.
            '/Volumes/Glyph4TB/Projects/Tumor IF/Panel2/013022 P28Images_full'.
            The search includes subdirectories.
        pattern: string like '*_composite_image.tif'; it is matched against
            the bare file names with fnmatch.filter.

    Returns:
        list of full filenames, in the order os.walk yields them (not sorted).
        The list is empty when nothing matches; os.walk also yields nothing
        for a directory that does not exist, so that case returns [] too.
    '''
    fname_list = []

    # The sub-directory list from os.walk is not needed: the walk itself
    # descends into every subfolder.
    for dirpath, _subdirs, filenames in os.walk(src):
        matches = fnmatch.filter(filenames, pattern)
        fname_list.extend(os.path.join(dirpath, name) for name in matches)

    return fname_list



In [None]:
# Collect every pre-formatted RGB tif under the input directory (subfolders
# included); `files` is the work list used by all later cells.
files = get_files_in_folder(rgb_input_dir, '*_rgb.tif')

# Peek at one input image (the last one found) as a sanity check that the
# RGB files load and look right. This shows the input, not a mask.
if files:
    img = io.imread(files[-1])
    plt.figure(figsize=(2,2))
    plt.imshow(img)
    plt.axis('off')
    plt.show()
else:
    # Indexing an empty list would raise IndexError before the later
    # "Number of images loaded" check ever gets a chance to report it.
    print("No '*_rgb.tif' files found under {}".format(rgb_input_dir))

In [None]:
#@markdown ###Step 4: Enter Directory path containing the images: 
#@markdown ##### An existing Masks directory is NOT removed here; delete it yourself before a full rebuild.

Input_Directory = rgb_input_dir
# Joining with an empty last component guarantees a trailing path separator.
input_dir = os.path.join(Input_Directory, "")

#@markdown ###Optional: Enter image extension here to read only files/images of specified extension (.tif,.jpg..): 
#@markdown ###### Leave empty if not specifying anything
image_format = "tif" #@param {type:"string"}

#@markdown ##### Masks are written to cellpose_masks_output_dir, which is created here when missing
if os.path.exists(cellpose_masks_output_dir):
    print("Existing Mask Directory found. You should delete it so this can rebuild it fully. {}".format(cellpose_masks_output_dir))
else:
    os.makedirs(cellpose_masks_output_dir)

# `files` was already collected with get_files_in_folder in an earlier cell
# (an older version of this cell listed only the root folder with os.walk).
# Report how many images there are, and flag the empty case.
print("Number of images loaded: %d." %(len(files)))
if len(files) == 0:
    print("Cannot read image files. Check if folder has images")


In [None]:
# Echo the mask output directory so the path can be checked before running.
cellpose_masks_output_dir

In [None]:
# Hyperparameters.  Recommend keeping the defaults except for cell diameter (in pixels) 
# which should be approximated (take median, ideally) across the batch.  
# Measure, or use cellpose on a subset to estimate.  You may get better segmentation by estimating this well.

# All three values are passed to model.eval in the batch cell and recorded in
# Cellpose_parameters_used.txt.
flow_threshold = 0.4
cellprob_threshold = 0
diameter=DIAMETER

## RUN CELLPOSE


In [None]:

# DEFINE CELLPOSE MODEL
# A built-in model would be selected by type instead, e.g.
# model_type='cyto' or model_type='nuclei'
#model = models.Cellpose(gpu=False, model_type='cyto')
# Here, instead of a model type, the custom-trained model file at model_path is loaded.
model = models.CellposeModel(gpu=False,
                            pretrained_model=model_path)

In [None]:
# Show the model file name; the same basename is written to the parameters file.
os.path.basename(model_path)

In [None]:



# define CHANNELS to run segmentation on
# grayscale=0, R=1, G=2, B=3
# channels = [cytoplasm, nucleus]
# if NUCLEUS channel does not exist, set the second channel to 0
# channels = [0,0]
# IF ALL YOUR IMAGES ARE THE SAME TYPE, you can give a list with 2 elements
# channels = [0,0] # IF YOU HAVE GRAYSCALE
channels = [2,3] # IF YOU HAVE G=cytoplasm and B=nucleus
# channels = [2,1] # IF YOU HAVE G=cytoplasm and R=nucleus

# or if you have different types of channels in each image
#channels = [[2,3], [0,0], [0,0]]

# if diameter is set to None, the size of the cells is estimated on a per image basis
# you can set the average cell `diameter` in pixels yourself (recommended) 
# diameter can be a list or a single number for all images

diameter = DIAMETER

# you can run all in a list e.g.
# >>> imgs = [io.imread(filename) for filename in files]
# >>> masks, flows, styles, diams = model.eval(imgs, diameter=None, channels=channels)
# >>> io.masks_flows_to_seg(imgs, masks, flows, diams, files, channels)
# >>> io.save_to_png(imgs, masks, flows, files)

# Trial run on the first file only (this doesn't save the mask as a tif); just trying it.
# The thresholds are passed explicitly so the trial uses exactly the same
# settings as the batch run, even after the hyperparameters are edited.
for filename in files[:1]:
    print('Running cellpose on {}'.format(filename))
    img = io.imread(filename)
    masks, flows, styles = model.eval(img,
                                      diameter=diameter,
                                      flow_threshold=flow_threshold,
                                      cellprob_threshold=cellprob_threshold,
                                      channels=channels)

    # save results so you can load in gui; this saves a file ending in _rgb_seg.npy 
    io.masks_flows_to_seg(img, masks, flows, diameter, filename, channels)

    # save results as png with a 4 image panel of each part 
    # io.save_to_png(img, masks, flows, filename)

In [None]:
#@markdown ### **Step 8. Run Cellpose on folder of images**
#
# For every path in `files`: segment the image, write the mask as a 16-bit
# tif named MASK_<input file name> into cellpose_masks_output_dir, optionally
# write the flow image, and save the _seg.npy for the GUI. The parameters
# used are recorded in Cellpose_parameters_used.txt at the end.

#@markdown ###Tick if you want to save the flow image/s: 
Save_Flow= False #@param {type:"boolean"}
#@markdown ##### *Flow image will be resized when saved
save_flow=Save_Flow

# Images are evaluated one at a time: evaluating the whole list in a single
# model.eval call can lead to a memory error when there are many images.

# Nothing has been segmented yet at this point, so announce the start
# rather than claiming the segmentation is done.
print("Starting segmentation. Masks are saved as each image completes.")
print("Save Directory is: ",cellpose_masks_output_dir)
os.makedirs(cellpose_masks_output_dir, exist_ok=True)

if save_flow:
    print("Saving Flow")
    # os.path.join supplies the separator; plain concatenation produced a
    # sibling directory named '<Masks dir>flows'.
    flows_save_dir = os.path.join(cellpose_masks_output_dir, "flows")
    print("Save Directory for flows is: ",flows_save_dir)
    os.makedirs(flows_save_dir, exist_ok=True)


for filename in files:
    img = io.imread(filename)
    base_name = os.path.basename(filename)

    print("\nSegmenting: ",filename)
    mask, flow, style = model.eval(img, diameter=diameter,
                                   flow_threshold=flow_threshold,
                                   cellprob_threshold=cellprob_threshold,
                                   channels=channels)
    print("Segmentation complete. Saving Masks")

    # Output name for masks
    mask_output_name = os.path.join(cellpose_masks_output_dir, "MASK_"+base_name)
    # Save mask as 16-bit in case more than 255 objects are detected
    mask = mask.astype(np.uint16)
    skimage.io.imsave(mask_output_name, mask, check_contrast=True)

    if save_flow:
        # Build the flow name from the bare file name: `filename` is a full
        # path and cannot be appended to the flows directory as-is.
        flow_stem = os.path.splitext(base_name)[0]
        flow_output_name = os.path.join(flows_save_dir, "FLOWS_"+flow_stem+".tif")
        # Save flow as 8-bit
        flow_image = flow[0].astype(np.uint8)
        skimage.io.imsave(flow_output_name, flow_image, check_contrast=True)

    # save output of model eval to be loaded in GUI (_seg.npy); use THIS
    # image's mask/flow, not the `masks`/`flows` left over from the trial run
    io.masks_flows_to_seg(img, mask, flow, diameter, filename, channels)

# Save parameters used in Cellpose
parameters_file = os.path.join(cellpose_masks_output_dir, "Cellpose_parameters_used.txt")
# Use a separate label so the numeric `diameter` stays usable by later cells.
if diameter == 0:
    diameter_label = "Automatically estimated by cellpose"
else:
    diameter_label = str(diameter)
with open(parameters_file, "w") as outFile:
    outFile.write("CELLPOSE PARAMETERS\n")
    outFile.write("Model: "+ os.path.basename(model_path) +"\n")
    #outFile.write("Omni Flag: "+str(omni)+"\n")
    outFile.write("Diameter: "+diameter_label+"\n")
    outFile.write("Flow Threshold: "+str(flow_threshold)+"\n")
    outFile.write("Cell probability Threshold: "+str(cellprob_threshold)+"\n")
print("\nSegmentation complete and files saved")


In [None]:
# Save parameters used in Cellpose
# (Re-)writes Cellpose_parameters_used.txt in the masks directory; the batch
# cell writes the same file, so this cell is only needed to refresh it.
parameters_file = os.path.join(cellpose_masks_output_dir, "Cellpose_parameters_used.txt")
# Keep the numeric `diameter` intact for later cells: put the text in its own variable.
if diameter == 0:
    diameter_label = "Automatically estimated by cellpose"
else:
    diameter_label = str(diameter)
# The context manager closes the file even if one of the writes fails.
with open(parameters_file, "w") as outFile:
    outFile.write("CELLPOSE PARAMETERS\n")
    outFile.write("Model: "+ os.path.basename(model_path) +"\n")
    #outFile.write("Omni Flag: "+str(omni)+"\n")
    outFile.write("Diameter: "+diameter_label+"\n")
    outFile.write("Flow Threshold: "+str(flow_threshold)+"\n")
    outFile.write("Cell probability Threshold: "+str(cellprob_threshold)+"\n")
print("\nSegmentation complete and files saved")

In [None]:
# Show the mask path most recently assigned (the last file of the batch loop when run in order).
mask_output_name

In [None]:
# List every input file that was found.
files

# #41 Unstained
# NOTE(review): presumably entry #41 of this listing is an unstained sample — confirm.

In [None]:
# Show the diameter value currently in use.
diameter

In [None]:
# DISPLAY RESULTS
# Shows the segmentation of the image most recently processed by the batch
# loop. `img`, `filename`, `mask` and `flow` are all left over from that same
# last iteration, so the picture and the overlay describe the same file.
# (`masks`/`flows` from the single-file trial run belong to the FIRST file
# and must not be mixed with the last image.)

#save output of model eval to be loaded in GUI
io.masks_flows_to_seg(img, mask, flow, diameter, filename, channels)


fig = plt.figure(figsize=(12,5))
# flow[0] is the entry the batch cell also saves as the flow image
plot.show_segmentation(fig, img, mask, flow[0], channels=channels)
plt.tight_layout()
plt.show()

## Need masks for unstained

Check if any cell masks were detected for unstained, and find a suitable mask to proxy so we can have some unstained cell measurements

In [None]:
# Keep only the input paths containing "UNST" (case-sensitive substring test
# on the full path, so a folder name containing it matches as well).
unstained_list = [match for match in files if "UNST" in match]  # Unstained
# Display the selection.
unstained_list

In [None]:
# view masks
# Build the mask file name that corresponds to one unstained input image.
# i=1 picks the SECOND entry of unstained_list; this raises IndexError when
# the list has fewer than two entries.
i=1
mask_output_name= os.path.join(cellpose_masks_output_dir, "MASK_"+os.path.basename(unstained_list[i]))

# view 1 image
# Note: this rebinds the notebook-level `img` (and `mask_output_name` above)
# to the mask being inspected.
img = io.imread(mask_output_name)
plt.figure(figsize=(2,2))
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# inspect the masks
# Check whether the _seg.npy file for one specific, hard-coded sample exists
# in the input directory; the boolean is shown as the cell result.

os.path.exists(os.path.join(input_dir,'091221 P9HuP28 #02 S16-14778 A1_[5931,51923]_rgb_seg.npy'))

# _seg.npy format is what CellPose GUI uses
# (kept for reference: loading the file for manual inspection)
#img_array = np.load(os.path.join(input_dir,'091221 P9HuP28 #02 S16-14778 A1_[5931,51923]_rgb_seg.npy'),
#               allow_pickle=True)

#type(img_array)

#plt.imshow(img_array, cmap='gray')
#plt.show()


In [None]:
# Confirm that the mask file currently named by mask_output_name exists on disk.
print(os.path.exists(mask_output_name))



In [None]:


# view 1 image
# Load the mask tif named by mask_output_name and show it as a small
# thumbnail without axes. This rebinds the notebook-level `img`.
img = io.imread(mask_output_name)
plt.figure(figsize=(2,2))
plt.imshow(img)
plt.axis('off')
plt.show()

# Handle Unstained Samples  

Please find an existing mask file, duplicate it and rename it to precisely match the sample name of at least one unstained sample file.  We want event measurements from unstained samples even though no cells will likely be detected. 

## Export metrics

In [None]:
# check files match up
# Gather the mask path of every input image whose MASK_ file is on disk.
matched_list = []
for file_idx, filename in enumerate(files):
    # Expected mask name for this input image
    mask_output_name = os.path.join(
        cellpose_masks_output_dir,
        "MASK_" + os.path.basename(filename),
    )
    if not os.path.exists(mask_output_name):
        continue
    matched_list.append(mask_output_name)

print(len(matched_list))

# Every mask tif actually present in the output directory
all_files = get_files_in_folder(cellpose_masks_output_dir, 'MASK*.tif')
print(len(all_files))

# Masks on disk with no corresponding input image: candidates for removal.
rmv_files = list(set(all_files).difference(matched_list))

# Deletion is intentionally left disabled; uncomment to remove the stray masks.
# for rmv_fn in rmv_files:
#     print(rmv_fn)
#     os.remove(rmv_fn)

In [None]:
# Echo the mask output directory once more for reference.
cellpose_masks_output_dir