In [1]:
# import required packages
import warnings
# silence all warnings for the rest of the session
warnings.simplefilter("ignore")
import sys
# add ../src (relative to the working directory) to the module search path
sys.path.append("../src")
import os
from IPython.display import display, HTML
# let the notebook container use the full browser width
display(HTML("<style>.container { width:100% !important; }</style>"))
from cell_classification.application import Nimbus
from alpineer import io_utils
from cell_classification.viewer_widget import NimbusViewer
# NOTE(review): `tf` is not referenced in the visible cells — confirm the import is still needed
import tensorflow as tf

## 0: Set root directory and download example dataset
Here we are using the example data located in `/data/example_dataset/input_data`. To run this notebook on your own data, change `base_dir` to point to your own sub-directory within the data folder. `base_dir` is the path to all of your imaging data (i.e. multiplexed images and segmentation masks). The sub-directory `nimbus_output` will contain all of the data generated by this notebook. In the following, we expect this folder structure:
```
|-- base_dir
|   |-- image_data
|   |   |-- samples
|   |   |   |-- fov_1
|   |   |   |-- fov_2
|   |-- segmentation_data
|   |   |-- deepcell_output
|   |   |-- nimbus_output
```

In [2]:
# set up the base directory
# NOTE(review): this is an absolute, machine-specific path, while the notebook text asks for
# data to be placed in the 'data' directory and referenced via '../data/...'.
# Point it at your own dataset before running.
base_dir = "E:/angelo_lab/data/TONIC/raw"

## 1: set file paths and parameters

### All data, images, files, etc. must be placed in the 'data' directory, and referenced via '../data/path_to_your_data'


In [3]:
# set up file paths
tiff_dir = os.path.join(base_dir, "image_data", "samples")
deepcell_output_dir = os.path.join(base_dir, "segmentation_data", "deepcell_output")
nimbus_output_dir = os.path.join(base_dir, "segmentation_data", "nimbus_output")

# Check that the input paths exist *before* creating anything: os.makedirs also creates
# all missing parent folders, so a mistyped base_dir would otherwise be created silently
# and then pass its own existence check
io_utils.validate_paths([base_dir, tiff_dir, deepcell_output_dir])

# Create nimbus output directory
os.makedirs(nimbus_output_dir, exist_ok=True)

# Check that the output directory is available
io_utils.validate_paths([nimbus_output_dir])

## 2: Set up input paths and the naming convention for the segmentation data
Store names of channels to exclude in the list below. Either predict all FOVs or manually specify the ones you want to apply Nimbus to. The `segmentation_naming_convention` maps a FOV path to the corresponding instance segmentation output path. Please make sure that `segmentation_naming_convention` returns the path to the correct cell segmentation output.

In [4]:
# define the channels to exclude
exclude_channels = [
    'H3K9ac', 'H3K27me3', "Au", "Fe", "Noodle", "Ca", "CD11c_nuc_exclude", "CK17_smoothed",
    "Collagen1", "ECAD_smoothed", "FOXP3_nuc_include", "SMA", "VIM"
]

# either get all fovs in the folder...
# (only sub-directories are kept, so stray files such as Thumbs.db or .DS_Store are not
# mistaken for FOVs; os.listdir returns entries in arbitrary order, hence the sort)
fov_names = sorted(
    entry for entry in os.listdir(tiff_dir)
    if os.path.isdir(os.path.join(tiff_dir, entry))
)
# ... or optionally, select a specific set of fovs manually
# fov_names = ["fov0", "fov1"]

# fail early with a clear message instead of an IndexError further down
assert fov_names, f"No FOV folders found in {tiff_dir}"

fov_paths = [os.path.join(tiff_dir, fov_name) for fov_name in fov_names]

# define the naming convention for the segmentation data
def segmentation_naming_convention(fov_path):
    """Prepares the path to the segmentation data for a given fov

    The returned path points into `deepcell_output_dir` and is named
    `<fov_name>_feature_0.tif`, where `<fov_name>` is the last component of `fov_path`.

    Args:
        fov_path (str): path to fov
    Returns:
        seg_path (str): path to the segmentation file of this fov
    """
    # normpath drops a trailing separator, which would otherwise make basename return ""
    fov_name = os.path.basename(os.path.normpath(fov_path))
    return os.path.join(deepcell_output_dir, fov_name + "_feature_0.tif")

# sanity check: does the naming convention resolve to an existing file for the first fov?
print(
    "Segmentation data exists for fov 0 and naming convention is correct"
    if os.path.exists(segmentation_naming_convention(fov_paths[0]))
    else "Segmentation data does not exist for fov 0 or naming convention is incorrect"
)

Segmentation data exists for fov 0 and naming convention is correct


## 3: Load model and initialize Nimbus application
The following code initializes the Nimbus application and loads the model checkpoint. The model was trained on a diverse set of tissues, protein markers, imaging platforms and cell types and doesn't need re-training.

In [5]:
# build the Nimbus application from the inputs defined in the cells above
nimbus = Nimbus(
    output_dir=nimbus_output_dir,
    save_predictions=True,
    exclude_channels=exclude_channels,
    segmentation_naming_convention=segmentation_naming_convention,
    fov_paths=fov_paths,
)

# validate the inputs before any computation is started
nimbus.check_inputs()

Loaded weights from E:\angelo_lab\cell_classification\checkpoints\halfres_512_checkpoint_160000.h5
All inputs are valid.


## 4: Prepare normalization dictionary 
The next step is to iterate through all the fovs and calculate the 0.999 marker expression quantile for each marker individually. This is used for normalizing the marker expressions prior to predicting marker confidence scores with our model. You can set `n_subset` to estimate the quantiles on a small subset of the data and you can set `multiprocessing` to speed up computation.

In [6]:
# estimate the per-marker normalization quantiles
nimbus.prepare_normalization_dict(
    overwrite=True,  # presumably recomputes an already existing dictionary — confirm
    multiprocessing=True,  # speed up the computation
    n_subset=5,  # estimate the quantiles on a small subset of the data
)

Iterate over fovs...


  0%|          | 0/5 [00:00<?, ?it/s]

## 5: Make predictions with the model
Nimbus will iterate through your samples and store the predictions, together with a file named `nimbus_cell_table.csv` that contains the mean-per-cell predicted marker confidence scores, in the sub-directory called `nimbus_output`.

In [7]:
# run the model on all fovs; per the notebook text, predictions and `nimbus_cell_table.csv`
# are stored in the `nimbus_output` sub-directory
cell_table = nimbus.predict_fovs()

Available GPUs:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
Predictions will be saved in E:/angelo_lab/data/TONIC/raw\segmentation_data\nimbus_output
Iterating through fovs will take a while...


  0%|          | 0/5 [00:00<?, ?it/s]

## 6: View multiplexed channels and Nimbus predictions side-by-side
Select an FOV and one marker image per channel to inspect the imaging data and associated Nimbus predictions

In [16]:
# point the viewer at the multiplexed images and the Nimbus outputs, then render the widget
viewer = NimbusViewer(
    output_dir=nimbus_output_dir,
    input_dir=tiff_dir,
)
viewer.display()

HBox(children=(VBox(children=(HTML(value='<h2>Select files</h2>'), Select(description='FOV:', options=('TONIC_…