In [1]:
# import required packages
import sys
sys.path.append("../src")
import tensorflow as tf
import os
from tifffile import imwrite
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import cv2
import pandas as pd
import random
import json
from cell_classification.inference import prepare_normalization_dict, predict
from cell_classification.application import CellClassification

  from tqdm.autonotebook import tqdm


In [2]:
import os
from alpineer import io_utils

## 0: Set root directory and download example dataset
Here we are using the example data located in `/data/example_dataset/input_data`. To run this notebook on your own data, change `base_dir` to point to your own sub-directory within the data folder. `base_dir` is the path to all of your imaging data (i.e. multiplexed images and segmentation masks). The subdirectory `segmentation_data/nimbus_output` will contain all of the data generated by this notebook. The following cells expect this folder structure:
```
|-- base_dir
|   |-- image_data
|   |   |-- samples
|   |   |   |-- fov_1
|   |   |   |-- fov_2
|   |-- segmentation_data
|   |   |-- deepcell_output
|   |   |-- nimbus_output
```

In [3]:
# set up the base directory
# The root of the imaging data differs per machine, so it can be supplied through the
# NIMBUS_BASE_DIR environment variable without editing the notebook. When the variable
# is not set, the original hard-coded location is used unchanged.
base_dir = os.environ.get("NIMBUS_BASE_DIR", "E:/angelo_lab/data/TONIC/raw")

## 1: set file paths and parameters

### All data, images, files, etc. must be placed in the 'data' directory, and referenced via '../data/path_to_your_data'

If you're interested in directly interfacing with Google Drive, consult the documentation [here](https://ark-analysis.readthedocs.io/en/latest/_rtd/google_docs_usage.html).

In [4]:
# Derive the three working directories from base_dir in one step:
#   tiff_dir            - one sub-folder per fov
#   deepcell_output_dir - segmentation masks
#   nimbus_output_dir   - everything this notebook writes
tiff_dir, deepcell_output_dir, nimbus_output_dir = (
    os.path.join(base_dir, relative_path)
    for relative_path in (
        "image_data/samples",
        "segmentation_data/deepcell_output",
        "segmentation_data/nimbus_output",
    )
)

## 2: Load data and prepare normalization dictionary
The next step is to iterate through all the fovs and calculate the 0.999 marker expression quantile for each marker individually. This is used for normalizing the marker expressions prior to predicting marker positivity/negativity with our model.

In [5]:
# Make output directory
os.makedirs(nimbus_output_dir, exist_ok=True)

# define the channels to exclude (the same list is passed to `predict` further down)
exclude_channels = ['H3K9ac', 'H3K27me3', "Au", "Fe", "Noodle", "Ca"]

# either get all fovs in the folder...
# Only sub-directories are kept, so stray files in tiff_dir (thumbnails, .DS_Store, ...)
# are not mistaken for fovs, and the names are sorted so the processing order is the
# same on every run and every platform.
fov_names = sorted(
    entry
    for entry in os.listdir(tiff_dir)
    if os.path.isdir(os.path.join(tiff_dir, entry))
)
# ... or optionally, select a specific set of fovs manually
# fov_names = ["fov0", "fov1"]

fov_paths = [os.path.join(tiff_dir, fov_name) for fov_name in fov_names]

# Prepare or load training data normalization dict
normalization_dict = prepare_normalization_dict(
    fov_paths,
    output_dir=nimbus_output_dir,
    exclude_channels=exclude_channels,
    n_jobs=16,
)
# To reuse a previously computed dict instead of recomputing it
# (assumes it was written to nimbus_output_dir as 'normalization_dict.json' - confirm):
# with open(os.path.join(nimbus_output_dir, 'normalization_dict.json')) as f:
#     normalization_dict = json.load(f)





Iterate over fovs...


  0%|          | 0/10 [00:00<?, ?it/s]

## 3: Load model and initialize deepcell application
The following code initializes the deepcell application and loads the model checkpoint. The checkpoint needs to be downloaded from [here](https://charitede-my.sharepoint.com/:u:/g/personal/josef-lorenz_rumberger_charite_de/Ed5iVEMreE5DqJ_WczdXS9EBFeD75ZmaLdYWXENvUvUbSg?e=r2hxK8) and placed at the path assigned to `checkpoint_path` in the cell below (here `../checkpoints/halfres_512_checkpoint_160000.h5`); adjust `checkpoint_path` if your file has a different name or location.

In [6]:
# Location of the model weights, relative to the notebook's working directory.
# normpath rewrites the separators to the convention of the host operating system.
checkpoint_path = os.path.normpath(
    "../checkpoints/halfres_512_checkpoint_160000.h5"
)


# change this function to match your segmentation naming convention
def segmentation_naming_convention(fov_path):
    """Build the path of the segmentation file that belongs to one fov.

    The file is looked up in `deepcell_output_dir` under the name
    `<fov folder name>_feature_0.tif`.

    Args:
        fov_path (str): path to a single fov
    Returns:
        str: path to the segmentation file of that fov
    """
    mask_filename = os.path.basename(fov_path) + "_feature_0.tif"
    return os.path.join(deepcell_output_dir, mask_filename)


# Instantiate the classification application, then load the weights stored at
# `checkpoint_path` into it. The order matters: the application object must exist
# before its weights can be loaded.
app = CellClassification()
app.load_weights(checkpoint_path)

Loaded weights from ..\checkpoints\halfres_512_checkpoint_160000.h5


## 4: Make predictions with the model
Determine if you want to (a) plot the predictions, (b) save the prediction images and (c) use test-time augmentation during inference. The script will iterate through your samples and store predictions and a file named `pred_cell_table.csv` that contains the mean-per-cell predicted marker activity.

In [11]:
# Inference options. Edit the values here; they are forwarded to `predict` below,
# so this is the single place that controls the run.
plot_predictions = False  # whether to plot the predictions
save_predictions = True   # whether to save the prediction images
half_resolution = True    # forwarded as `half_resolution`; the loaded checkpoint is named "halfres"
# NOTE(review): `test_time_aug` is declared but not forwarded, because the keyword name
# that `predict` accepts for test-time augmentation is not visible from this notebook.
# Confirm the signature and pass it explicitly if supported.
test_time_aug = True

# Run the model on every fov; the result is bound to `cell_table`.
cell_table = predict(
    fov_paths,
    nimbus_output_dir,
    app,
    normalization_dict,
    segmentation_naming_convention,
    exclude_channels=exclude_channels,
    plot_predictions=plot_predictions,
    save_predictions=save_predictions,
    half_resolution=half_resolution,
)


NameError: name 'prepare_input_data' is not defined