### This notebook runs the preprocessing pipeline: it uploads files to DeepCell, downloads and processes the output, segments cells, and extracts channel information

In [1]:
# import required packages
import os
import skimage.io as io
import matplotlib.pyplot as plt

from ark.utils import data_utils, load_utils, io_utils, plot_utils, deepcell_service_utils
from ark.segmentation import marker_quantification

### <span style="color:#ff0000"> All data, images, files, etc. must be placed in the 'data' directory, and referenced via '../data/path_to_your_data' regardless of whether they are input or output. </span>

In [3]:
# set up file paths (everything lives under the '../data' directory)
base_dir = "../data/example_dataset"
input_dir = os.path.join(base_dir, "input_data")
tiff_dir = os.path.join(input_dir, "single_channel_inputs/")
deepcell_input_dir = os.path.join(input_dir, "deepcell_input/")
deepcell_output_dir = os.path.join(base_dir, 'deepcell_output')
single_cell_dir = os.path.join(base_dir, "single_cell_output")

# create the directories this notebook writes to if they do not exist yet;
# exist_ok=True keeps the cell safe to re-run and avoids the race between a
# separate os.path.exists() check and the os.makedirs() call
for directory in [deepcell_input_dir, deepcell_output_dir, single_cell_dir]:
    os.makedirs(directory, exist_ok=True)


# set this to true for multi-channel tiffs
MIBItiff = False

# data file suffix for low-level processed data
# only needed for MIBItiff = True
MIBItiff_suffix = "-MassCorrected-Filtered.tiff"

# nuclear channel name(s) (or nucs = None)
nucs = ['HH3']

# membrane channel name(s) (or mems = None)
mems = ['Membrane']

# validate every path used by the rest of the notebook before any work is done
io_utils.validate_paths([base_dir,
                         input_dir,
                         tiff_dir,
                         deepcell_input_dir,
                         deepcell_output_dir,
                         single_cell_dir
                         ])

### compute and filter fov paths

In [5]:
# by default, pick up every fov found in tiff_dir: MIBItiff inputs are listed as
# files selected via MIBItiff_suffix, otherwise each fov is a folder
fovs = (
    io_utils.list_files(tiff_dir, substrs=MIBItiff_suffix)
    if MIBItiff
    else io_utils.list_folders(tiff_dir)
)

# alternatively, uncomment the line below to restrict the run to a hand-picked set of fovs
# fovs = ["fov1", "fov2"]

# TODO: MIBItiff manual selection

### load images into notebook, process, and save as deepcell compatible input

In [11]:
# gather the nuclear and membrane channel names to include in the deepcell data;
# either list may be None, and any stray None entries are dropped (just in case)
channels = [name for name in (nucs or []) + (mems or []) if name is not None]

# load only those channels, using the loader that matches the input format
if not MIBItiff:
    data_xr = load_utils.load_imgs_from_tree(tiff_dir, img_sub_folder="TIFs", fovs=fovs, channels=channels)
else:
    data_xr = load_utils.load_imgs_from_mibitiff(tiff_dir, mibitiff_files=fovs, channels=channels)

# write the deepcell input tifs into deepcell_input_dir
data_utils.generate_deepcell_input(data_xr, deepcell_input_dir, nucs, mems)

## Upload files to Deepcell and download results

Deepcell input images will be zipped into a single file, uploaded to [deepcell.org](https://deepcell.org),

and the output will be downloaded to the deepcell output directory.

In [7]:
# zip the deepcell inputs for the selected fovs, upload them to deepcell.org,
# and download the results into deepcell_output_dir
deepcell_service_utils.create_deepcell_output(
    deepcell_input_dir,
    deepcell_output_dir,
    fovs=fovs,
)

### We can then load the segmented mask from deepcell via label-map TIFFs and save as an xarray

In [12]:
# destination of the combined label maps
save_name = os.path.join(deepcell_output_dir, 'segmentation_labels.xr')

# read the label-map TIFFs written by deepcell into a single xarray
segmentation_labels = load_utils.load_imgs_from_dir(
    data_dir=deepcell_output_dir,
    imgdim_name='compartments',
    channels=['whole_cell'],
    delimiter='_feature_0',
    force_ints=True,
)

# drop any file left over from a previous run before writing the new one
if os.path.exists(save_name):
    print("overwriting previously generated processed output file")
    os.remove(save_name)

segmentation_labels.to_netcdf(save_name, format="NETCDF3_64BIT")

### We can also then save the segmented mask overlaid on the imaging data

In [16]:
# save one overlay image per fov next to the deepcell output
for fov in data_xr.fovs:
    # whole-cell label map for this fov
    fov_labels = segmentation_labels.loc[fov, :, :, "whole_cell"].values
    # imaging data for this fov across all loaded channels
    fov_image = data_xr.loc[fov, :, :, :].values
    overlay_path = os.path.join(deepcell_output_dir, f'{fov.values}_overlay.tif')

    plot_utils.plot_overlay(fov_labels, fov_image, path=overlay_path)

### Afterwards, we can generate expression matrices from the labeling + imaging data

In [13]:
# extract the segmented imaging data to build the normalized and transformed expression matrices
# note: when loading your own dataset, keep all the imaging data in the same folder,
# with each fov in its own folder and every fov having the same channels
(combined_cell_size_normalized_data,
 combined_arcsinh_transformed_data) = marker_quantification.generate_cell_data(
    segmentation_labels=segmentation_labels,
    tiff_dir=tiff_dir,
    img_sub_folder="TIFs",
    is_mibitiff=MIBItiff,
    fovs=fovs,
    batch_size=5,
)