### This is a notebook to format your data for segmentation, run the images through the cloud instance of Mesmer, and then extract marker counts and morphological information from all the cells in your images.  This version also exemplifies Google Drive interoperability.

In [None]:
# import required packages
import os
import warnings
import skimage.io as io
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np

from ark.utils import data_utils, deepcell_service_utils, io_utils, load_utils, io_utils, plot_utils, segmentation_utils
from ark.utils.google_drive_utils import init_google_drive_api, GoogleDrivePath, path_join, drive_write_out, DriveOpen
from ark.segmentation import marker_quantification

### To use Google Drive with ark-analysis, you'll first have to initialize it via `init_google_drive_api` and a universal passcode. To get access to this passcode and to the Google Drive, email one of the maintainers of this notebook.

In [None]:
# Prefer a passcode supplied through the ARK_GDRIVE_PASSCODE environment variable so
# the secret never has to be saved inside the notebook.  When the variable is not set,
# fall back to the "passcode" placeholder, which must be replaced by the real passcode
# before running (and should not be committed afterwards).
init_google_drive_api(os.environ.get("ARK_GDRIVE_PASSCODE", "passcode"))

### All data, images, files, etc. must be placed in the 'data' directory, and referenced via '../data/path_to_your_data'.

The syntax for creating a `GoogleDrivePath` is very similar to that of making a regular path string.  Some general usage tips:
 * Use `path_join` instead of `os.path.join` when combining filepaths
 * Use `parent / child` syntax when possible to visually differentiate Drive paths from local paths

In [None]:
# Drive-side folder layout: everything lives underneath one base folder
base_dir = GoogleDrivePath('/test_output')

# inputs: single-channel images and the files generated for deepcell
input_dir = base_dir / 'input_data'
tiff_dir = input_dir / 'single_channel_inputs'
deepcell_input_dir = input_dir / 'deepcell_input_test'

# (local alternative, kept for reference)
# base_local_dir = '../data/gdrive_testouts'

# outputs: segmentation results, cell tables and visualizations
deepcell_output_dir = base_dir / 'deepcell_output_test'
single_cell_dir = base_dir / 'single_cell_output_test'
viz_dir = base_dir / 'deepcell_visualization_test'

In [None]:
# create directories if they do not exist
for directory in [deepcell_input_dir, deepcell_output_dir, single_cell_dir, viz_dir]:
    # isinstance (rather than an exact type comparison) also accepts subclasses
    # of GoogleDrivePath
    if isinstance(directory, GoogleDrivePath):
        # Drive folder: create it remotely and show its Drive file ID
        directory.mkdir()
        print(directory.fileID)
    else:
        # local folder: exist_ok avoids the check-then-create race
        os.makedirs(directory, exist_ok=True)

### To get access to the test data used here, email someone, and create a Drive shortcut in your root directory

In [None]:
# check every directory used by this notebook before doing any work
io_utils.validate_paths([
    base_dir, input_dir, tiff_dir,
    deepcell_input_dir, deepcell_output_dir,
    single_cell_dir, viz_dir,
])

### compute and filter fov paths

In [None]:
# MIBItiff: switch to True when the fovs are stored as multi-channel tiffs
MIBItiff = False

# file-name suffix of the low-level processed data files;
# it is only consulted when MIBItiff is True
MIBItiff_suffix = "-MassCorrected-Filtered.tiff"

### We can remotely list all the fovs in our Google Drive folder

In [None]:
# By default take every fov found in tiff_dir: the files carrying the MIBItiff
# suffix for multi-channel tiffs, otherwise one folder per fov ...
fovs = (
    io_utils.list_files(tiff_dir, substrs=MIBItiff_suffix)
    if MIBItiff
    else io_utils.list_folders(tiff_dir)
)

# ... or optionally, select a specific set of fovs manually
# fovs = ["fov1", "fov2"]

print(fovs)

# TODO: MIBItiff manual selection

In [None]:
# Channel selection.
# NOTE: nucs and mems may not both be None.

# name(s) of the nuclear channel(s), or None
nucs = ['HH3']

# name(s) of the membrane channel(s), or None
mems = None

In [None]:
# channels to be included in the Mesmer data: the nuclear channels followed by the
# membrane channels, with any None entries dropped (just in case)
channels = [channel for channel in (nucs or []) + (mems or []) if channel is not None]

# fail fast with a clear message instead of continuing with no channels at all:
# at least one of nucs and mems has to provide a channel name
if not channels:
    raise ValueError("At least one of nucs and mems must contain a channel name")

### Here we download the required image data directly from Drive.  No image data is saved locally!

In [None]:
# pick the loader matching the storage format chosen above
data_xr = (
    load_utils.load_imgs_from_mibitiff(tiff_dir, mibitiff_files=fovs, channels=channels)
    if MIBItiff
    else load_utils.load_imgs_from_tree(tiff_dir, img_sub_folder="TIFs", fovs=fovs, channels=channels)
)

### Note: If `deepcell_input_dir` is a local drive, the inputs for deepcell will be saved locally.  However, if `deepcell_input_dir` is a `GoogleDrivePath`, then the inputs will be automatically saved to the specified Drive folder.

### In general, it's best to save 'mid-processed' data locally to avoid upload/download slowdowns, and only upload data directly if it's 'final'.  That advice is ignored here for demonstration purposes.

In [None]:
# generate and save deepcell input tifs
# (built from the loaded image data and the nucs / mems channel selections, and
# written to deepcell_input_dir, which is a Google Drive folder in this notebook)
data_utils.generate_deepcell_input(data_xr, deepcell_input_dir, nucs, mems)

## Upload files to Deepcell and download results

Deepcell input images will be zipped into a single file and uploaded to [deepcell.org](https://deepcell.org), and the output will be downloaded to the deepcell output directory on Google Drive.

In [None]:
# send the deepcell inputs of the selected fovs to Deepcell and collect the results
# in deepcell_output_dir (a Google Drive folder in this notebook)
deepcell_service_utils.create_deepcell_output(deepcell_input_dir, deepcell_output_dir, fovs=fovs)

### We can then load the segmented mask from deepcell via label-map TIFFs and save as an xarray

In [None]:
def _load_deepcell_labels(feature_suffix, compartment):
    """Load the label-map images of one compartment from `deepcell_output_dir`.

    `feature_suffix` is passed as both `match_substring` and `trim_suffix`;
    `compartment` becomes the single entry of the 'compartments' dimension.
    """
    return load_utils.load_imgs_from_dir(
        data_dir=deepcell_output_dir,
        xr_dim_name='compartments',
        xr_channel_names=[compartment],
        trim_suffix=feature_suffix,
        match_substring=feature_suffix,
        force_ints=True,
    )


segmentation_labels_cell = _load_deepcell_labels('_feature_0', 'whole_cell')
segmentation_labels_nuc = _load_deepcell_labels('_feature_1', 'nuclear')

# join the two label maps along the last axis ...
stacked_labels = np.concatenate(
    (segmentation_labels_cell.values, segmentation_labels_nuc.values),
    axis=-1,
)

# ... and wrap them in one xarray that reuses the whole-cell fov / row / col coords
segmentation_labels = xr.DataArray(
    stacked_labels,
    coords=[
        segmentation_labels_cell.fovs,
        segmentation_labels_cell.rows,
        segmentation_labels_cell.cols,
        ['whole_cell', 'nuclear'],
    ],
    dims=segmentation_labels_cell.dims,
)

### We can now extract the segmented imaging data to create normalized and transformed expression matrices

### Note: if you're loading your own dataset, please make sure all the imaging data is in the same folder with each fov given its own folder and all fovs having the same channels.

For a full list of features extracted, please refer to the cell table section of: https://ark-analysis.readthedocs.io/en/latest/_rtd/data_types.html

In [None]:
# build both expression matrices (size-normalized and arcsinh-transformed) in one call
cell_table_size_normalized, cell_table_arcsinh_transformed = marker_quantification.generate_cell_table(
    segmentation_labels=segmentation_labels,
    tiff_dir=tiff_dir,
    img_sub_folder="TIFs",
    is_mibitiff=MIBItiff,
    fovs=fovs,
    batch_size=5,
    nuclear_counts=True,
)

### Here we write out our expression matrices (aka cell tables) to Google Drive.  This syntax looks a little odd and warrants some explanation.

With normal local paths, we'd simply run the command:
```
cell_table_arcsinh_transformed.to_csv(os.path.join(single_cell_dir, 'cell_table_arcsinh_transformed.csv'), index=False)
```
or, generalized:
```
df.to_csv(my_path, *args, **kwargs)
```

In this case, we're relying on the pandas `DataFrame` method `to_csv` to write our data out.  Since pandas has no affiliation with ark-analysis, it doesn't know about `GoogleDrivePaths`, so it would be fairly upset if it was given `my_path = GoogleDrivePath('/some_path')` as an argument.

---

To deal with cases like these, we utilize the `drive_write_out` function/pattern.  We pass the GoogleDrivePath, as well as a lambda function, to accomplish the write out.  For the generalized example, this looks like the following:

```
drive_write_out( my_path, lambda x: df.to_csv(x, *args, **kwargs) )
```

Note that this `lambda` function could also be a regular function:
```
def to_csv_wrapper(x):
    df.to_csv(x, *args, **kwargs)
```

In [None]:
def _csv_writer(table):
    """Return a one-argument callable that writes `table` as CSV (no index) to its argument."""
    return lambda x: table.to_csv(x, index=False)


# destinations of the two cell tables inside single_cell_dir
cell_table_arcsinh_path = path_join(single_cell_dir, 'cell_table_arcsinh_transformed.csv')
cell_table_normalized_path = path_join(single_cell_dir, 'cell_table_size_normalized.csv')

# write the size-normalized table first, then the arcsinh-transformed one
for table_path, table in (
    (cell_table_normalized_path, cell_table_size_normalized),
    (cell_table_arcsinh_path, cell_table_arcsinh_transformed),
):
    drive_write_out(table_path, _csv_writer(table))