## This notebook takes the output of deepcell, processes it, segments cells, and outputs the extracted channel information

In [2]:
import os

import matplotlib.pyplot as plt
import skimage.io as io
import numpy as np

from ark.utils import data_utils, segmentation_utils, io_utils, test_utils
from ark.segmentation import marker_quantification

C:\Users\kevin\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\.libs\libopenblas.IPBC74C7KURV7CB2PKT5Z5FNR3SIBV4J.gfortran-win_amd64.dll
C:\Users\kevin\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
  stacklevel=1)


In [3]:
# set up file paths
base_dir = "../data/example_dataset"
input_dir = os.path.join(base_dir, "input_data")
deepcell_input_dir = os.path.join(input_dir, 'deepcell_input')
tiff_dir = os.path.join(input_dir, 'single_channel_inputs')
label_dir = os.path.join(base_dir, 'deepcell_output')

In [4]:
# set to true if base images are MIBItiffs
MIBItiff = False

# points to look at (None for all)
points = None

In [5]:
# validate file paths (add extra paths to this list)
io_utils.validate_paths([
    base_dir,
    input_dir,
    deepcell_input_dir,
    tiff_dir,
    label_dir,
])

### We compute the paths for the deepcell input TIFFs

In [6]:
if points is None or points == []:
    points_input = io_utils.list_files(deepcell_input_dir, substrs=['tif'])
else:
    points_input = io_utils.list_files(deepcell_input_dir, substrs=points)

### We can then load the segmented mask from deepcell via label-map TIFFs and save as an xarray

In [7]:
segmentation_labels = data_utils.load_imgs_from_dir(data_dir=label_dir,
                                                    imgdim_name='compartments',
                                                    image_name='whole_cell',
                                                    delimiter='_feature_0',
                                                    force_ints=True)

save_name = os.path.join(label_dir, 'segmentation_labels.xr')
if os.path.exists(save_name):
    print("overwriting previously generated processed output file")
    os.remove(save_name)

segmentation_labels.to_netcdf(save_name, format="NETCDF3_64BIT")

overwriting previously generated processed output file


### We can also then save the segmented mask overlaid on the imaging data

In [8]:
# get input data for overlay
input_data_xr = data_utils.load_imgs_from_multitiff(deepcell_input_dir, multitiff_files=points_input)
print(type(input_data_xr.channels.values))
print(input_data_xr.channels.values[:2])
segmentation_utils.visualize_segmentation(
            segmentation_labels_xr=segmentation_labels,
            fovs=input_data_xr.fovs, channel_data_xr=input_data_xr,
            overlay_channels=[input_data_xr.channels.values[:2]],
            output_dir=label_dir, save_tifs='all')
"""
for fov in input_data_xr.fovs:
    plot_utils.plot_overlay(segmentation_labels.loc[fov, :, :, "whole_cell"].values,
                            input_data_xr.loc[fov, :, :, :].values,
                            path=os.path.join(label_dir, f'{fov.values}_overlay.tif'))
                            """

<class 'numpy.ndarray'>
[0 1]


OSError: [Errno 22] Invalid argument: "C:\\Users\\kevin\\PycharmProjects\\ark-analysis\\data\\example_dataset\\deepcell_output\\<xarray.DataArray 'fovs' ()>\narray('Point8', dtype='<U6')\nCoordinates:\n    fovs     <U6 'Point8'_0_1_overlay.tiff"

### Afterwards, we can generate expression matrices from the labeling + imaging data

Returns:
* cell_size_normalized_data: computed by dividing the marker counts in segmentation_labels by their corresponding cell size.
* arcsinh_transformed_data: first, linearly scale each value of cell_size_normalized_data by multiplying by 100. Then, pass the linearly scaled cell_size_normalized_data through the arcsinh function.

In [None]:
# now extract the segmented imaging data to create normalized and transformed expression matrices
# note that if you're loading your own dataset, please make sure all the imaging data is in the same folder
# with each FOV given it's own folder and all FOVs having the same channels
combined_cell_size_normalized_data, combined_arcsinh_transformed_data = \
    marker_quantification.compute_complete_expression_matrices(segmentation_labels=segmentation_labels,
                                                               tiff_dir=tiff_dir,
                                                               img_sub_folder="TIFs",
                                                               is_mibitiff=MIBItiff,
                                                               points=points,
                                                               batch_size=5)

In [None]:
# specify the path to the single_cell_output folder, and create it if it doesn't already exist
# this is where we will store our combined_normalized_data and combined_transformed_data output
single_cell_dir = os.path.join(base_dir, "single_cell_output")

if not os.path.exists(single_cell_dir):
    os.makedirs(single_cell_dir)

In [None]:
# save output as CSV
combined_cell_size_normalized_data.to_csv(os.path.join(single_cell_dir, 'cell_size_normalized_data.csv'), index=False)
combined_arcsinh_transformed_data.to_csv(os.path.join(single_cell_dir, 'arcsinh_transformed_data.csv'), index=False)