## Perform cell segmentation using Mesmer on Spain and Stanford cohorts

In [None]:
import os
import warnings
from alpineer import io_utils
from skimage import io
from ark.segmentation import marker_quantification, segmentation_utils
from ark.utils import deepcell_service_utils, example_dataset, plot_utils

In [None]:
# Root directory of the cohort being processed; the image and segmentation
# paths used by this notebook are all built from it.
# NOTE(review): the trailing tags look like the cohort folder names that can be
# swapped in (BELLINI / STANFORD / SPAIN) — confirm before re-running.
base_dir = "Z:\\Noah Greenwald\\TNBC_Cohorts\\SPAIN" #BELLINI #STANFORD #SPAIN

In [None]:
# Which image set to process. The value names both the image sub-folder and the
# per-set segmentation sub-folder.
sample_type = 'samples'

# Input images: <base_dir>/image_data/<sample_type>
tiff_dir = os.path.join(base_dir, "image_data", sample_type)

# Every segmentation artefact shares one parent: <base_dir>/segmentation/<sample_type>
segmentation_root = os.path.join(base_dir, "segmentation", sample_type)
cell_table_dir = os.path.join(segmentation_root, "cell_table")
deepcell_input_dir = os.path.join(segmentation_root, "deepcell_input")
deepcell_output_dir = os.path.join(segmentation_root, "deepcell_output")
deepcell_visualization_dir = os.path.join(segmentation_root, "deepcell_visualization")

In [None]:
# Create every output directory up front. exist_ok=True keeps the cell safe to
# re-run and avoids the check-then-create race of testing os.path.exists first.
for directory in [cell_table_dir, deepcell_input_dir, deepcell_output_dir, deepcell_visualization_dir]:
    os.makedirs(directory, exist_ok=True)

In [None]:
# Check all directories this notebook reads from or writes to before doing any
# expensive work.
required_dirs = [
    base_dir,
    tiff_dir,
    deepcell_input_dir,
    deepcell_output_dir,
    cell_table_dir,
    deepcell_visualization_dir,
]
io_utils.validate_paths(required_dirs)

In [None]:
# Names returned by list_folders for tiff_dir; the rest of the notebook passes
# them on as the FOVs to process (presumably one sub-folder per FOV — confirm).
fovs = io_utils.list_folders(tiff_dir)

In [None]:
# Nuclear markers handed to generate_deepcell_input.
nucs = [
    'H3K9ac',
    'H3K27me3',
]

# Membrane markers handed to generate_deepcell_input.
mems = [
    'CD14',
    'CD38',
    'CD45',
    'ECAD',
    'CK17',
]

In [None]:
# Generate the deepcell (Mesmer) input for every FOV from the nuclear and
# membrane marker lists.
# NOTE(review): the per-argument notes below are inferred from the variable
# names — confirm against the installed ark version.
deepcell_service_utils.generate_deepcell_input(
    deepcell_input_dir,  # presumably where the generated inputs are written
    tiff_dir,  # presumably where the source images are read from
    nucs,
    mems,
    fovs,
    img_sub_folder=None  # presumably: images sit directly inside each FOV folder
)

In [None]:
# Value passed to create_deepcell_output as `scale`.
rescale_factor = 1.0

# Produce the deepcell output for the selected FOVs from the prepared inputs.
deepcell_service_utils.create_deepcell_output(
    deepcell_input_dir,
    deepcell_output_dir,
    fovs=fovs,
    scale=rescale_factor,
)

In [None]:
# Quick visual sanity check: overlay the whole-cell segmentation on the
# nuclear/membrane channels of the first FOV and show it.
# NOTE(review): simplefilter("ignore") is global, so it also silences warnings
# raised by every later cell; consider scoping it with warnings.catch_warnings().
warnings.simplefilter("ignore")

first_fov = fovs[0]
fov_to_display = io_utils.remove_file_extensions([first_fov])[0]

overlay_channels = ['nuclear_channel', 'membrane_channel']
fov_overlay = plot_utils.create_overlay(
    fov=fov_to_display,
    segmentation_dir=deepcell_output_dir,
    data_dir=deepcell_input_dir,
    img_overlay_chans=overlay_channels,
    seg_overlay_comp='whole_cell',
)

# Assign to `_` so the notebook does not echo the returned object.
_ = io.imshow(fov_overlay)

In [None]:
# Save segmentation label images for all FOVs into the visualization directory,
# using the nuclear and membrane channels from the deepcell input.
fov_names = io_utils.remove_file_extensions(fovs)
label_channels = ['nuclear_channel', 'membrane_channel']

segmentation_utils.save_segmentation_labels(
    segmentation_dir=deepcell_output_dir,
    data_dir=deepcell_input_dir,
    output_dir=deepcell_visualization_dir,
    fovs=fov_names,
    channels=label_channels,
)

In [None]:
# Options forwarded to marker_quantification.generate_cell_table.
nuclear_counts, fast_extraction = True, False

In [None]:
# Build the two cell tables (size-normalized and arcsinh-transformed) from the
# segmentation output and the source images, processing FOVs in batches of 5.
(
    cell_table_size_normalized,
    cell_table_arcsinh_transformed,
) = marker_quantification.generate_cell_table(
    segmentation_dir=deepcell_output_dir,
    tiff_dir=tiff_dir,
    img_sub_folder=None,
    fovs=fovs,
    batch_size=5,
    nuclear_counts=nuclear_counts,
    fast_extraction=fast_extraction,
)

In [None]:
# Compression setting passed to DataFrame.to_csv; None writes plain CSV.
compression = None

# Write both full cell tables, tagging each file name with the sample type.
size_normalized_path = os.path.join(cell_table_dir, f'cell_table_size_normalized_{sample_type}.csv')
cell_table_size_normalized.to_csv(size_normalized_path, compression=compression, index=False)

arcsinh_transformed_path = os.path.join(cell_table_dir, f'cell_table_arcsinh_transformed_{sample_type}.csv')
cell_table_arcsinh_transformed.to_csv(arcsinh_transformed_path, compression=compression, index=False)

## Split the cell tables into sample and control FOVs

In [None]:
# Split the size-normalized table by FOV name: names containing an
# R<digits>C<digits> token go to the sample table, everything else to control.
# The pattern is a raw string so "\d" reaches the regex engine as written
# instead of being parsed as an (invalid) string escape.
ctsn_is_sample = cell_table_size_normalized["fov"].str.contains(r"R\d+C\d+")
ctsn_sample = cell_table_size_normalized.loc[ctsn_is_sample, :]
ctsn_control = cell_table_size_normalized.loc[~ctsn_is_sample, :]

In [None]:
# Write the sample and control halves of the size-normalized table.
ctsn_sample_path = os.path.join(cell_table_dir, 'cell_table_size_normalized_sample.csv')
ctsn_control_path = os.path.join(cell_table_dir, 'cell_table_size_normalized_control.csv')

ctsn_sample.to_csv(ctsn_sample_path, compression=compression, index=False)
ctsn_control.to_csv(ctsn_control_path, compression=compression, index=False)

In [None]:
# Split the arcsinh-transformed table by FOV name: names containing an
# R<digits>C<digits> token go to the sample table, everything else to control.
# The pattern is a raw string so "\d" reaches the regex engine as written
# instead of being parsed as an (invalid) string escape.
ctat_is_sample = cell_table_arcsinh_transformed["fov"].str.contains(r"R\d+C\d+")
ctat_sample = cell_table_arcsinh_transformed.loc[ctat_is_sample, :]
ctat_control = cell_table_arcsinh_transformed.loc[~ctat_is_sample, :]

In [None]:
# Write the sample and control halves of the arcsinh-transformed table.
ctat_sample_path = os.path.join(cell_table_dir, 'cell_table_arcsinh_transformed_sample.csv')
ctat_control_path = os.path.join(cell_table_dir, 'cell_table_arcsinh_transformed_control.csv')

ctat_sample.to_csv(ctat_sample_path, compression=compression, index=False)
ctat_control.to_csv(ctat_control_path, compression=compression, index=False)