Skip to content

Commit

Permalink
Merge 9a3473e into 1776cc7
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Feb 17, 2021
2 parents 1776cc7 + 9a3473e commit 1cecff9
Show file tree
Hide file tree
Showing 6 changed files with 704 additions and 85 deletions.
272 changes: 200 additions & 72 deletions ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,155 @@

import xarray as xr

from skimage.measure import regionprops_table
from skimage.measure import regionprops, regionprops_table

from ark.utils import io_utils, load_utils, misc_utils, segmentation_utils
from ark.segmentation.signal_extraction import extraction_function
from ark.segmentation.signal_extraction import EXTRACTION_FUNCTION
from ark.segmentation.regionprops_extraction import REGIONPROPS_FUNCTION

import ark.settings as settings


def get_single_compartment_props(segmentation_labels, regionprops_base,
regionprops_single_comp, **kwargs):
"""Gets regionprops features from the provided segmentation labels for a fov
Based on segmentation labels from a single compartment
Args:
segmentation_labels (numpy.ndarray):
rows x columns matrix of masks
regionprops_base (list):
base morphology features directly computed by regionprops to extract for each cell
regionprops_single_comp (list):
list of single compartment extra properties derived from regionprops to compute
**kwargs:
Arbitrary keyword arguments for compute_extra_props
Returns:
pandas.DataFrame:
Contains the regionprops info (base and derived) for each labeled cell
"""

# verify that all the regionprops single compartment featues provided actually exist
misc_utils.verify_in_list(
extras_props=regionprops_single_comp,
props_options=list(REGIONPROPS_FUNCTION.keys())
)

# get the base features
cell_props = pd.DataFrame(regionprops_table(segmentation_labels,
properties=regionprops_base))

# define an empty list for each regionprop feature
cell_props_single_comp = {re: [] for re in regionprops_single_comp}

# get regionprop info needed for single compartment computations
props = regionprops(segmentation_labels)

# generate the required data for each cell
for prop in props:
for re in regionprops_single_comp:
cell_props_single_comp[re].append(REGIONPROPS_FUNCTION[re](prop, **kwargs))

# convert the dictionary to a DataFrame
cell_props_single_comp = pd.DataFrame.from_dict(cell_props_single_comp)

# append the single compartment derived props info to the cell_props DataFrame
cell_props = pd.concat([cell_props, cell_props_single_comp], axis=1)

return cell_props


def assign_single_compartment_features(marker_counts, compartment, cell_props, cell_coords,
cell_id, label_id, input_images, regionprops_names,
extraction, **kwargs):
"""Assign computed regionprops features and signal intensity to cell_id in marker_counts
Args:
marker_counts (xarray.DataArray):
xarray containing segmentaed data of cells x markers
compartment (str):
either 'whole_cell' or 'nuclear'
cell_props (pandas.DataFrame):
regionprops information for each cell
cell_coords (numpy.ndarray):
values representing pixels within one cell
cell_id (int):
id of the cell
label_id (int):
id used to index into cell_props
input_images (xarray.DataArray):
rows x columns x channels matrix of imaging data
regionprops_names (list):
all of the regionprops features (including derived, except nuclear-specific)
extraction (str):
the extraction method to use for signal intensity calculation
**kwargs:
arbitrary keyword arguments
Returns:
xarray.DataArray:
the updated marker_counts matrix with data for the specified cell_id and compartment
"""

# get centroid corresponding to current cell
kwargs['centroid'] = np.array((
cell_props.loc[cell_props['label'] == label_id, 'centroid-0'].values,
cell_props.loc[cell_props['label'] == label_id, 'centroid-1'].values
)).T

# calculate the total signal intensity within cell
cell_counts = EXTRACTION_FUNCTION[extraction](cell_coords, input_images, **kwargs)

# get morphology metrics
current_cell_props = cell_props.loc[cell_props['label'] == label_id, regionprops_names]

# combine marker counts and morphology metrics together
cell_features = np.concatenate((cell_counts, current_cell_props), axis=None)

# add counts of each marker to appropriate column
marker_counts.loc[compartment, cell_id, marker_counts.features[1]:] = cell_features

# add cell size to first column
marker_counts.loc[compartment, cell_id, marker_counts.features[0]] = cell_coords.shape[0]

return marker_counts


def assign_multi_compartment_features(marker_counts, regionprops_multi_comp, **kwargs):
"""Assigns features to marker_counts that depend on multiple compartments
Args:
marker_counts (xarray.DataArray):
xarray containing segmentaed data of cells x markers
regionprops_multi_comp (list):
list of multi-compartment properties derived from regionprops to compute,
each value should correspond to a value in REGIONPROPS_FUNCTION
**kwargs:
arbitrary keyword arguments
Returns:
xarray.DataArray:
the updated marker_counts matrix with data for the specified cell_id and compartment
"""

misc_utils.verify_in_list(
nuclear_props=regionprops_multi_comp,
props_options=list(REGIONPROPS_FUNCTION.keys())
)

for rn in regionprops_multi_comp:
marker_counts = REGIONPROPS_FUNCTION[rn](marker_counts, **kwargs)

return marker_counts


def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=False,
regionprops_features=None, split_large_nuclei=False,
regionprops_base=copy.deepcopy(settings.REGIONPROPS_BASE),
regionprops_single_comp=copy.deepcopy(settings.REGIONPROPS_SINGLE_COMP),
regionprops_multi_comp=copy.deepcopy(settings.REGIONPROPS_MULTI_COMP),
split_large_nuclei=False,
extraction='total_intensity', **kwargs):
"""Extract single cell protein expression data from channel TIFs for a single fov
Expand All @@ -25,54 +164,62 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
rows x columns x compartment matrix of masks
nuclear_counts (bool):
boolean flag to determine whether nuclear counts are returned
regionprops_features (list):
morphology features for regionprops to extract for each cell
regionprops_base (list):
base morphology features directly computed by regionprops to extract for each cell
regionprops_single_comp (list):
list of single compartment extra properties derived from regionprops to compute
regionprops_multi_comp (list):
list of multi compartment extra properties derived from regionprops to compute
split_large_nuclei (bool):
controls whether nuclei which have portions outside of the cell will get relabeled
extraction (str):
extraction function used to compute marker counts.
**kwargs:
arbitrary keyword arguments
arbitrary keyword arguments for get_cell_props
Returns:
xarray.DataArray:
xarray containing segmented data of cells x markers
"""

misc_utils.verify_in_list(
extraction=extraction,
extraction_options=list(extraction_function.keys())
extraction_options=list(EXTRACTION_FUNCTION.keys())
)

if regionprops_features is None:
regionprops_features = ['label', 'area', 'eccentricity', 'major_axis_length',
'minor_axis_length', 'perimeter', 'centroid']

if 'coords' not in regionprops_features:
regionprops_features.append('coords')
if 'coords' not in regionprops_base:
regionprops_base.append('coords')

# labels are required
if 'label' not in regionprops_features:
regionprops_features.append('label')
if 'label' not in regionprops_base:
regionprops_base.append('label')

# centroid is required
if not any(['centroid' in rpf for rpf in regionprops_features]):
regionprops_features.append('centroid')
if not any(['centroid' in rpf for rpf in regionprops_base]):
regionprops_base.append('centroid')

# enforce post channel column is present and first
if regionprops_features[0] != settings.POST_CHANNEL_COL:
if settings.POST_CHANNEL_COL in regionprops_features:
regionprops_features.remove(settings.POST_CHANNEL_COL)
regionprops_features.insert(0, settings.POST_CHANNEL_COL)
if regionprops_base[0] != settings.POST_CHANNEL_COL:
if settings.POST_CHANNEL_COL in regionprops_base:
regionprops_base.remove(settings.POST_CHANNEL_COL)
regionprops_base.insert(0, settings.POST_CHANNEL_COL)

# create variable to hold names of returned columns only
regionprops_names = copy.copy(regionprops_features)
regionprops_names = copy.copy(regionprops_base)
regionprops_names.remove('coords')

# centroid returns two columns, need to modify names
if np.isin('centroid', regionprops_names):
regionprops_names.remove('centroid')
regionprops_names += ['centroid-0', 'centroid-1']

# add the single compartment features to regionprops_names
regionprops_names.extend(regionprops_single_comp)

# add the multi compartment features to regionprops_names
regionprops_names.extend(regionprops_multi_comp)

# get all the cell ids
unique_cell_ids = np.unique(segmentation_labels[..., 0].values)
unique_cell_ids = unique_cell_ids[np.nonzero(unique_cell_ids)]

Expand All @@ -90,9 +237,13 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
feature_names],
dims=['compartments', 'cell_id', 'features'])

# get the regionprops kwargs
reg_props = kwargs.get('regionprops_kwargs', {})

# get regionprops for each cell
cell_props = pd.DataFrame(regionprops_table(segmentation_labels.loc[:, :, 'whole_cell'].values,
properties=regionprops_features))
cell_props = get_single_compartment_props(segmentation_labels.loc[:, :, 'whole_cell'].values,
regionprops_base, regionprops_single_comp,
**reg_props)

if nuclear_counts:
nuc_labels = segmentation_labels.loc[:, :, 'nuclear'].values
Expand All @@ -104,66 +255,43 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
nuc_segmentation_labels=nuc_labels,
cell_ids=unique_cell_ids)

nuc_props = pd.DataFrame(regionprops_table(nuc_labels, properties=regionprops_features))
nuc_props = get_single_compartment_props(segmentation_labels.loc[:, :, 'nuclear'].values,
regionprops_base, regionprops_single_comp,
**reg_props)

# get the signal kwargs
sig_kwargs = kwargs.get('signal_kwargs', {})

# TODO: There's some repeated code here, maybe worth refactoring? Maybe not
# loop through each cell in mask
for cell_id in cell_props['label']:
# get coords corresponding to current cell.
cell_coords = cell_props.loc[cell_props['label'] == cell_id, 'coords'].values[0]

# get centroid corresponding to current cell
kwargs['centroid'] = np.array((
cell_props.loc[cell_props['label'] == cell_id, 'centroid-0'].values,
cell_props.loc[cell_props['label'] == cell_id, 'centroid-1'].values
)).T

# calculate the total signal intensity within cell
cell_counts = extraction_function[extraction](cell_coords, input_images, **kwargs)

# get morphology metrics
current_cell_props = cell_props.loc[cell_props['label'] == cell_id, regionprops_names]

# combine marker counts and morphology metrics together
cell_features = np.concatenate((cell_counts, current_cell_props), axis=None)

# add counts of each marker to appropriate column
marker_counts.loc['whole_cell', cell_id, marker_counts.features[1]:] = cell_features

# add cell size to first column
marker_counts.loc['whole_cell', cell_id, marker_counts.features[0]] = cell_coords.shape[0]
# assign properties for whole cell compartment
marker_counts = assign_single_compartment_features(
marker_counts, 'whole_cell', cell_props, cell_coords, cell_id, cell_id,
input_images, regionprops_names, extraction, **sig_kwargs
)

if nuclear_counts:
# get id of corresponding nucleus
nuc_id = segmentation_utils.find_nuclear_label_id(nuc_segmentation_labels=nuc_labels,
cell_coords=cell_coords)

if nuc_id is not None:
# get coordinates of corresponding nucleus
# get the coords of the corresponding nucleus
nuc_coords = nuc_props.loc[nuc_props['label'] == nuc_id, 'coords'].values[0]

# get nuclear centroid
kwargs['centroid'] = np.array((
nuc_props.loc[nuc_props['label'] == nuc_id, 'centroid-0'].values,
nuc_props.loc[nuc_props['label'] == nuc_id, 'centroid-1'].values
)).T

# extract nuclear signal
nuc_counts = extraction_function[extraction](nuc_coords, input_images, **kwargs)

# get morphology metrics
current_nuc_props = nuc_props.loc[
nuc_props['label'] == nuc_id, regionprops_names]

# combine marker counts and morphology metrics together
nuc_features = np.concatenate((nuc_counts, current_nuc_props), axis=None)

# add counts of each marker to appropriate column
marker_counts.loc['nuclear', cell_id, marker_counts.features[1]:] = nuc_features
# assign properties for nuclear compartment
marker_counts = assign_single_compartment_features(
marker_counts, 'nuclear', nuc_props, nuc_coords, cell_id, nuc_id,
input_images, regionprops_names, extraction, **sig_kwargs
)

# add cell size to first column
marker_counts.loc['nuclear', cell_id, marker_counts.features[0]] = \
nuc_coords.shape[0]
# generate properties which involve multiple compartments
marker_counts = assign_multi_compartment_features(
marker_counts, regionprops_multi_comp
)

return marker_counts

Expand All @@ -187,7 +315,7 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
extraction (str):
extraction function used to compute marker counts.
**kwargs:
arbitrary keyword args
arbitrary keyword args for compute_marker_counts
Returns:
tuple (pandas.DataFrame, pandas.DataFrame):
Expand All @@ -209,7 +337,7 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts

misc_utils.verify_in_list(
extraction=extraction,
extraction_options=list(extraction_function.keys())
extraction_options=list(EXTRACTION_FUNCTION.keys())
)

misc_utils.verify_same_elements(segmentation_labels_fovs=segmentation_labels.fovs.values,
Expand Down Expand Up @@ -296,7 +424,7 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
extraction (str):
extraction function used to compute marker counts.
**kwargs:
arbitrary keyword arguments for signal extraction
arbitrary keyword arguments for signal and regionprops extraction
Returns:
tuple (pandas.DataFrame, pandas.DataFrame):å
Expand All @@ -316,7 +444,7 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,

misc_utils.verify_in_list(
extraction=extraction,
extraction_options=list(extraction_function.keys())
extraction_options=list(EXTRACTION_FUNCTION.keys())
)

# check segmentation_labels for given fovs (img loaders will fail otherwise)
Expand Down
Loading

0 comments on commit 1cecff9

Please sign in to comment.