Skip to content

Commit

Permalink
Merge branch 'master' into plot_clustering_result
Browse files Browse the repository at this point in the history
  • Loading branch information
omerbt committed Dec 4, 2020
2 parents e41148f + 0580df7 commit bbea784
Show file tree
Hide file tree
Showing 15 changed files with 545 additions and 226 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ After updating, you can copy over any important paths or modifications from the

## Questions?

Please refer to our [FAQ](https://ark-analysis.readthedocs.io/en/latest/_rtd/faq.html).
If you run into trouble, please first refer to our [FAQ](https://ark-analysis.readthedocs.io/en/latest/_rtd/faq.html). If that doesn't answer your question, you can open an [issue](https://github.com/angelolab/ark-analysis/issues). Before opening, please double check and see that someone else hasn't opened an issue for your question already.

## Want to contribute?

If you would like to help make `ark` better, please take a look at our [contributing guidelines](https://ark-analysis.readthedocs.io/en/latest/_rtd/contributing.html)
If you would like to help make `ark` better, please take a look at our [contributing guidelines](https://ark-analysis.readthedocs.io/en/latest/_rtd/contributing.html).
65 changes: 57 additions & 8 deletions ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
from skimage.measure import regionprops_table

from ark.utils import io_utils, load_utils, misc_utils, segmentation_utils
from ark.segmentation import signal_extraction
from ark.segmentation.signal_extraction import extraction_function

import ark.settings as settings


def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=False,
regionprops_features=None, split_large_nuclei=False):
regionprops_features=None, split_large_nuclei=False,
extraction='total_intensity', **kwargs):
"""Extract single cell protein expression data from channel TIFs for a single fov
Args:
input_images (xarray.DataArray):
rows x columns x channels matrix of imaging data
Expand All @@ -27,11 +29,20 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
morphology features for regionprops to extract for each cell
split_large_nuclei (bool):
controls whether nuclei which have portions outside of the cell will get relabeled
extraction (str):
extraction function used to compute marker counts.
**kwargs:
arbitrary keyword arguments
Returns:
xarray.DataArray:
xarray containing segmented data of cells x markers
"""

misc_utils.verify_in_list(
extraction=extraction,
extraction_options=list(extraction_function.keys())
)

if regionprops_features is None:
regionprops_features = ['label', 'area', 'eccentricity', 'major_axis_length',
'minor_axis_length', 'perimeter', 'centroid']
Expand All @@ -43,6 +54,10 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
if 'label' not in regionprops_features:
regionprops_features.append('label')

# centroid is required
if not any(['centroid' in rpf for rpf in regionprops_features]):
regionprops_features.append('centroid')

# enforce post channel column is present and first
if regionprops_features[0] != settings.POST_CHANNEL_COL:
if settings.POST_CHANNEL_COL in regionprops_features:
Expand Down Expand Up @@ -97,8 +112,14 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
# get coords corresponding to current cell.
cell_coords = cell_props.loc[cell_props['label'] == cell_id, 'coords'].values[0]

# get centroid corresponding to current cell
kwargs['centroid'] = np.array((
cell_props.loc[cell_props['label'] == cell_id, 'centroid-0'].values,
cell_props.loc[cell_props['label'] == cell_id, 'centroid-1'].values
)).T

# calculate the total signal intensity within cell
cell_counts = signal_extraction.default_extraction(cell_coords, input_images)
cell_counts = extraction_function[extraction](cell_coords, input_images, **kwargs)

# get morphology metrics
current_cell_props = cell_props.loc[cell_props['label'] == cell_id, regionprops_names]
Expand All @@ -121,8 +142,14 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals
# get coordinates of corresponding nucleus
nuc_coords = nuc_props.loc[nuc_props['label'] == nuc_id, 'coords'].values[0]

# get nuclear centroid
kwargs['centroid'] = np.array((
nuc_props.loc[nuc_props['label'] == nuc_id, 'centroid-0'].values,
nuc_props.loc[nuc_props['label'] == nuc_id, 'centroid-1'].values
)).T

# extract nuclear signal
nuc_counts = signal_extraction.default_extraction(nuc_coords, input_images)
nuc_counts = extraction_function[extraction](nuc_coords, input_images, **kwargs)

# get morphology metrics
current_nuc_props = nuc_props.loc[
Expand All @@ -142,7 +169,7 @@ def compute_marker_counts(input_images, segmentation_labels, nuclear_counts=Fals


def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts=False,
split_large_nuclei=False):
split_large_nuclei=False, extraction='total_intensity', **kwargs):
"""Create a matrix of cells by channels with the total counts of each marker in each cell.
Args:
Expand All @@ -157,6 +184,10 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
split_large_nuclei (bool):
boolean flag to determine whether nuclei which are larger than their assigned cell
will get split into two different nuclear objects
extraction (str):
extraction function used to compute marker counts.
**kwargs:
arbitrary keyword args
Returns:
tuple (pandas.DataFrame, pandas.DataFrame):
Expand All @@ -176,6 +207,11 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
compartment_names=segmentation_labels.compartments.values
)

misc_utils.verify_in_list(
extraction=extraction,
extraction_options=list(extraction_function.keys())
)

misc_utils.verify_same_elements(segmentation_labels_fovs=segmentation_labels.fovs.values,
img_data_fovs=image_data.fovs.values)

Expand All @@ -193,7 +229,8 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
# extract the counts per cell for each marker
marker_counts = compute_marker_counts(image_data.loc[fov, :, :, :], segmentation_label,
nuclear_counts=nuclear_counts,
split_large_nuclei=split_large_nuclei)
split_large_nuclei=split_large_nuclei,
extraction=extraction, **kwargs)

# normalize counts by cell size
marker_counts_norm = segmentation_utils.transform_expression_matrix(marker_counts,
Expand Down Expand Up @@ -235,7 +272,8 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts


def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
is_mibitiff=False, fovs=None, batch_size=5, dtype="int16"):
is_mibitiff=False, fovs=None, batch_size=5, dtype="int16",
extraction='total_intensity', **kwargs):
"""This function takes the segmented data and computes the expression matrices batch-wise
while also validating inputs
Expand All @@ -255,6 +293,10 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
necessary for speed and memory considerations
dtype (str/type):
data type of base images
extraction (str):
extraction function used to compute marker counts.
**kwargs:
arbitrary keyword arguments for signal extraction
Returns:
tuple (pandas.DataFrame, pandas.DataFrame):å
Expand All @@ -272,6 +314,11 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
# drop file extensions
fovs = io_utils.remove_file_extensions(fovs)

misc_utils.verify_in_list(
extraction=extraction,
extraction_options=list(extraction_function.keys())
)

# check segmentation_labels for given fovs (img loaders will fail otherwise)
misc_utils.verify_in_list(fovs=fovs,
segmentation_labels_fovs=segmentation_labels['fovs'].values)
Expand Down Expand Up @@ -312,7 +359,9 @@ def generate_cell_table(segmentation_labels, tiff_dir, img_sub_folder,
# segment the imaging data
cell_table_size_normalized, cell_table_arcsinh_transformed = create_marker_count_matrices(
segmentation_labels=current_labels,
image_data=image_data
image_data=image_data,
extraction=extraction,
**kwargs
)

# now append to the final dfs to return
Expand Down
69 changes: 69 additions & 0 deletions ark/segmentation/marker_quantification_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ def test_compute_marker_counts_base():
assert np.array_equal(segmentation_output.loc['whole_cell', :, settings.CELL_SIZE],
segmentation_output.loc['whole_cell', :, 'area'])

# test different extraction selection
center_extraction = \
marker_quantification.compute_marker_counts(input_images=input_images,
segmentation_labels=segmentation_labels,
extraction='center_weighting')

assert np.all(
segmentation_output.loc['whole_cell', :, 'chan0'].values
> center_extraction.loc['whole_cell', :, 'chan0'].values
)


def test_compute_marker_counts_equal_masks():
cell_mask, channel_data = test_utils.create_test_extraction_data()
Expand Down Expand Up @@ -452,3 +463,61 @@ def test_generate_cell_data_mibitiff_loading():

assert norm_data.shape[0] > 0 and norm_data.shape[1] > 0
assert arcsinh_data.shape[0] > 0 and arcsinh_data.shape[1] > 0


def test_generate_cell_data_extractions():
with tempfile.TemporaryDirectory() as temp_dir:
# define 3 fovs and 3 imgs per fov
fovs, chans = test_utils.gen_fov_chan_names(3, 3)

tiff_dir = os.path.join(temp_dir, "single_channel_inputs")
img_sub_folder = "TIFs"

os.mkdir(tiff_dir)
test_utils.create_paired_xarray_fovs(
base_dir=tiff_dir,
fov_names=fovs,
channel_names=chans,
img_shape=(40, 40),
sub_dir=img_sub_folder,
fills=True,
dtype="int16"
)

# generate a sample segmentation_mask
cell_mask, _ = test_utils.create_test_extraction_data()

cell_masks = np.zeros((3, 40, 40, 1), dtype="int16")
cell_masks[0, :, :, 0] = cell_mask[0, :, :, 0]
cell_masks[1, 5:, 5:, 0] = cell_mask[0, :-5, :-5, 0]
cell_masks[2, 10:, 10:, 0] = cell_mask[0, :-10, :-10, 0]

segmentation_masks = test_utils.make_labels_xarray(
label_data=cell_masks,
compartment_names=['whole_cell']
)

default_norm_data, _ = marker_quantification.generate_cell_table(
segmentation_labels=segmentation_masks, tiff_dir=tiff_dir,
img_sub_folder=img_sub_folder, is_mibitiff=False, batch_size=2,
)

# verify total intensity extraction
assert np.all(
default_norm_data.loc[default_norm_data[settings.CELL_LABEL] == 1][chans].values
== np.arange(9).reshape(3, 3)
)

thresh_kwargs = {
'threshold': 1
}

# verify thresh kwarg passes through
positive_pixel_data, _ = marker_quantification.generate_cell_table(
segmentation_labels=segmentation_masks, tiff_dir=tiff_dir,
img_sub_folder=img_sub_folder, is_mibitiff=False, batch_size=2,
extraction='positive_pixel', **thresh_kwargs
)

assert np.all(positive_pixel_data.iloc[:4][['chan0', 'chan1']].values == 0)
assert np.all(positive_pixel_data.iloc[4:][chans].values == 1)
29 changes: 17 additions & 12 deletions ark/segmentation/signal_extraction.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import numpy as np


def positive_pixels_extraction(cell_coords, image_data, threshold=0):
def positive_pixels_extraction(cell_coords, image_data, **kwargs):
"""Extract channel counts by summing over the number of non-zero pixels in the cell.
Improves on default_extraction by not distinguishing between pixel expression values
Args:
cell_coords (numpy.ndarray): values representing pixels within one cell
image_data (xarray.DataArray): array containing channel counts
threshold (int): where we want to set the cutoff for a positive pixel, default 0
**kwargs: arbitrary keyword arguments
Returns:
numpy.ndarray:
Expand All @@ -20,32 +18,30 @@ def positive_pixels_extraction(cell_coords, image_data, threshold=0):
channel_values = image_data.values[tuple(cell_coords.T)]

# create binary mask based on threshold
channel_counts = np.sum(channel_values > threshold, axis=0)
channel_counts = np.sum(channel_values > kwargs.get('threshold', 0), axis=0)

return channel_counts


def center_weighting_extraction(cell_coords, image_data, centroid):
def center_weighting_extraction(cell_coords, image_data, **kwargs):
"""Extract channel counts by summing over weighted expression values based on distance from
center.
Improves upon default extraction by including a level of certainty/uncertainty.
Args:
cell_coords (numpy.ndarray):
values representing pixels within one cell
image_data (xarray.DataArray):
array containing channel counts
centroid (tuple):
the centroid of the region in question
**kwargs:
arbitrary keyword arguments
Returns:
numpy.ndarray:
Sums of counts for each channel
"""

# compute the distance box-level from the center outward
weights = np.linalg.norm(cell_coords - centroid, ord=np.inf, axis=1)
weights = np.linalg.norm(cell_coords - kwargs.get('centroid'), ord=np.inf, axis=1)

# center the weights around the middle value
weights = 1 - (weights / (np.max(weights) + 1))
Expand All @@ -57,14 +53,16 @@ def center_weighting_extraction(cell_coords, image_data, centroid):
return channel_counts


def default_extraction(cell_coords, image_data):
def total_intensity_extraction(cell_coords, image_data, **kwargs):
""" Extract channel counts for an individual cell via basic summation for each channel
Args:
cell_coords (numpy.ndarray):
values representing pixels within one cell
image_data (xarray.DataArray):
array containing channel counts
**kwargs:
arbitrary keyword arguments
Returns:
numpy.ndarray:
Expand All @@ -78,3 +76,10 @@ def default_extraction(cell_coords, image_data):
channel_counts = np.sum(channel_values, axis=0)

return channel_counts


extraction_function = {
'positive_pixel': positive_pixels_extraction,
'center_weighting': center_weighting_extraction,
'total_intensity': total_intensity_extraction,
}
Loading

0 comments on commit bbea784

Please sign in to comment.