Skip to content

Commit

Permalink
Change load functions to default to image dtype (#609)
Browse files Browse the repository at this point in the history
* adjusted load_imgs_from_dir's force_ints behavior

* test changes

* added a function to convert deepcell seg masks from float32 to int16 via ranked_data.

* removed dtype checks

* debugging bytes -> numpy array

* dtype removal

* removed dtype in som_utils

* reshaped `ranked_mask` in `_convert_deepcell_seg_masks` from 1D n^2 x 1 -> n x n

* fixed deepcell shape issue

* added negative value check

* added negative value check

* adjusted tests

Co-authored-by: Noah F. Greenwald <noahfgreenwald@gmail.com>
  • Loading branch information
srivarra and ngreenwald committed Jul 8, 2022
1 parent da89052 commit f3b9c56
Show file tree
Hide file tree
Showing 14 changed files with 161 additions and 208 deletions.
49 changes: 16 additions & 33 deletions ark/phenotyping/som_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


def calculate_channel_percentiles(tiff_dir, fovs, channels, img_sub_folder,
percentile, dtype="float32"):
percentile):
"""Calculates average percentile for each channel in the dataset
Args:
Expand All @@ -40,8 +40,6 @@ def calculate_channel_percentiles(tiff_dir, fovs, channels, img_sub_folder,
Sub folder within each FOV containing image data
percentile (float):
The specific percentile to compute
dtype (type):
The type to use for loading the image data in
Returns:
pd.DataFrame:
Expand All @@ -57,8 +55,7 @@ def calculate_channel_percentiles(tiff_dir, fovs, channels, img_sub_folder,
for fov in fovs:
# load image data and remove 0 valued pixels
img = load_utils.load_imgs_from_tree(data_dir=tiff_dir, img_sub_folder=img_sub_folder,
channels=[channel], fovs=[fov],
dtype=dtype).values[0, :, :, 0]
channels=[channel], fovs=[fov]).values[0, :, :, 0]
img = img[img > 0]

# record and store percentile, skip if no non-zero pixels
Expand All @@ -75,7 +72,7 @@ def calculate_channel_percentiles(tiff_dir, fovs, channels, img_sub_folder,


def calculate_pixel_intensity_percentile(tiff_dir, fovs, channels, img_sub_folder,
channel_percentiles, percentile=0.05, dtype="float32"):
channel_percentiles, percentile=0.05):
"""Calculates average percentile per FOV for total signal in each pixel
Args:
Expand All @@ -92,8 +89,7 @@ def calculate_pixel_intensity_percentile(tiff_dir, fovs, channels, img_sub_folde
Computed by `calculate_channel_percentiles`
percentile (float):
The pixel intensity percentile per FOV to average over
dtype (type):
The type to use for loading the image data in
Returns:
float:
Expand All @@ -109,8 +105,7 @@ def calculate_pixel_intensity_percentile(tiff_dir, fovs, channels, img_sub_folde
for fov in fovs:
# load image data
img_data = load_utils.load_imgs_from_tree(data_dir=tiff_dir, fovs=[fov],
channels=channels, img_sub_folder=img_sub_folder,
dtype=dtype)
channels=channels, img_sub_folder=img_sub_folder)

# normalize each channel by its percentile value
norm_data = img_data[0].values / norm_vect
Expand Down Expand Up @@ -178,8 +173,7 @@ def check_for_modified_channels(tiff_dir, test_fov, img_sub_folder, channels):

# get all channels within example FOV
all_channels = io_utils.list_files(os.path.join(tiff_dir, test_fov, img_sub_folder))
all_channels = io_utils.remove_file_extensions(all_channels
)
all_channels = io_utils.remove_file_extensions(all_channels)
# define potential modifications to channel names
mods = ['_smoothed', '_nuc_include', '_nuc_exclude']

Expand All @@ -197,7 +191,7 @@ def check_for_modified_channels(tiff_dir, test_fov, img_sub_folder, channels):
pass


def smooth_channels(fovs, tiff_dir, img_sub_folder, channels, smooth_vals, dtype="float32"):
def smooth_channels(fovs, tiff_dir, img_sub_folder, channels, smooth_vals):
"""Adds additional smoothing for selected channels as a preprocessing step
Args:
Expand All @@ -212,8 +206,7 @@ def smooth_channels(fovs, tiff_dir, img_sub_folder, channels, smooth_vals, dtype
smooth_vals (list or int):
amount to smooth channels. If a single int, applies
to all channels. Otherwise, a custom value per channel can be supplied
dtype (type):
the type to use for loading the image data in
"""

# no output if no channels specified
Expand All @@ -237,8 +230,7 @@ def smooth_channels(fovs, tiff_dir, img_sub_folder, channels, smooth_vals, dtype
for fov in fovs:
for idx, chan in enumerate(channels):
img = load_utils.load_imgs_from_tree(data_dir=tiff_dir, img_sub_folder=img_sub_folder,
fovs=[fov], channels=[chan],
dtype=dtype).values[0, :, :, 0]
fovs=[fov], channels=[chan]).values[0, :, :, 0]
chan_out = ndimage.gaussian_filter(img, sigma=smooth_vals[idx])
imsave(os.path.join(tiff_dir, fov, img_sub_folder, chan + '_smoothed.tiff'),
chan_out, check_contrast=False)
Expand Down Expand Up @@ -759,7 +751,7 @@ def create_fov_pixel_data(fov, channels, img_data, seg_labels, pixel_norm_val,

def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix,
img_sub_folder, is_mibitiff, channels, blur_factor,
subset_proportion, pixel_norm_val, dtype, seed, channel_norm_df, fov):
subset_proportion, pixel_norm_val, seed, channel_norm_df, fov):
"""Helper function to read in the FOV-level pixel data, run `create_fov_pixel_data`,
and save the preprocessed data.
Expand Down Expand Up @@ -791,8 +783,6 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix
The proportion of pixels to take from each fov
pixel_norm_val (float):
The value to normalize the pixels by
dtype (type):
The type to load the image segmentation labels in
seed (int):
The random seed to set for subsetting
channel_norm_df (pandas.DataFrame):
Expand All @@ -809,12 +799,10 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix
# load img_xr from MIBITiff or directory with the fov
if is_mibitiff:
img_xr = load_utils.load_imgs_from_mibitiff(
tiff_dir, mibitiff_files=[fov], dtype=dtype
)
tiff_dir, mibitiff_files=[fov])
else:
img_xr = load_utils.load_imgs_from_tree(
tiff_dir, img_sub_folder=img_sub_folder, fovs=[fov], dtype=dtype
)
tiff_dir, img_sub_folder=img_sub_folder, fovs=[fov])

# ensure the provided channels will actually exist in img_xr
misc_utils.verify_in_list(
Expand Down Expand Up @@ -873,7 +861,7 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
data_dir='pixel_mat_data',
subset_dir='pixel_mat_subsetted',
norm_vals_name='post_rowsum_chan_norm.feather', is_mibitiff=False,
blur_factor=2, subset_proportion=0.1, dtype="float32", seed=42,
blur_factor=2, subset_proportion=0.1, seed=42,
channel_percentile=0.99, batch_size=5):
"""For each fov, add a Gaussian blur to each channel and normalize channel sums for each pixel
Expand Down Expand Up @@ -919,8 +907,6 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
The proportion of pixels to take from each fov
seed (int):
The random seed to set for subsetting
dtype (type):
The type to use for loading the image data in
channel_percentile (float):
Percentile used to normalize channels to same range
batch_size (int):
Expand Down Expand Up @@ -969,8 +955,7 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
fovs=fovs,
channels=channels,
img_sub_folder=img_sub_folder,
percentile=channel_percentile,
dtype=dtype)
percentile=channel_percentile)
# save output
feather.write_dataframe(channel_norm_df, channel_norm_path, compression='uncompressed')

Expand All @@ -987,9 +972,7 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
# compute pixel percentiles
pixel_norm_val = calculate_pixel_intensity_percentile(
tiff_dir=tiff_dir, fovs=fovs, channels=channels,
img_sub_folder=img_sub_folder, channel_percentiles=channel_norm_df,
dtype=dtype
)
img_sub_folder=img_sub_folder, channel_percentiles=channel_norm_df)

pixel_norm_df = pd.DataFrame({'pixel_norm_val': [pixel_norm_val]})
feather.write_dataframe(pixel_norm_df, pixel_norm_path, compression='uncompressed')
Expand All @@ -1002,7 +985,7 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
fov_data_func = partial(
preprocess_fov, base_dir, tiff_dir, data_dir, subset_dir,
seg_dir, seg_suffix, img_sub_folder, is_mibitiff, channels, blur_factor,
subset_proportion, pixel_norm_val, dtype, seed, channel_norm_df
subset_proportion, pixel_norm_val, seed, channel_norm_df
)

# define the multiprocessing context
Expand Down
14 changes: 5 additions & 9 deletions ark/phenotyping/som_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,16 +275,14 @@ def mocked_create_fov_pixel_data(fov, channels, img_data, seg_labels, blur_facto

def mocked_preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix,
img_sub_folder, is_mibitiff, channels, blur_factor,
subset_proportion, pixel_norm_val, dtype, seed, channel_norm_df, fov):
subset_proportion, pixel_norm_val, seed, channel_norm_df, fov):
# load img_xr from MIBITiff or directory with the fov
if is_mibitiff:
img_xr = load_utils.load_imgs_from_mibitiff(
tiff_dir, mibitiff_files=[fov], dtype=dtype
)
tiff_dir, mibitiff_files=[fov])
else:
img_xr = load_utils.load_imgs_from_tree(
tiff_dir, img_sub_folder=img_sub_folder, fovs=[fov], dtype=dtype
)
tiff_dir, img_sub_folder=img_sub_folder, fovs=[fov])

# ensure the provided channels will actually exist in img_xr
misc_utils.verify_in_list(
Expand Down Expand Up @@ -504,8 +502,7 @@ def test_smooth_channels(smooth_vals):
smooth_channels = ['chan0', 'chan1']

som_utils.smooth_channels(fovs=fovs, tiff_dir=temp_dir, img_sub_folder='TIFs',
channels=smooth_channels, smooth_vals=smooth_vals,
dtype="int16")
channels=smooth_channels, smooth_vals=smooth_vals)

# check that correct value was applied
for fov in fovs:
Expand Down Expand Up @@ -1190,7 +1187,7 @@ def test_preprocess_fov(mocker):
som_utils.preprocess_fov(
temp_dir, tiff_dir, 'pixel_mat_data', 'pixel_mat_subsetted',
seg_dir, '_feature_0.tif', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
2, 0.1, 1, 'int16', 42, channel_norm_df, 'fov0'
2, 0.1, 1, 42, channel_norm_df, 'fov0'
)

fov_data_path = os.path.join(
Expand Down Expand Up @@ -1444,7 +1441,6 @@ def test_create_pixel_matrix(fovs, chans, sub_dir, seg_dir_include,
tiff_dir=new_tiff_dir,
img_sub_folder=sub_dir,
seg_dir=seg_dir,
dtype='float32',
pixel_cluster_prefix='test')


Expand Down
15 changes: 4 additions & 11 deletions ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,14 +507,11 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
# and extract the image data for each batch
if is_mibitiff:
image_data = load_utils.load_imgs_from_mibitiff(data_dir=tiff_dir,
mibitiff_files=batch_files,
dtype=dtype)
mibitiff_files=batch_files)
else:
image_data = load_utils.load_imgs_from_tree(data_dir=tiff_dir,
img_sub_folder=img_sub_folder,
fovs=batch_names,
dtype=dtype)

fovs=batch_names)
# define the files for whole cell and nuclear
whole_cell_files = [fov + '_feature_0.tif' for fov in batch_names]
nuclear_files = [fov + '_feature_1.tif' for fov in batch_names]
Expand All @@ -524,16 +521,12 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
files=whole_cell_files,
xr_dim_name='compartments',
xr_channel_names=['whole_cell'],
trim_suffix='_feature_0',
force_ints=True)

trim_suffix='_feature_0')
current_labels_nuc = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
files=nuclear_files,
xr_dim_name='compartments',
xr_channel_names=['nuclear'],
trim_suffix='_feature_1',
force_ints=True)

trim_suffix='_feature_1')
current_labels = xr.DataArray(np.concatenate((current_labels_cell.values,
current_labels_nuc.values),
axis=-1),
Expand Down
10 changes: 3 additions & 7 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,7 @@ def generate_cell_cluster_mask(fovs, base_dir, seg_dir, cell_data_name,
files=whole_cell_files,
xr_dim_name='compartments',
xr_channel_names=['whole_cell'],
trim_suffix=seg_suffix.split('.')[0],
force_ints=True)

trim_suffix=seg_suffix.split('.')[0])
# use label_cells_by_cluster to create cell masks
img_data = label_cells_by_cluster(
fovs, cell_consensus_data, label_maps, fov_col='fov',
Expand Down Expand Up @@ -335,12 +333,10 @@ def generate_deepcell_input(data_dir, tiff_dir, nuc_channels, mem_channels, fovs
# load the images in the current fov batch
if is_mibitiff:
data_xr = load_utils.load_imgs_from_mibitiff(
tiff_dir, mibitiff_files=fovs, channels=channels, dtype=dtype
)
tiff_dir, mibitiff_files=fovs, channels=channels)
else:
data_xr = load_utils.load_imgs_from_tree(
tiff_dir, img_sub_folder=img_sub_folder, fovs=fovs, channels=channels, dtype=dtype
)
tiff_dir, img_sub_folder=img_sub_folder, fovs=fovs, channels=channels)

# write each fov data to data_dir
for fov in data_xr.fovs.values:
Expand Down
39 changes: 36 additions & 3 deletions ark/utils/deepcell_service_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@
from tqdm.notebook import tqdm
from urllib.parse import unquote_plus
import warnings
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from scipy import stats
from skimage import io, external
from io import BytesIO
from ark.utils import misc_utils
from zipfile import ZipFile, ZIP_DEFLATED

from ark.utils import io_utils, misc_utils


Expand Down Expand Up @@ -136,8 +141,12 @@ def zip_write(zip_path):

with ZipFile(zip_files[-1], "r") as zipObj:
for name in zipObj.namelist():
with open(os.path.join(deepcell_output_dir, name), mode='wb') as f:
f.write(zipObj.read(name))
mask_path = os.path.join(deepcell_output_dir, name)
byte_repr = zipObj.read(name)
ranked_segmentation_mask = _convert_deepcell_seg_masks(byte_repr)
io.imsave(mask_path, ranked_segmentation_mask, plugin="tifffile",
check_contrast=False)

for fov in fov_group:
if fov + suffix + '.tif' not in zipObj.namelist():
warnings.warn(f'Deep Cell output file was not found for {fov}.')
Expand Down Expand Up @@ -299,3 +308,27 @@ def run_deepcell_direct(input_dir, output_dir, host='https://deepcell.org',
)

return 0


def _convert_deepcell_seg_masks(seg_mask: bytes) -> np.ndarray:
    """Converts the segmentation masks provided by deepcell from `float32` to integer labels
    (via assigning dense ranks to the data, so tied pixels share one label)
    as segmentation masks need to be integers in order to work as intended with
    scikit-image.

    Args:
        seg_mask (bytes): The output of deep cell's segmentation algorithm as file bytes.

    Returns:
        np.ndarray: The segmentation mask, with the same shape as the decoded image,
        converted from floating point to integer 16-bit via `scipy.stats.rankdata`.
        Labels are consecutive integers starting at 1, ordered by increasing pixel value.
        The dtype is widened to `int32` only when there are more distinct values than
        `int16` can represent.
    """
    float_mask = external.tifffile.imread(BytesIO(seg_mask))

    # rankdata flattens its input and returns a 1D array, so keep the shape to restore it
    shape = float_mask.shape

    # Use dense ranks: they are whole numbers bounded by the number of distinct values.
    # The default "average" method yields fractional ranks as large as the pixel count,
    # which get truncated and silently wrap around in int16 above 32767 pixels.
    ranks = stats.rankdata(float_mask, method="dense")

    # NOTE(review): the smallest value (presumably background 0) is assigned label 1,
    # not 0 -- confirm downstream code does not rely on a 0-valued background.

    # Stay on int16 whenever the labels fit; only widen when they would overflow
    fits_int16 = ranks.size == 0 or ranks.max() <= np.iinfo(np.int16).max
    label_dtype = "int16" if fits_int16 else "int32"

    ranked_mask: np.ndarray = ranks.astype(dtype=label_dtype).reshape(shape)

    return ranked_mask
25 changes: 18 additions & 7 deletions ark/utils/deepcell_service_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@
from zipfile import ZipFile
import pytest
from pytest_mock import MockerFixture

import numpy as np
from skimage import io
from ark.utils.deepcell_service_utils import create_deepcell_output


def mocked_run_deepcell(in_zip_path, output_dir, host, job_type, scale, timeout):
pathlib.Path(os.path.join(output_dir, 'fov1_feature_0.tif')).touch()
pathlib.Path(os.path.join(output_dir, 'fov2_feature_0.tif')).touch()
pathlib.Path(os.path.join(output_dir, 'fov3_feature_0.tif')).touch()

fov_data = np.ones(shape=(10, 10), dtype="float32")
io.imsave(os.path.join(output_dir, 'fov1_feature_0.tif'),
fov_data, plugin="tifffile", check_contrast=False)
io.imsave(os.path.join(output_dir, 'fov2_feature_0.tif'),
fov_data, plugin="tifffile", check_contrast=False)
io.imsave(os.path.join(output_dir, 'fov3_feature_0.tif'),
fov_data, plugin="tifffile", check_contrast=False)

batch_num = int(in_zip_path.split('.')[0].split('_')[-1])
if batch_num < 2:
Expand All @@ -35,9 +41,14 @@ def test_create_deepcell_output(mocker: MockerFixture):

input_dir = os.path.join(temp_dir, 'input_dir')
os.makedirs(input_dir)
pathlib.Path(os.path.join(input_dir, 'fov1.tif')).touch()
pathlib.Path(os.path.join(input_dir, 'fov2.tif')).touch()
pathlib.Path(os.path.join(input_dir, 'fov3.tiff')).touch()

fov_data = np.ones(shape=(10, 10), dtype="float32")
io.imsave(os.path.join(input_dir, 'fov1.tif'),
fov_data, plugin="tifffile", check_contrast=False)
io.imsave(os.path.join(input_dir, 'fov2.tif'),
fov_data, plugin="tifffile", check_contrast=False)
io.imsave(os.path.join(input_dir, 'fov3.tiff'),
fov_data, plugin="tifffile", check_contrast=False)

with tempfile.TemporaryDirectory() as output_dir:

Expand Down
Loading

0 comments on commit f3b9c56

Please sign in to comment.