Skip to content

Commit

Permalink
Added Batching Functionality to Pixel Mask Generation and FOV Saving (#…
Browse files Browse the repository at this point in the history
…643)

* batching generate_pixel_cluster_mask

* added batch_size arg to the pixel_clustering template notebook

* added `batch_size` to save_fov_images

* fixed test_save_fov_images

* added generate and save funcs

* added tests

* added func to pixel_clustering nb

* formatting, nb test tag fix

* sphinx fix #1

* sphinx - skeleton docstring

* sphinx - skeleton docstring 2

* sphinx

* docstring fix test

* docstring fix test2

* pycodestyle

* pycodestyle

* refactored assert(...) to assert ...

* docstring fix

* debugging dir issue

* fixed loading the masks

* fixed mantis project path issue with batched masks

* variable name change

* removed dead code
  • Loading branch information
srivarra committed Aug 10, 2022
1 parent bb687ed commit 070beda
Show file tree
Hide file tree
Showing 6 changed files with 337 additions and 160 deletions.
117 changes: 117 additions & 0 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import pathlib
from typing import List, Union
import math
import feather
import skimage.io as io
Expand All @@ -8,6 +10,7 @@
from ark import settings
from ark.utils import load_utils
from ark.utils.misc_utils import verify_in_list
from tqdm.notebook import tqdm_notebook as tqdm


def save_fov_images(fovs, data_dir, img_xr, sub_dir=None, name_suffix=''):
Expand Down Expand Up @@ -258,6 +261,120 @@ def generate_pixel_cluster_mask(fovs, base_dir, tiff_dir, chan_file,
dims=["fovs", "rows", "cols"])


def generate_and_save_pixel_cluster_masks(fovs: List[str],
                                          base_dir: Union[pathlib.Path, str],
                                          save_dir: Union[pathlib.Path, str],
                                          tiff_dir: Union[pathlib.Path, str],
                                          chan_file: Union[pathlib.Path, str],
                                          pixel_data_dir: Union[pathlib.Path, str],
                                          pixel_cluster_col: str = 'pixel_meta_cluster',
                                          sub_dir: str = None,
                                          name_suffix: str = '',
                                          batch_size: int = 5) -> None:
    """Generates pixel cluster masks and saves them in batches for downstream analysis.

    The fovs are split into consecutive groups of at most `batch_size`. Each group is
    passed to `generate_pixel_cluster_mask` and the resulting masks are handed to
    `save_fov_images` before the next group is started, so only one batch of masks is
    held at a time.

    Args:
        fovs (List[str]):
            A list of fovs to generate and save pixel masks for.
        base_dir (Union[pathlib.Path, str]):
            The path to the data directory.
        save_dir (Union[pathlib.Path, str]):
            The directory to save the generated pixel cluster masks.
        tiff_dir (Union[pathlib.Path, str]):
            The path to the directory with the tiff data.
        chan_file (Union[pathlib.Path, str]):
            The path to the sample channel file to load (assuming `tiff_dir` as root).
            Only used to determine dimensions of the pixel mask.
        pixel_data_dir (Union[pathlib.Path, str]):
            The path to the data with full pixel data.
            This data should also have the SOM and meta cluster labels appended.
        pixel_cluster_col (str, optional):
            The name of the cluster label column, forwarded unchanged to
            `generate_pixel_cluster_mask`. Presumably `'pixel_som_cluster'` or
            `'pixel_meta_cluster'` (mirrors `cell_cluster_col` of the cell variant).
            Defaults to `'pixel_meta_cluster'`.
        sub_dir (str, optional):
            The subdirectory to save the images in. If specified images are saved to
            "save_dir/sub_dir". If `sub_dir = None` the images are saved to "save_dir".
            Defaults to None.
        name_suffix (str, optional):
            Specify what to append at the end of every pixel mask. Defaults to ''.
        batch_size (int, optional):
            The number of fovs to process at once for each batch. Defaults to 5.

    Raises:
        ValueError:
            If `batch_size` is less than 1.
    """

    # a zero step makes range() fail with an unrelated message and a negative step
    # produces no batches at all (nothing saved, no error), so reject both up front
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got %s" % (batch_size,))

    # define a list of fov batches to process over
    fov_batches = [fovs[i:i + batch_size] for i in range(0, len(fovs), batch_size)]

    # create and save the pixel cluster masks one fov batch at a time
    with tqdm(total=len(fovs), desc="Pixel Cluster Mask Generation") as pixel_mask_progress:
        for fov_batch in fov_batches:
            pixel_masks = generate_pixel_cluster_mask(
                fovs=fov_batch, base_dir=base_dir, tiff_dir=tiff_dir,
                chan_file=chan_file, pixel_data_dir=pixel_data_dir,
                pixel_cluster_col=pixel_cluster_col
            )

            save_fov_images(fov_batch, data_dir=save_dir, img_xr=pixel_masks,
                            sub_dir=sub_dir, name_suffix=name_suffix)

            # advance by the number of fovs handled, not by the number of batches
            pixel_mask_progress.update(len(fov_batch))


def generate_and_save_cell_cluster_masks(fovs: List[str],
                                         base_dir: Union[pathlib.Path, str],
                                         save_dir: Union[pathlib.Path, str],
                                         seg_dir: Union[pathlib.Path, str],
                                         cell_data_name: Union[pathlib.Path, str],
                                         cell_cluster_col: str = 'cell_meta_cluster',
                                         seg_suffix: str = '_feature_0.tif',
                                         sub_dir: str = None,
                                         name_suffix: str = '',
                                         batch_size: int = 5) -> None:
    """Generates cell cluster masks and saves them in batches for downstream analysis.

    The fovs are split into consecutive groups of at most `batch_size`. Each group is
    passed to `generate_cell_cluster_mask` and the resulting masks are handed to
    `save_fov_images` before the next group is started, so only one batch of masks is
    held at a time.

    Args:
        fovs (List[str]):
            A list of fovs to generate and save cell masks for.
        base_dir (Union[pathlib.Path, str]):
            The path to the data directory.
        save_dir (Union[pathlib.Path, str]):
            The directory to save the generated cell cluster masks.
        seg_dir (Union[pathlib.Path, str]):
            The path to the segmentation data.
        cell_data_name (Union[pathlib.Path, str]):
            The path to the cell data with both cell SOM and meta cluster assignments.
        cell_cluster_col (str, optional):
            Whether to assign SOM or meta clusters. Needs to be `'cell_som_cluster'` or
            `'cell_meta_cluster'`. Defaults to `'cell_meta_cluster'`.
        seg_suffix (str, optional):
            The suffix that the segmentation images use. Defaults to `'_feature_0.tif'`.
        sub_dir (str, optional):
            The subdirectory to save the images in. If specified images are saved to
            "save_dir/sub_dir". If `sub_dir = None` the images are saved to "save_dir".
            Defaults to None.
        name_suffix (str, optional):
            Specify what to append at the end of every cell mask. Defaults to ''.
        batch_size (int, optional):
            The number of fovs to process at once for each batch. Defaults to 5.

    Raises:
        ValueError:
            If `batch_size` is less than 1.
    """

    # a zero step makes range() fail with an unrelated message and a negative step
    # produces no batches at all (nothing saved, no error), so reject both up front
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got %s" % (batch_size,))

    # define a list of fov batches to process over
    fov_batches = [fovs[i:i + batch_size] for i in range(0, len(fovs), batch_size)]

    # create and save the cell cluster masks one fov batch at a time
    with tqdm(total=len(fovs), desc="Cell Cluster Mask Generation") as cell_mask_progress:
        for fov_batch in fov_batches:
            cell_masks = generate_cell_cluster_mask(
                fovs=fov_batch, base_dir=base_dir, seg_dir=seg_dir,
                cell_data_name=cell_data_name,
                cell_cluster_col=cell_cluster_col,
                seg_suffix=seg_suffix
            )

            save_fov_images(fov_batch, data_dir=save_dir, img_xr=cell_masks,
                            sub_dir=sub_dir, name_suffix=name_suffix)

            # advance by the number of fovs handled, not by the number of batches
            cell_mask_progress.update(len(fov_batch))


def relabel_segmentation(labeled_image, labels_dict):
"""Takes a labeled image and translates its labels according to a dictionary.
Expand Down
145 changes: 142 additions & 3 deletions ark/utils/data_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
import feather
import pandas as pd
import xarray as xr

from ark.utils import data_utils, test_utils
import skimage.io as io

from ark.utils.data_utils import relabel_segmentation, label_cells_by_cluster
from ark.utils import data_utils, test_utils
from ark.utils.data_utils import (
generate_and_save_cell_cluster_masks,
generate_and_save_pixel_cluster_masks,
relabel_segmentation,
label_cells_by_cluster
)
from ark import settings


Expand Down Expand Up @@ -485,3 +489,138 @@ def test_generate_pixel_cluster_mask():

# assert no value is greater than the highest meta cluster value (5)
assert np.all(pixel_masks <= 5)


def test_generate_and_save_pixel_cluster_masks():
    """Checks that batched pixel mask generation writes a valid mask for every fov.

    Each batch size gets its own empty output directory, so a mask written while
    testing one batch size can never satisfy the assertions made for another.
    """
    fov_count = 7
    fovs = [f"fov{i}" for i in range(fov_count)]
    chans = ['chan0', 'chan1', 'chan2', 'chan3']

    # covers batch sizes that divide the fov count unevenly and one that exceeds it
    batch_sizes = [1, 2, 3, 5, 10]

    with tempfile.TemporaryDirectory() as temp_dir:
        # create a dummy consensus directory
        os.mkdir(os.path.join(temp_dir, 'pixel_mat_consensus'))

        # create the root directory that holds one save directory per batch size
        pixel_mask_root = os.path.join(temp_dir, 'pixel_masks')
        os.mkdir(pixel_mask_root)

        # name suffix
        name_suffix = ''

        # generate sample fov folder with one channel value, no sub folder
        channel_data = np.random.randint(low=0, high=5, size=(40, 40), dtype="int16")
        os.mkdir(os.path.join(temp_dir, 'fov0'))
        io.imsave(os.path.join(temp_dir, 'fov0', 'chan0.tif'), channel_data,
                  check_contrast=False)

        # create dummy data containing SOM and consensus labels for each fov
        for fov in fovs:
            consensus_data = pd.DataFrame(np.random.rand(100, 4), columns=chans)
            consensus_data['pixel_som_cluster'] = np.tile(np.arange(1, 11), 10)
            consensus_data['pixel_meta_cluster'] = np.tile(np.arange(1, 6), 20)
            consensus_data['row_index'] = np.random.randint(low=0, high=40, size=100)
            consensus_data['column_index'] = np.random.randint(low=0, high=40, size=100)

            feather.write_dataframe(
                consensus_data,
                os.path.join(temp_dir, 'pixel_mat_consensus', fov + '.feather')
            )

        # test various batch_sizes, no sub_dir, name_suffix = ''
        for batch_size in batch_sizes:
            # start from an empty directory so stale masks from a previous batch size
            # cannot hide a batch that failed to write its output
            save_dir = os.path.join(pixel_mask_root, 'batch_%d' % batch_size)
            os.mkdir(save_dir)

            generate_and_save_pixel_cluster_masks(
                fovs=fovs,
                base_dir=temp_dir,
                save_dir=save_dir,
                tiff_dir=temp_dir,
                chan_file=os.path.join('fov0', 'chan0.tif'),
                pixel_data_dir='pixel_mat_consensus',
                pixel_cluster_col='pixel_meta_cluster',
                sub_dir=None,
                name_suffix=name_suffix,
                batch_size=batch_size
            )

            # open each pixel mask and make sure the shape and values are valid
            for fov in fovs:
                fov_name = fov + name_suffix + ".tiff"
                pixel_mask = io.imread(os.path.join(save_dir, fov_name))
                assert pixel_mask.shape == (40, 40)

                # no value may exceed the highest meta cluster value (5)
                assert np.all(pixel_mask <= 5)


def test_generate_and_save_cell_cluster_masks():
    """Checks that batched cell mask generation writes a valid mask for every fov.

    Each batch size gets its own empty output directory, so a mask written while
    testing one batch size can never satisfy the assertions made for another.
    """
    fov_count = 7
    fovs = [f"fov{i}" for i in range(fov_count)]
    som_cluster_cols = ['pixel_som_cluster_%d' % i for i in np.arange(5)]
    meta_cluster_cols = ['pixel_meta_cluster_%d' % i for i in np.arange(3)]

    # covers batch sizes that divide the fov count unevenly and one that exceeds it
    batch_sizes = [1, 2, 3, 5, 10]

    with tempfile.TemporaryDirectory() as temp_dir:
        # create the root directory that holds one save directory per batch size
        cell_mask_root = os.path.join(temp_dir, 'cell_masks')
        os.mkdir(cell_mask_root)

        # generate sample segmentation masks
        cell_masks = np.random.randint(low=0, high=5, size=(fov_count, 40, 40, 1),
                                       dtype="int16")

        for fov in range(cell_masks.shape[0]):
            fov_whole_cell = cell_masks[fov, :, :, 0]
            io.imsave(os.path.join(temp_dir, 'fov%d_feature_0.tif' % fov), fov_whole_cell,
                      check_contrast=False)

        # create a sample cell consensus file based on SOM cluster assignments
        consensus_data_som = pd.DataFrame()

        # create a sample cell consensus file based on meta cluster assignments
        consensus_data_meta = pd.DataFrame()

        # generate sample cell data with SOM and meta cluster assignments for each fov
        for fov in fovs:
            som_data_fov = pd.DataFrame(
                np.random.randint(low=0, high=100, size=(20, 5)), columns=som_cluster_cols
            )

            som_data_fov['fov'] = fov
            som_data_fov['segmentation_label'] = som_data_fov.index.values + 1
            som_data_fov['cell_som_cluster'] = np.tile(np.arange(1, 6), 4)
            som_data_fov['cell_meta_cluster'] = np.tile(np.arange(1, 3), 10)

            consensus_data_som = pd.concat([consensus_data_som, som_data_fov])

            meta_data_fov = pd.DataFrame(
                np.random.randint(low=0, high=100, size=(20, 3)), columns=meta_cluster_cols
            )

            meta_data_fov['fov'] = fov
            meta_data_fov['segmentation_label'] = meta_data_fov.index.values + 1
            meta_data_fov['cell_som_cluster'] = np.tile(np.arange(1, 6), 4)
            meta_data_fov['cell_meta_cluster'] = np.tile(np.arange(1, 3), 10)

            consensus_data_meta = pd.concat([consensus_data_meta, meta_data_fov])

        # write both consensus DataFrames
        feather.write_dataframe(
            consensus_data_som, os.path.join(temp_dir, 'cluster_consensus_som.feather')
        )

        # the meta file must hold the meta DataFrame, not a second copy of the SOM one
        feather.write_dataframe(
            consensus_data_meta, os.path.join(temp_dir, 'cluster_consensus_meta.feather')
        )

        # test various batch_sizes, no sub_dir, name_suffix = ''
        for batch_size in batch_sizes:
            # start from an empty directory so stale masks from a previous batch size
            # cannot hide a batch that failed to write its output
            save_dir = os.path.join(cell_mask_root, 'batch_%d' % batch_size)
            os.mkdir(save_dir)

            generate_and_save_cell_cluster_masks(
                fovs=fovs,
                base_dir=temp_dir,
                save_dir=save_dir,
                seg_dir=temp_dir,
                cell_data_name='cluster_consensus_som.feather',
                cell_cluster_col='cell_som_cluster',
                seg_suffix='_feature_0.tif',
                sub_dir=None,
                batch_size=batch_size
            )

            # open each cell mask and make sure the shape and values are valid
            for fov in fovs:
                fov_name = fov + ".tiff"
                cell_mask = io.imread(os.path.join(save_dir, fov_name))
                assert cell_mask.shape == (40, 40)

                # no value may exceed the highest SOM cluster value (5)
                assert np.all(cell_mask <= 5)
3 changes: 3 additions & 0 deletions ark/utils/load_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import os
import pathlib
from typing import List, Optional, Union
import glob
import warnings

import skimage.io as io
Expand Down
30 changes: 3 additions & 27 deletions ark/utils/notebooks_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,20 +381,8 @@ def flowsom_pixel_visualize(tb, flowsom_dir, fovs, pixel_prefix='test'):
cell_overlay_fovs = "pixel_fovs = %s" % str(fovs_overlay)
tb.inject(cell_overlay_fovs, after='pixel_overlay_fovs')

# generate the pixel cluster masks
tb.execute_cell('pixel_mask_gen')

# test the saving of pixel masks
# NOTE: no point testing save_pixel_masks = False since that doesn't run anything
cell_mask_save = """
data_utils.save_fov_images(
pixel_fovs,
os.path.join(base_dir, pixel_output_dir),
pixel_cluster_masks,
name_suffix='_pixel_mask'
)
"""
tb.inject(cell_mask_save, 'pixel_mask_save')
# generate the pixel cluster masks, and save them
tb.execute_cell('pixel_mask_gen_save')

# run the cell mask overlay
tb.execute_cell('pixel_overlay_gen')
Expand Down Expand Up @@ -732,19 +720,7 @@ def flowsom_cell_visualize(tb, flowsom_dir, fovs,
tb.inject(cell_overlay_fovs, after='cell_overlay_fovs')

# generate the cell cluster masks
tb.execute_cell('cell_mask_gen')

# test the saving of cell masks
# NOTE: no point testing save_cell_masks = False since that doesn't run anything
cell_mask_save = """
data_utils.save_fov_images(
cell_fovs,
os.path.join(base_dir, cell_output_dir),
cell_cluster_masks,
name_suffix='_cell_mask'
)
"""
tb.inject(cell_mask_save, 'cell_mask_save')
tb.execute_cell('cell_mask_gen_save')

# run the cell mask overlay
tb.execute_cell('cell_overlay_gen')
Expand Down
Loading

0 comments on commit 070beda

Please sign in to comment.