Skip to content

Commit

Permalink
Add new preprocessing techniques
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Nov 20, 2020
1 parent abfe32c commit aff7084
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 409 deletions.
65 changes: 4 additions & 61 deletions ark/phenotyping/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,60 +11,6 @@
from ark.utils import load_utils
from ark.utils import misc_utils

from timeit import default_timer


def _unpacking_apply_along_axis(func_args):
"""Maps apply_along_axis to each array subset
Args:
func_args (tuple):
The arguments to pass into apply_along_axis
Returns:
numpy.ndarray:
The result for apply_along_axis for a specific array subset
"""

(func1d, axis, arr, args, kwargs) = func_args
return np.apply_along_axis(func1d, axis, arr, *args, **kwargs)


def _parallel_apply_along_axis(func1d, axis, arr, *args, **kwargs):
    """Runs apply_along_axis in parallel for a massive speedup (but at a massive memory cost too)

    The array is split along its 0th axis and each chunk is processed in a
    separate worker process, so the whole array is copied to the workers.

    Args:
        func1d (Callable):
            The function to apply along the given axis
        axis (int):
            The axis to run func1d along
        arr (numpy.ndarray):
            The array to process
        *args:
            Additional positional arguments forwarded to func1d
        **kwargs:
            Additional keyword arguments forwarded to func1d

    Returns:
        numpy.ndarray:
            The processed array, equivalent to what np.apply_along_axis returns
    """

    # If axis is 0 we'll effectively be working with the 1st axis,
    # because the chunking below splits along axis 0
    effective_axis = 1 if axis == 0 else axis

    # Swap the 0th and 1st axis if axis specified is 0
    if effective_axis != axis:
        arr = arr.swapaxes(axis, effective_axis)

    # Bundle the arguments for each chunk into a single tuple, since
    # Pool.map passes exactly one argument to the worker function
    chunks = [(func1d, effective_axis, sub_arr, args, kwargs)
              for sub_arr in np.array_split(arr, multiprocessing.cpu_count())]

    # Run the process in parallel; the context manager guarantees the
    # workers are terminated even if a worker raises (the original code
    # leaked the pool on error)
    with multiprocessing.Pool() as pool:
        individual_results = pool.map(_unpacking_apply_along_axis, chunks)

    # Chunks were split along axis 0, so axis 0 concatenation reassembles them
    result = np.concatenate(individual_results)

    # Bug fix: if we swapped axes and func1d preserved the array's rank
    # (e.g. a filter rather than a reduction), swap back so the output
    # matches np.apply_along_axis; reductions drop the axis and need no swap
    if effective_axis != axis and result.ndim == arr.ndim:
        result = result.swapaxes(axis, effective_axis)

    return result


def create_pixel_matrix(img_xr, seg_labels, fovs=None, channels=None, blur_factor=2):
"""Preprocess the images for FlowSOM clustering and creates a pixel-level matrix
Expand Down Expand Up @@ -98,20 +44,17 @@ def create_pixel_matrix(img_xr, seg_labels, fovs=None, channels=None, blur_facto
misc_utils.verify_in_list(fovs=fovs, image_fovs=img_xr.fovs.values)
misc_utils.verify_in_list(channels=channels, image_channels=img_xr.channels.values)

# delete any fovs and channels we don't need
img_xr_subset = img_xr.loc[fovs, :, :, channels]

# define our flowsom matrix
flowsom_data = None

# iterate over fovs
for fov in fovs:
# subset img_xr with only the fov we're looking for
img_xr_sub = img_xr_subset.loc[fov, ...].values
img_data_blur = img_xr.loc[fov, ..., channels].values

# apply a Gaussian blur for each marker
img_data_blur = _parallel_apply_along_axis(ndimage.gaussian_filter, axis=2,
arr=img_xr_sub, sigma=blur_factor)
for marker in range(len(channels)):
img_data_blur[:, :, marker] = ndimage.gaussian_filter(img_data_blur[:, :, marker],
sigma=blur_factor)

# flatten each image
pixel_mat = img_data_blur.reshape(-1, len(channels))
Expand Down
Loading

0 comments on commit aff7084

Please sign in to comment.