Skip to content

Commit

Permalink
Merge 367b5cb into d454d09
Browse files Browse the repository at this point in the history
  • Loading branch information
srivarra committed Dec 7, 2022
2 parents d454d09 + 367b5cb commit 8fc78fc
Show file tree
Hide file tree
Showing 11 changed files with 52 additions and 42 deletions.
2 changes: 0 additions & 2 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@ exclude_lines =
raise NotImplementedError
(ArrowInvalid, OSError, IOError)


ignore_errors = True
fail_under = 45

# show which lines are missing
show_missing = False

omit =
segmentation/scratch.py
**/*test*.py
3 changes: 3 additions & 0 deletions ark/analysis/spatial_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def generate_channel_spatial_enrichment_stats(label_dir, dist_mat_dir, marker_th
cluster_names
"""

# Validate paths
io_utils.validate_paths([label_dir, dist_mat_dir])

# parse files in label_dir
all_label_names = io_utils.list_files(label_dir, substrs=[suffix + '.tiff'])

Expand Down
32 changes: 18 additions & 14 deletions ark/phenotyping/cell_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import scipy.stats as stats

from ark.analysis import visualize
from ark.utils import io_utils, misc_utils
from ark.phenotyping import cluster_helpers
from ark.utils import misc_utils, io_utils


def compute_cell_cluster_count_avg(cell_cluster_path, pixel_cluster_col_prefix,
Expand All @@ -35,15 +35,18 @@ def compute_cell_cluster_count_avg(cell_cluster_path, pixel_cluster_col_prefix,
Contains the average values for each column across cell SOM clusters
"""

# Validate paths
io_utils.validate_paths(cell_cluster_path)

# verify the pixel cluster col prefix specified is valid
misc_utils.verify_in_list(
provided_cluster_col=[pixel_cluster_col_prefix],
provided_cluster_col=pixel_cluster_col_prefix,
valid_cluster_cols=['pixel_som_cluster', 'pixel_meta_cluster_rename']
)

# verify the cell cluster col prefix specified is valid
misc_utils.verify_in_list(
provided_cluster_col=[cell_cluster_col],
provided_cluster_col=cell_cluster_col,
valid_cluster_cols=['cell_som_cluster', 'cell_meta_cluster']
)

Expand Down Expand Up @@ -95,9 +98,10 @@ def compute_cell_cluster_channel_avg(fovs, channels, base_dir,
pandas.DataFrame:
Each cell cluster mapped to the average expression for each marker
"""
weighted_cell_channel_name_path: str = os.path.join(base_dir, weighted_cell_channel_name)

# verify the cell table actually exists
io_utils.validate_paths(os.path.join(base_dir, weighted_cell_channel_name))
io_utils.validate_paths(weighted_cell_channel_name_path)

# verify the cell cluster col specified is valid
misc_utils.verify_in_list(
Expand All @@ -106,7 +110,7 @@ def compute_cell_cluster_channel_avg(fovs, channels, base_dir,
)

# read the weighted cell channel table in
cell_table = pd.read_csv(os.path.join(base_dir, weighted_cell_channel_name))
cell_table = pd.read_csv(weighted_cell_channel_name_path)

# subset on only the fovs the user has specified
cell_table = cell_table[cell_table['fov'].isin(fovs)]
Expand Down Expand Up @@ -182,16 +186,16 @@ def compute_p2c_weighted_channel_avg(pixel_channel_avg, channels, cell_counts,
# if no fovs provided make sure they're all iterated over
if fovs is None:
fovs = list(cell_counts['fov'].unique())

# verify that the fovs provided are valid
misc_utils.verify_in_list(
provided_fovs=fovs,
dataset_fovs=cell_counts['fov'].unique()
)
else:
# verify that the fovs provided are valid
misc_utils.verify_in_list(
provided_fovs=fovs,
dataset_fovs=cell_counts['fov'].unique()
)

# verify the pixel_cluster_col provided is valid
misc_utils.verify_in_list(
provided_cluster_col=[pixel_cluster_col],
provided_cluster_col=pixel_cluster_col,
valid_cluster_cols=['pixel_som_cluster', 'pixel_meta_cluster_rename']
)

Expand All @@ -211,7 +215,7 @@ def compute_p2c_weighted_channel_avg(pixel_channel_avg, channels, cell_counts,
# sort the pixel_channel_avg table by pixel_cluster_col in ascending cluster order
# NOTE: to handle numeric cluster names types, we need to cast the pixel_cluster_col values
# to str to ensure the same sorting is used
if pixel_channel_avg[pixel_cluster_col].dtype == int:
if np.issubdtype(pixel_channel_avg[pixel_cluster_col].dtype, np.integer):
pixel_channel_avg[pixel_cluster_col] = pixel_channel_avg[pixel_cluster_col].astype(str)

pixel_channel_avg_sorted = pixel_channel_avg.sort_values(by=pixel_cluster_col)
Expand Down Expand Up @@ -448,7 +452,7 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,

# verify the cluster_col provided is valid
misc_utils.verify_in_list(
provided_cluster_col=[pixel_cluster_col],
provided_cluster_col=pixel_cluster_col,
valid_cluster_cols=['pixel_som_cluster', 'pixel_meta_cluster_rename']
)

Expand Down
4 changes: 2 additions & 2 deletions ark/phenotyping/cell_cluster_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ def test_compute_cell_cluster_count_avg():

with tempfile.TemporaryDirectory() as temp_dir:
# error check: bad pixel_cluster_col_prefix specified
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
cell_cluster_utils.compute_cell_cluster_count_avg(
'clustered_path', 'bad_cluster_col_prefix', 'cell_cluster_col', False
)

# error check: bad cell_cluster_col specified
with pytest.raises(ValueError):
cell_cluster_utils.compute_cell_cluster_count_avg(
'clustered_path', 'pixel_meta_cluster', 'bad_cluster_col', False
temp_dir, 'pixel_meta_cluster', 'bad_cluster_col', False
)

cluster_col_arr = [pixel_som_clusters, pixel_meta_clusters]
Expand Down
23 changes: 12 additions & 11 deletions ark/phenotyping/post_cluster_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from typing import List

import matplotlib.pyplot as plt
import numpy as np
Expand All @@ -22,9 +23,8 @@ def plot_hist_thresholds(cell_table, populations, marker, pop_col='cell_meta_clu
"""
all_populations = cell_table[pop_col].unique()

# input validation
if type(populations) != list:
raise ValueError("populations argument must be a list of populations to plot")
# Make populations a list if it is a string
populations: List[str] = misc_utils.make_iterable(populations, ignore_str=True)

# check that provided populations are present in dataframe
for pop in populations:
Expand All @@ -40,14 +40,15 @@ def plot_hist_thresholds(cell_table, populations, marker, pop_col='cell_meta_clu

# plot each pop histogram
pop_num = len(populations)
fig, ax = plt.subplots(pop_num, 1, figsize=[6.4, 2.2 * pop_num])
for i in range(pop_num):
plot_vals = cell_table.loc[cell_table[pop_col] == populations[i], marker].values
ax[i].hist(plot_vals, 50, density=True, facecolor='g', alpha=0.75, range=(0, x_max))
ax[i].set_title("Distribution of {} in {}".format(marker, populations[i]))

if threshold is not None:
ax[i].axvline(x=threshold)
fig, axes = plt.subplots(pop_num, 1, figsize=[6.4, 2.2 * pop_num], squeeze=False)
for ax, pop in zip(axes.flat, populations):
plot_vals = cell_table.loc[cell_table[pop_col] == pop, marker].values
ax.hist(plot_vals, 50, density=True, facecolor='g', alpha=0.75, range=(0, x_max))
ax.set_title("Distribution of {} in {}".format(marker, pop))

if threshold:
ax.axvline(x=threshold)

plt.tight_layout()


Expand Down
8 changes: 4 additions & 4 deletions ark/phenotyping/post_cluster_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ def test_plot_hist_thresholds():

cell_table = pd.DataFrame({'cell_meta_cluster': pops, 'marker_1': marker_1})

# populations argument must be a list
with pytest.raises(ValueError, match='must be a list'):
post_cluster_utils.plot_hist_thresholds(cell_table=cell_table, populations='pop1',
marker='marker_1')
# populations argument must be a list, but `make_iterable` should convert a `str`
# argument to `List[str]`
post_cluster_utils.plot_hist_thresholds(cell_table=cell_table, populations='pop1',
marker='marker_1')

# populations argument must contain entries from cell_table
with pytest.raises(ValueError, match='Invalid population'):
Expand Down
7 changes: 5 additions & 2 deletions ark/utils/example_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import datasets

from ark.utils.misc_utils import verify_in_list


class ExampleDataset():
def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str = None,
Expand Down Expand Up @@ -163,10 +165,11 @@ def get_example_dataset(dataset: str, save_dir: Union[str, pathlib.Path],
"pairwise_spatial_enrichment"]

# Check the appropriate dataset name
if dataset not in valid_datasets:
try:
verify_in_list(dataset=dataset, valid_datasets=valid_datasets)
except ValueError:
ValueError(f"The dataset <{dataset}> is not one of the valid datasets available. \
The following are available: { {*valid_datasets} }")

example_dataset = ExampleDataset(dataset=dataset, overwrite_existing=overwrite_existing,
cache_dir=None,
revision="main")
Expand Down
7 changes: 4 additions & 3 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

from ark.settings import EXTENSION_TYPES

from ark.utils import misc_utils


def validate_paths(paths, data_prefix=False):
"""Verifys that paths exist and don't leave Docker's scope
"""Verifies that paths exist and don't leave Docker's scope
Args:
paths (str or list):
Expand All @@ -22,8 +24,7 @@ def validate_paths(paths, data_prefix=False):
"""

# if given a single path, convert to list
if not isinstance(paths, list):
paths = [paths]
paths = misc_utils.make_iterable(paths, ignore_str=True)

for path in paths:
# check data prefix
Expand Down
2 changes: 1 addition & 1 deletion ark/utils/metacluster_remap_gui/file_reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_can_read_csvs_prefix_trim(simple_full_cluster_data):

def test_requires_valid_path(simple_full_cluster_data):
with tempfile.TemporaryDirectory() as temp_dir:
simple_full_cluster_data.to_csv('sample.csv', index=False)
simple_full_cluster_data.to_csv(temp_dir + '/sample.csv', index=False)

with pytest.raises(FileNotFoundError):
metaclusterdata_from_files(os.path.join(temp_dir, 'bad_sample.csv'))
Expand Down
4 changes: 2 additions & 2 deletions ark/utils/plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,13 +263,13 @@ def create_overlay(fov, segmentation_dir, data_dir,
fov (str):
The name of the fov to overlay
segmentation_dir (str):
The path to the directory containing the segmentatation data
The path to the directory containing the segmentation data
data_dir (str):
The path to the directory containing the nuclear and whole cell image data
img_overlay_chans (list):
List of channels the user will overlay
seg_overlay_comp (str):
The segmentted compartment the user will overlay
The segmented compartment the user will overlay
alternate_segmentation (numpy.ndarray):
2D numpy array of labeled cell objects
Returns:
Expand Down
2 changes: 1 addition & 1 deletion templates/4_Post_Clustering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15 (default, Nov 11 2022, 16:55:28) \n[Clang 14.0.0 (clang-1400.0.29.202)]"
"version": "3.8.15"
},
"vscode": {
"interpreter": {
Expand Down

0 comments on commit 8fc78fc

Please sign in to comment.