Skip to content

Commit

Permalink
Merge branch 'master' into travis_ci_docker_tags
Browse files Browse the repository at this point in the history
  • Loading branch information
srivarra committed Aug 22, 2022
2 parents d51d86f + d544d47 commit 69ff5d3
Show file tree
Hide file tree
Showing 9 changed files with 3,057 additions and 2,936 deletions.
1,202 changes: 1,202 additions & 0 deletions ark/phenotyping/cell_cluster_utils.py

Large diffs are not rendered by default.

1,495 changes: 1,495 additions & 0 deletions ark/phenotyping/cell_cluster_utils_test.py

Large diffs are not rendered by default.

1,229 changes: 0 additions & 1,229 deletions ark/phenotyping/som_utils.py → ark/phenotyping/pixel_cluster_utils.py

Large diffs are not rendered by default.

1,868 changes: 197 additions & 1,671 deletions ark/phenotyping/som_utils_test.py → ark/phenotyping/pixel_cluster_utils_test.py

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions ark/utils/metacluster_remap_gui/colormap_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import colorsys
import itertools
import os

import matplotlib
import numpy as np
import pandas as pd

from ark.utils import misc_utils


def distinct_cmap(n=33):
Expand Down Expand Up @@ -51,3 +56,55 @@ def hue_to_hsvs(h):
hsvs = itertools.chain.from_iterable(hue_to_hsvs(hue) for hue in hues)
rgbs = (colorsys.hsv_to_rgb(*hsv) for hsv in hsvs)
return list(itertools.islice(rgbs, n))


def generate_meta_cluster_colormap_dict(meta_cluster_remap_path, cmap):
    """Returns a compact version of the colormap used in the interactive reclustering processes.

    Generate a separate one for the raw meta cluster labels and the renamed meta cluster labels.

    Used in the pixel and cell meta cluster overlays, as well as the
    average weighted channel expression heatmaps for cell clustering

    Args:
        meta_cluster_remap_path (str):
            Path to the file storing the mapping from SOM to meta clusters (raw and renamed)
        cmap (matplotlib.colors.ListedColormap):
            The colormap generated by the interactive reclustering process

    Returns:
        tuple:

        - A `dict` containing the raw meta cluster labels mapped to their respective colors
        - A `dict` containing the renamed meta cluster labels mapped to their respective colors

    Raises:
        FileNotFoundError:
            If `meta_cluster_remap_path` does not exist
    """

    # file path validation
    if not os.path.exists(meta_cluster_remap_path):
        raise FileNotFoundError('Remapping path %s does not exist' %
                                meta_cluster_remap_path)

    # read the remapping
    remapping = pd.read_csv(meta_cluster_remap_path)

    # assert the correct columns are contained
    # NOTE: any error raised by verify_same_elements on a column mismatch propagates to the caller
    misc_utils.verify_same_elements(
        remapping_cols=remapping.columns.values,
        required_cols=['cluster', 'metacluster', 'mc_name']
    )

    # define the raw meta cluster colormap
    # NOTE: colormaps returned by interactive reclustering are zero-indexed
    # need to subtract 1 to account for that
    # NOTE: the id is cast to int before indexing because a colormap called with a float
    # interprets it as a normalized value in [0, 1] rather than as a lookup table index,
    # which would silently pick the wrong color if the column was parsed as float.
    # The dictionary keys themselves are left exactly as read from the file.
    raw_colormap = {
        meta_id: cmap(int(meta_id) - 1)
        for meta_id in np.unique(remapping['metacluster'])
    }

    # define the renamed meta cluster colormap
    meta_id_to_name = dict(zip(remapping['metacluster'], remapping['mc_name']))
    renamed_colormap = {
        meta_id_to_name[meta_id]: meta_id_color
        for (meta_id, meta_id_color) in raw_colormap.items()
    }

    return raw_colormap, renamed_colormap
70 changes: 69 additions & 1 deletion ark/utils/metacluster_remap_gui/colormap_helper_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
from .colormap_helper import distinct_cmap, distinct_rgbs
from ark.utils import misc_utils
from .colormap_helper import distinct_cmap, distinct_rgbs, generate_meta_cluster_colormap_dict
from matplotlib.colors import ListedColormap
import numpy as np
import os
import pandas as pd
import pytest
import tempfile


def test_colormap_is_distinct():
Expand All @@ -7,3 +14,64 @@ def test_colormap_is_distinct():

def test_colormap_runs():
distinct_cmap(10)


def test_generate_meta_cluster_colormap_dict():
    """Checks path/column validation and the color assignment of the colormap dict helper."""
    with tempfile.TemporaryDirectory() as temp_dir:
        # basic error check: bad remapping path
        with pytest.raises(FileNotFoundError):
            generate_meta_cluster_colormap_dict(
                os.path.join(temp_dir, 'bad_remap_path.csv'), None
            )

        # basic error check: remapping data contains bad columns
        # NOTE: all columns must have the same length, otherwise building the DataFrame
        # itself raises ValueError. The fixture is also created outside of pytest.raises
        # so that only the function under test can satisfy the expected error.
        bad_remap_path = os.path.join(temp_dir, 'bad_sample_remapping.csv')
        bad_sample_remapping = pd.DataFrame.from_dict({
            'cluster': [i for i in np.arange(100)],
            'metacluster': [int(i / 50) for i in np.arange(100)],
            'mc_name_bad': ['meta' + str(int(i / 50)) for i in np.arange(100)]
        })
        bad_sample_remapping.to_csv(bad_remap_path, index=False)

        with pytest.raises(ValueError):
            generate_meta_cluster_colormap_dict(bad_remap_path, None)

        # define a dummy remapping: SOM clusters 0-49 -> meta cluster 1, 50-99 -> meta cluster 2
        sample_remap_path = os.path.join(temp_dir, 'sample_remapping.csv')
        sample_remapping = pd.DataFrame.from_dict({
            'cluster': [i for i in np.arange(100)],
            'metacluster': [int(i / 50) + 1 for i in np.arange(100)],
            'mc_name': ['meta' + str(int(i / 50) + 1) for i in np.arange(100)]
        })
        sample_remapping.to_csv(sample_remap_path, index=False)

        # define a sample ListedColormap
        cmap = ListedColormap(['red', 'blue', 'green'])

        raw_cmap, renamed_cmap = generate_meta_cluster_colormap_dict(
            sample_remap_path, cmap
        )

        # assert the correct meta cluster labels are contained in both dicts
        misc_utils.verify_same_elements(
            raw_cmap_keys=list(raw_cmap.keys()),
            raw_meta_clusters=sample_remapping['metacluster'].values
        )
        misc_utils.verify_same_elements(
            renamed_cmap_keys=list(renamed_cmap.keys()),
            renamed_meta_clusters=sample_remapping['mc_name'].values
        )

        # assert the colors match up
        # meta cluster ids are one-indexed, so id 1 takes the first color and id 2 the second
        assert raw_cmap[1] == renamed_cmap['meta1'] == (1.0, 0.0, 0.0, 1.0)
        assert raw_cmap[2] == renamed_cmap['meta2'] == (0.0, 0.0, 1.0, 1.0)
6 changes: 3 additions & 3 deletions ark/utils/notebooks_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def flowsom_pixel_setup(tb, flowsom_dir, create_seg_dir=True, img_shape=(50, 50)
blurred_channels = %s
smooth_vals = 6
som_utils.smooth_channels(
pixel_cluster_utils.smooth_channels(
fovs=fovs,
tiff_dir=tiff_dir,
img_sub_folder=img_sub_folder,
Expand All @@ -238,7 +238,7 @@ def flowsom_pixel_setup(tb, flowsom_dir, create_seg_dir=True, img_shape=(50, 50)
filter_channel = '%s'
nuclear_exclude = True
som_utils.filter_with_nuclear_mask(
pixel_cluster_utils.filter_with_nuclear_mask(
fovs,
tiff_dir,
segmentation_dir,
Expand All @@ -253,7 +253,7 @@ def flowsom_pixel_setup(tb, flowsom_dir, create_seg_dir=True, img_shape=(50, 50)
filter_channel = '%s'
nuclear_exclude = False
som_utils.filter_with_nuclear_mask(
pixel_cluster_utils.filter_with_nuclear_mask(
fovs,
tiff_dir,
segmentation_dir,
Expand Down
39 changes: 20 additions & 19 deletions templates_ark/example_cell_clustering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@
"import xarray as xr\n",
"\n",
"from ark.analysis import visualize\n",
"from ark.phenotyping import som_utils\n",
"from ark.phenotyping import cell_cluster_utils\n",
"from ark.utils import data_utils, io_utils, load_utils, plot_utils\n",
"from ark.utils.metacluster_remap_gui import MetaClusterData, MetaClusterGui, metaclusterdata_from_files"
"from ark.utils.metacluster_remap_gui import colormap_helper, MetaClusterData, MetaClusterGui, metaclusterdata_from_files"
]
},
{
Expand Down Expand Up @@ -182,7 +182,7 @@
"* `cell_weights_name`: file name to place the cell SOM weights\n",
"* `cluster_counts_name`: file name to store the counts of each pixel SOM/meta clusters per cell\n",
"* `cluster_counts_norm_name`: same as above, except with each value normalized by the respective cell's size\n",
"* `weighted_cell_channel_name`: file name to store the weighted cell channel expression for each cell. Refer to <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.som_utils.compute_p2c_weighted_channel_avg>cell channel weighting docs</a> for how the weighting is computed.\n",
"* `weighted_cell_channel_name`: file name to store the weighted cell channel expression for each cell. Refer to <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.cell_cluster_utils.compute_p2c_weighted_channel_avg>cell channel weighting docs</a> for how the weighting is computed.\n",
"* `cell_clust_to_meta_name`: file name to store the mapping between cell SOM clusters and cell meta clusters\n",
"* `cell_som_cluster_count_avgs_name`: file name to store the average number of pixel SOM/meta clusters per cell SOM cluster\n",
"* `cell_meta_cluster_count_avgs_name`: same as above except for cell meta clusters\n",
Expand Down Expand Up @@ -255,9 +255,9 @@
"source": [
"Train the cell SOM on the normalized number of pixel SOM/meta clusters per cell (the data stored in `cluster_counts_norm_name`). Training is done using the `FlowSOM` algorithm. Note that each of the pixel SOM/meta cluster columns are normalized by their 99.9% value prior to training.\n",
"\n",
"Additionally, this function also computes the weighted cell channel table (the data stored in `weighted_cell_channel_name`). This data will be needed to compute the weighted average channel expression per cell cluster (the data stored in `cell_som_cluster_channel_avg_name` and `cell_meta_cluster_channel_avg_name`). See documentation of `compute_p2c_weighted_channel_avg` for how weighted cell channel average is computed: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.som_utils.compute_p2c_weighted_channel_avg>cell channel weighting docs</a>.\n",
"Additionally, this function also computes the weighted cell channel table (the data stored in `weighted_cell_channel_name`). This data will be needed to compute the weighted average channel expression per cell cluster (the data stored in `cell_som_cluster_channel_avg_name` and `cell_meta_cluster_channel_avg_name`). See documentation of `compute_p2c_weighted_channel_avg` for how weighted cell channel average is computed: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.cell_cluster_utils.compute_p2c_weighted_channel_avg>cell channel weighting docs</a>.\n",
"\n",
"For a full set of parameters you can customize for `train_cell_som`, please consult: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.som_utils.train_cell_som>cell training docs</a>"
"For a full set of parameters you can customize for `train_cell_som`, please consult: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.cell_cluster_utils.train_cell_som>cell training docs</a>"
]
},
{
Expand All @@ -271,7 +271,7 @@
"outputs": [],
"source": [
"# create the cell-level SOM weights\n",
"som_utils.train_cell_som(\n",
"cell_cluster_utils.train_cell_som(\n",
" fovs,\n",
" channels,\n",
" base_dir,\n",
Expand Down Expand Up @@ -314,7 +314,7 @@
"outputs": [],
"source": [
"# use cell SOM weights to assign cell clusters\n",
"som_utils.cluster_cells(\n",
"cell_cluster_utils.cluster_cells(\n",
" base_dir,\n",
" cluster_counts_norm_name=cluster_counts_norm_name,\n",
" weights_name=cell_weights_name,\n",
Expand Down Expand Up @@ -343,7 +343,7 @@
"* The meta cluster mapping for each cell SOM cluster in `cell_som_cluster_count_avgs_name` (data is resaved, same data except with an associated meta cluster column)\n",
"* The weighted channel average across all cell clusters (the data placed in `cell_som_cluster_channel_avgs_name` and `cell_meta_cluster_channel_avgs_name`). This will be done for both `'cell_som_cluster'` and `'cell_meta_cluster'`.\n",
"\n",
"For a full set of parameters you can customize for `cell_consensus_cluster`, please consult: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.som_utils.cell_consensus_cluster>cell consensus clustering docs</a>"
"For a full set of parameters you can customize for `cell_consensus_cluster`, please consult: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.cell_cluster_utils.cell_consensus_cluster>cell consensus clustering docs</a>"
]
},
{
Expand All @@ -368,7 +368,7 @@
"cap = 3\n",
"\n",
"# run hierarchical clustering based on cell SOM cluster assignments\n",
"som_utils.cell_consensus_cluster(\n",
"cell_cluster_utils.cell_consensus_cluster(\n",
" fovs=fovs,\n",
" channels=channels,\n",
" base_dir=base_dir,\n",
Expand Down Expand Up @@ -445,7 +445,7 @@
"cell_mcd = metaclusterdata_from_files(\n",
" os.path.join(base_dir, cell_som_cluster_count_avgs_name),\n",
" cluster_type='cell',\n",
" prefix_trim='pixel_meta_cluster_rename_'\n",
" prefix_trim=pixel_cluster_col + '_'\n",
")\n",
"cell_mcd.output_mapping_filename = os.path.join(base_dir, cell_meta_cluster_remap_name)\n",
"cell_mcg = MetaClusterGui(cell_mcd, width=17)"
Expand All @@ -468,7 +468,7 @@
},
"outputs": [],
"source": [
"som_utils.apply_cell_meta_cluster_remapping(\n",
"cell_cluster_utils.apply_cell_meta_cluster_remapping(\n",
" fovs,\n",
" channels,\n",
" base_dir,\n",
Expand Down Expand Up @@ -500,7 +500,7 @@
},
"outputs": [],
"source": [
"raw_cmap, renamed_cmap = som_utils.generate_meta_cluster_colormap_dict(\n",
"raw_cmap, renamed_cmap = colormap_helper.generate_meta_cluster_colormap_dict(\n",
" cell_mcd.output_mapping_filename,\n",
" cell_mcg.im_cl.cmap\n",
")"
Expand All @@ -523,7 +523,7 @@
},
"outputs": [],
"source": [
"som_utils.generate_weighted_channel_avg_heatmap(\n",
"cell_cluster_utils.generate_weighted_channel_avg_heatmap(\n",
" os.path.join(base_dir, cell_som_cluster_channel_avg_name),\n",
" 'cell_som_cluster',\n",
" channels,\n",
Expand All @@ -549,7 +549,7 @@
},
"outputs": [],
"source": [
"som_utils.generate_weighted_channel_avg_heatmap(\n",
"cell_cluster_utils.generate_weighted_channel_avg_heatmap(\n",
" os.path.join(base_dir, cell_meta_cluster_channel_avg_name),\n",
" 'cell_meta_cluster_rename',\n",
" channels,\n",
Expand Down Expand Up @@ -655,7 +655,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"\"The cell table with consensus cluster labels is saved to `{cell_table_path}_cell_labels.csv`\""
"The cell table with consensus cluster labels is saved to `{cell_table_path}_cell_labels.csv`"
]
},
{
Expand All @@ -668,7 +668,7 @@
},
"outputs": [],
"source": [
"som_utils.add_consensus_labels_cell_table(\n",
"cell_cluster_utils.add_consensus_labels_cell_table(\n",
" base_dir, cell_table_path, cell_data_name\n",
")"
]
Expand Down Expand Up @@ -700,13 +700,14 @@
" mapping = os.path.join(base_dir, cell_meta_cluster_remap_name),\n",
" seg_dir=segmentation_dir,\n",
" mask_suffix=\"_cell_mask\",\n",
" img_sub_folder='')"
" img_sub_folder=''\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.12 ('ark-3.7-R')",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -720,7 +721,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
"version": "3.6.13"
},
"vscode": {
"interpreter": {
Expand Down
Loading

0 comments on commit 69ff5d3

Please sign in to comment.