Skip to content

Commit

Permalink
Merge branch 'main' into mixing_score
Browse files Browse the repository at this point in the history
  • Loading branch information
camisowers committed Feb 17, 2023
2 parents e4e186b + 3a24d44 commit 461cff6
Show file tree
Hide file tree
Showing 13 changed files with 1,957 additions and 781 deletions.
17 changes: 17 additions & 0 deletions docs/_rtd/data_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,20 @@ Shape: 2D matrix of num_cells x num_cells
Description: Many of the spatial analysis functions in the `analysis` module use distances between cells to compute interesting spatial properties. The distance matrix holds that information. Each matrix is a square array, where the value of cell (**i**, **j**) in the matrix represents the distance between cell **i** and cell **j**.

Note: `calc_dist_matrix` produces a dictionary of distance matrices; each distance matrix takes the form described above.

---

Name: generalized cell clustering inputs
Type: pandas.DataFrame
Created by: user
Used by: [3b_Generic_Cluster_Cells.ipynb](https://github.com/angelolab/ark-analysis/blob/main/templates/3b_Generic_Cluster_Cells.ipynb)

Shape: 2D matrix of num_cells x num_expression_columns

Description: Users may want to run cell clustering on custom expression inputs other than pixel cluster counts. The input data will need to be provided as a CSV to support DataFrame operations.

The CSV should contain the following columns:

* `fov`: name of the FOV the cell comes from
* `segmentation_label`: the name of the segmentation label
* A set of expression columns defining the properties of each cell desired for clustering
776 changes: 435 additions & 341 deletions src/ark/phenotyping/cell_cluster_utils.py

Large diffs are not rendered by default.

16 changes: 6 additions & 10 deletions src/ark/phenotyping/cluster_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,15 @@ def assign_som_clusters(self, external_data: pd.DataFrame) -> pd.DataFrame:


class CellSOMCluster(PixieSOMCluster):
def __init__(self, cell_data_path: pathlib.Path, weights_path: pathlib.Path,
def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
fovs: List[str], columns: List[str], num_passes: int = 1,
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01,
seed=42):
"""Creates a cell SOM cluster object derived from the abstract PixieSOMCluster
Args:
cell_data_path (pathlib.Path):
The name of the cell dataset to use for training
cell_data (pandas.DataFrame):
The dataset to use for training
weights_path (pathlib.Path):
The path to save the weights to.
fovs (List[str]):
Expand All @@ -268,12 +268,8 @@ def __init__(self, cell_data_path: pathlib.Path, weights_path: pathlib.Path,
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end, seed
)

# path validation
validate_paths([cell_data_path])
self.cell_data_path = cell_data_path

# load the cell data in
self.cell_data = feather.read_dataframe(cell_data_path)
# assign the cell data
self.cell_data = cell_data

# define the fovs used
self.fovs = fovs
Expand Down Expand Up @@ -329,7 +325,7 @@ def assign_som_clusters(self) -> pd.DataFrame:
`cell_data` with the SOM clusters assigned.
"""
# cell_data is already normalized, don't repeat
som_labels = super().generate_som_clusters(self.cell_data)
som_labels = super().generate_som_clusters(self.cell_data[self.columns])

# assign SOM clusters to cell_data
self.cell_data['cell_som_cluster'] = som_labels
Expand Down
2 changes: 1 addition & 1 deletion src/ark/phenotyping/pixel_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ def cluster_pixels(fovs, channels, base_dir, pixel_pysom, data_dir='pixel_mat_da

# raise error if weights haven't been assigned to pixel_pysom
if pixel_pysom.weights is None:
raise ValueError("Using untrained pixel_pysom object, please invoke train_som first")
raise ValueError("Using untrained pixel_pysom object, please invoke train_pixel_som first")

# verify that all provided fovs exist in the folder
# NOTE: remove the channel and pixel normalization files as those are not pixel data
Expand Down
24 changes: 10 additions & 14 deletions src/ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def label_cells_by_cluster(fov, all_data, label_map, fov_col=settings.FOV_ID,
return relabeled_img_array


def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data,
cell_cluster_col='cell_meta_cluster',
seg_suffix='_whole_cell.tiff'):
"""For a fov, create a mask labeling each cell with their SOM or meta cluster label
Expand All @@ -140,8 +140,8 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
The path to the data directory
seg_dir (str):
The path to the segmentation data
cell_data_name (str):
The path to the cell data with both cell SOM and meta cluster assignments
cell_data (pandas.DataFrame):
The cell data with both cell SOM and meta cluster assignments
cell_cluster_col (str):
Whether to assign SOM or meta clusters.
Needs to be `'cell_som_cluster'` or `'cell_meta_cluster'`
Expand All @@ -154,20 +154,16 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
"""

# path checking
cell_data_path = os.path.join(os.path.join(base_dir, cell_data_name))
io_utils.validate_paths([seg_dir, cell_data_path])
io_utils.validate_paths([seg_dir])

# verify the cluster_col provided is valid
misc_utils.verify_in_list(
provided_cluster_col=cell_cluster_col,
valid_cluster_cols=['cell_som_cluster', 'cell_meta_cluster']
)

# load the consensus data in
cell_consensus_data = feather.read_dataframe(os.path.join(base_dir, cell_data_name))

# ensure the cluster col will be displayed as an integer and not a float
cell_consensus_data[cell_cluster_col] = cell_consensus_data[cell_cluster_col].astype(int)
cell_data[cell_cluster_col] = cell_data[cell_cluster_col].astype(int)

# define the file for whole cell
whole_cell_files = [fov + seg_suffix]
Expand All @@ -180,7 +176,7 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,

# use label_cells_by_cluster to create cell masks
img_data = label_cells_by_cluster(
fov, cell_consensus_data, label_map, fov_col='fov',
fov, cell_data, label_map, fov_col='fov',
cell_label_column='segmentation_label', cluster_column=cell_cluster_col
)

Expand All @@ -191,7 +187,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
base_dir: Union[pathlib.Path, str],
save_dir: Union[pathlib.Path, str],
seg_dir: Union[pathlib.Path, str],
cell_data_name: Union[pathlib.Path, str],
cell_data: pd.DataFrame,
cell_cluster_col: str = 'cell_meta_cluster',
seg_suffix: str = '_whole_cell.tiff',
sub_dir: str = None,
Expand All @@ -207,8 +203,8 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
The directory to save the generated cell cluster masks.
seg_dir (Union[pathlib.Path, str]):
The path to the segmentation data.
cell_data_name (Union[pathlib.Path, str]):
The path to the cell data with both cell SOM and meta cluster assignments
cell_data (pandas.DataFrame):
The cell data with both cell SOM and meta cluster assignments
cell_cluster_col (str, optional):
Whether to assign SOM or meta clusters. Needs to be `'cell_som_cluster'` or
`'cell_meta_cluster'`. Defaults to `'cell_meta_cluster'`.
Expand All @@ -228,7 +224,7 @@ def generate_and_save_cell_cluster_masks(fovs: List[str],
# generate the cell mask for the FOV
cell_mask: np.ndarray =\
generate_cell_cluster_mask(fov=fov, base_dir=base_dir, seg_dir=seg_dir,
cell_data_name=cell_data_name,
cell_data=cell_data,
cell_cluster_col=cell_cluster_col,
seg_suffix=seg_suffix)

Expand Down
2 changes: 1 addition & 1 deletion templates/2_Pixie_Cluster_Pixels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@
"\n",
"Note that each channel is normalized by the respective value stored in `norm_vals_name` (computed in `train_pixel_som`) prior to cluster assignment.\n",
"\n",
"`generate_som_avg_files` will then computes the average channel expression across all pixel SOM clusters as well as the number of pixels in each pixel SOM cluster (the data placed in `pc_chan_avg_som_cluster_name`). This is needed for pixel consensus clustering."
"`generate_som_avg_files` will then compute the average channel expression across all pixel SOM clusters as well as the number of pixels in each pixel SOM cluster (the data placed in `pc_chan_avg_som_cluster_name`). This is needed for pixel consensus clustering."
]
},
{
Expand Down
Loading

0 comments on commit 461cff6

Please sign in to comment.