Skip to content

Commit

Permalink
Remove the `deterministic` argument from the `som` function in pyFlowSOM (#893)
Browse files Browse the repository at this point in the history
* Remove deterministic argument from SOM

* Add explicit seed setting for Pixie SOM training

* Update pyFlowSOM to 0.1.14

* Remove requirements.txt as well

* Update pyFlowSOM to 0.1.14

* Abstract `seed` into `PixieSOMCluster` and ensure it is actually passed to `pyFlowSOM.som`

* Add explicit `seed` parameters to the pixel and cell clustering notebooks
  • Loading branch information
alex-l-kong committed Feb 16, 2023
1 parent d536fde commit 51fcb02
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 20 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
"palettable>=3.3.0,<4",
"pandas>=1.3,<2",
"pillow>=9,<10",
"pyFlowSOM==0.1.13",
"pyFlowSOM==0.1.14",
"requests>=2.20,<3",
"scikit-image>=0.19,<0.20",
"scikit-learn>=1.1,<2",
Expand Down
7 changes: 5 additions & 2 deletions src/ark/phenotyping/cell_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,
pc_chan_avg_name='pc_chan_avg.csv',
som_weights_name='cell_som_weights.feather',
weighted_cell_channel_name='weighted_cell_channel.feather',
xdim=10, ydim=10, lr_start=0.05, lr_end=0.01, num_passes=1):
xdim=10, ydim=10, lr_start=0.05, lr_end=0.01, num_passes=1, seed=42):
"""Run the SOM training on the number of pixel/meta clusters in each cell of each fov
Saves the SOM weights to `base_dir/som_weights_name`. Computes and saves weighted
Expand Down Expand Up @@ -461,6 +461,8 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,
The end learning rate for the SOM, decays from `lr_start`
num_passes (int):
The number of training passes to make through the dataset
seed (int):
The random seed to use for training the SOM
Returns:
cluster_helpers.CellSOMCluster:
Expand Down Expand Up @@ -507,7 +509,8 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,
# define the cell SOM cluster object
cell_pysom = cluster_helpers.CellSOMCluster(
cluster_counts_size_norm_path, som_weights_path, fovs, cluster_count_cols,
num_passes=num_passes, xdim=xdim, ydim=ydim, lr_start=lr_start, lr_end=lr_end
num_passes=num_passes, xdim=xdim, ydim=ydim, lr_start=lr_start, lr_end=lr_end,
seed=seed
)

# train the SOM weights
Expand Down
27 changes: 18 additions & 9 deletions src/ark/phenotyping/cluster_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
class PixieSOMCluster(ABC):
@abstractmethod
def __init__(self, weights_path: pathlib.Path, columns: List[str], num_passes: int = 1,
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01):
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01,
seed=42):
"""Generic implementation of a pyFlowSOM runner
Args:
Expand All @@ -37,6 +38,8 @@ def __init__(self, weights_path: pathlib.Path, columns: List[str], num_passes: i
The initial learning rate.
lr_end (float):
The learning rate to decay to
seed (int):
The random seed to use for training.
"""
self.weights_path = weights_path
self.weights = None if not os.path.exists(weights_path) else feather.read_dataframe(
Expand All @@ -48,6 +51,7 @@ def __init__(self, weights_path: pathlib.Path, columns: List[str], num_passes: i
self.ydim = ydim
self.lr_start = lr_start
self.lr_end = lr_end
self.seed = seed

@abstractmethod
def normalize_data(self) -> pd.DataFrame:
Expand All @@ -65,10 +69,10 @@ def train_som(self, data: pd.DataFrame):
data (pandas.DataFrame):
The input data to train the SOM on.
"""
# make sure to run a deterministic SOM for reproducibility purposes

som_weights = som(
data=data.values, xdim=self.xdim, ydim=self.ydim, rlen=self.num_passes,
alpha_range=(self.lr_start, self.lr_end), deterministic=True
alpha_range=(self.lr_start, self.lr_end), seed=self.seed
)

# ensure dimensions of weights are flattened
Expand Down Expand Up @@ -118,7 +122,7 @@ class PixelSOMCluster(PixieSOMCluster):
def __init__(self, pixel_subset_folder: pathlib.Path, norm_vals_path: pathlib.Path,
weights_path: pathlib.Path, fovs: List[str], columns: List[str],
num_passes: int = 1, xdim: int = 10, ydim: int = 10,
lr_start: float = 0.05, lr_end: float = 0.01):
lr_start: float = 0.05, lr_end: float = 0.01, seed=42):
"""Creates a pixel SOM cluster object derived from the abstract PixieSOMCluster
Args:
Expand All @@ -141,10 +145,12 @@ def __init__(self, pixel_subset_folder: pathlib.Path, norm_vals_path: pathlib.Pa
lr_start (float):
The initial learning rate.
lr_end (float):
The learning rate to decay to
The learning rate to decay to.
seed (int):
The random seed to use.
"""
super().__init__(
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end, seed
)

# path validation
Expand Down Expand Up @@ -232,7 +238,8 @@ def assign_som_clusters(self, external_data: pd.DataFrame) -> pd.DataFrame:
class CellSOMCluster(PixieSOMCluster):
def __init__(self, cell_data_path: pathlib.Path, weights_path: pathlib.Path,
fovs: List[str], columns: List[str], num_passes: int = 1,
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01):
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01,
seed=42):
"""Creates a cell SOM cluster object derived from the abstract PixieSOMCluster
Args:
Expand All @@ -253,10 +260,12 @@ def __init__(self, cell_data_path: pathlib.Path, weights_path: pathlib.Path,
lr_start (float):
The initial learning rate.
lr_end (float):
The learning rate to decay to
The learning rate to decay to.
seed (int):
The random seed to use.
"""
super().__init__(
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end, seed
)

# path validation
Expand Down
7 changes: 5 additions & 2 deletions src/ark/phenotyping/pixel_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ def train_pixel_som(fovs, channels, base_dir,
subset_dir='pixel_mat_subsetted',
norm_vals_name='post_rowsum_chan_norm.feather',
som_weights_name='pixel_som_weights.feather', xdim=10, ydim=10,
lr_start=0.05, lr_end=0.01, num_passes=1):
lr_start=0.05, lr_end=0.01, num_passes=1, seed=42):
"""Run the SOM training on the subsetted pixel data.
Saves SOM weights to `base_dir/som_weights_name`.
Expand Down Expand Up @@ -902,6 +902,8 @@ def train_pixel_som(fovs, channels, base_dir,
The end learning rate for the SOM, decays from `lr_start`
num_passes (int):
The number of training passes to make through the dataset
seed (int):
The random seed to use for training the SOM
Returns:
cluster_helpers.PixelSOMCluster:
Expand Down Expand Up @@ -930,7 +932,8 @@ def train_pixel_som(fovs, channels, base_dir,
# define the pixel SOM cluster object
pixel_pysom = cluster_helpers.PixelSOMCluster(
subsetted_path, norm_vals_path, som_weights_path, fovs, channels,
num_passes=num_passes, xdim=xdim, ydim=ydim, lr_start=lr_start, lr_end=lr_end
num_passes=num_passes, xdim=xdim, ydim=ydim, lr_start=lr_start, lr_end=lr_end,
seed=seed
)

# train the SOM weights
Expand Down
7 changes: 4 additions & 3 deletions templates/2_Pixie_Cluster_Pixels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,8 @@
" subset_dir=pixel_subset_dir,\n",
" norm_vals_name=norm_vals_name,\n",
" som_weights_name=pixel_som_weights_name,\n",
" num_passes=1\n",
" num_passes=1,\n",
" seed=42\n",
")"
]
},
Expand Down Expand Up @@ -878,7 +879,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "ark38",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -892,7 +893,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.8.15"
},
"toc-autonumbering": false,
"toc-showcode": true,
Expand Down
7 changes: 4 additions & 3 deletions templates/3_Pixie_Cluster_Cells.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@
" pc_chan_avg_name=pc_chan_avg_name,\n",
" som_weights_name=cell_som_weights_name,\n",
" weighted_cell_channel_name=weighted_cell_channel_name,\n",
" num_passes=1\n",
" num_passes=1,\n",
" seed=42\n",
")"
]
},
Expand Down Expand Up @@ -749,7 +750,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "ark38",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -763,7 +764,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.8.15"
},
"vscode": {
"interpreter": {
Expand Down

0 comments on commit 51fcb02

Please sign in to comment.