Skip to content

Commit

Permalink
Example Dataset - Notebook 4 (#763)
Browse files Browse the repository at this point in the history
* dataset for notebook 4

* pycodestyle

* updated docstring

* fiber_segmentation_fix

* test incorrect dataset parameter

* removed test which runs through all of the Segment Image Data notebook

* moving correct merged files over to n4

* minimal changes from main

* import statement fix

* all datasets

* updated cell problematic_cluster, marker

* nb4 adjustments

* nb4 adjustments
  • Loading branch information
srivarra authored Oct 20, 2022
1 parent 6073dbe commit 554dafe
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ Pipfile.lock
data/example_dataset/image_data/
data/example_dataset/segmentation/
data/example_dataset/pixie/
data/example_dataset/post_clustering/
1 change: 0 additions & 1 deletion ark/segmentation/fiber_segmentation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import tempfile
import pytest


import ark.settings as settings
from ark.utils import io_utils, example_dataset
from ark.segmentation import fiber_segmentation
Expand Down
1 change: 1 addition & 0 deletions ark/utils/example_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str
"cell_table": "segmentation/cell_table",
"deepcell_output": "segmentation/deepcell_output",
"example_pixel_output_dir": "pixie/example_pixel_output_dir",
"example_cell_output_dir": "pixie/example_cell_output_dir",
}
"""
Path suffixes for mapping each downloaded dataset partition to its appropriate
Expand Down
57 changes: 55 additions & 2 deletions ark/utils/example_dataset_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pathlib
from typing import Callable, Iterator, Generator
import itertools
import pytest
from ark.utils.example_dataset import ExampleDataset, get_example_dataset
from ark.utils import test_utils
Expand All @@ -24,7 +23,8 @@ def setup_temp_path_factory(tmp_path_factory) -> Iterator[pathlib.Path]:
yield cache_dir


@pytest.fixture(scope="session", params=["cluster_cells"])
@pytest.fixture(scope="session", params=["segment_image_data", "cluster_pixels",
"cluster_cells", "post_clustering"])
def dataset_download(setup_temp_path_factory, request) -> Iterator[ExampleDataset]:
"""
A Fixture which instantiates and downloads the dataset with respect to each
Expand Down Expand Up @@ -77,11 +77,25 @@ def _setup(self):
"pixel_masks": [f"fov{i}_pixel_mask" for i in range(2)]
}

self._example_cell_output_dir_names = {
"root_files": ["example_cell_clust_to_meta", "example_cell_mat",
"example_cell_meta_cluster_channel_avg",
"example_cell_meta_cluster_count_avgs",
"example_cell_som_cluster_channel_avg",
"example_cell_meta_cluster_mapping",
"example_cell_som_cluster_channel_avg",
"example_cell_som_cluster_count_avgs",
"example_cell_weights", "example_cluster_counts",
"example_cluster_counts_norm", "example_weighted_cell_channel"],
"cell_masks": [f"fov{i}_cell_mask" for i in range(2)]
}

self.dataset_test_fns: dict[str, Callable] = {
"image_data": self._image_data_check,
"cell_table": self._cell_table_check,
"deepcell_output": self._deepcell_output_check,
"example_pixel_output_dir": self._example_pixel_output_dir_check,
"example_cell_output_dir": self._example_cell_output_dir_check,
}

# Mapping the datasets to their respective test functions.
Expand All @@ -90,6 +104,7 @@ def _setup(self):
"cell_table": "segmentation/cell_table",
"deepcell_output": "segmentation/deepcell_output",
"example_pixel_output_dir": "pixie/example_pixel_output_dir",
"example_cell_output_dir": "pixie/example_cell_output_dir",
}

def test_download_example_dataset(self, dataset_download: ExampleDataset):
Expand Down Expand Up @@ -318,6 +333,44 @@ def _example_pixel_output_dir_check(self, dir_p: pathlib.Path):
assert set(self._example_pixel_output_dir_names["pixel_masks"]) \
== set(pixel_mask_names)

def _example_cell_output_dir_check(self, dir_p: pathlib.Path):
    """
    Verifies the contents of a downloaded `example_cell_output_dir`.

    Expected layout:
    ```
    example_cell_output_dir/
    ├── cell_masks/
    │   ├── fov0_cell_mask.tiff
    │   └── fov1_cell_mask.tiff
    ├── example_cell_clust_to_meta.feather
    ├── example_cell_mat.feather
    ├── example_cell_meta_cluster_channel_avg.csv
    ├── example_cell_meta_cluster_count_avgs.csv
    ├── example_cell_meta_cluster_mapping.csv
    ├── example_cell_som_cluster_channel_avg.csv
    ├── example_cell_som_cluster_count_avgs.csv
    ├── example_cell_weights.feather
    ├── example_cluster_counts.feather
    ├── example_cluster_counts_norm.feather
    └── example_weighted_cell_channel.csv
    ```

    Only file stems are compared (as sets), so the check confirms which names
    are present but not which extension each name carries.

    Args:
        dir_p (pathlib.Path): The directory to check.

    Raises:
        AssertionError: If the names found on disk differ from the expected
            names stored in `self._example_cell_output_dir_names`.
    """

    # Top-level tables: every `.feather` and `.csv` file directly inside `dir_p`.
    found_root_names = {
        entry.stem
        for pattern in ("*.feather", "*.csv")
        for entry in dir_p.glob(pattern)
    }
    assert set(self._example_cell_output_dir_names["root_files"]) == found_root_names

    # Per-FOV masks: every `.tiff` file inside the `cell_masks` subdirectory.
    found_mask_names = {tiff.stem for tiff in (dir_p / "cell_masks").glob("*.tiff")}
    assert set(self._example_cell_output_dir_names["cell_masks"]) \
        == found_mask_names

def _suffix_paths(self, dataset_download: ExampleDataset,
parent_dir: pathlib.Path) -> Generator:
"""
Expand Down
Loading

0 comments on commit 554dafe

Please sign in to comment.