Skip to content

Commit

Permalink
fixed move test
Browse files Browse the repository at this point in the history
  • Loading branch information
srivarra committed Oct 13, 2022
1 parent c2d565c commit eba8169
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 46 deletions.
27 changes: 17 additions & 10 deletions ark/utils/example_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def download_example_dataset(self):
cache_dir=self.cache_dir,
use_auth_token=False)

def check_downloaded(self, dst_path: pathlib.Path) -> bool:
def check_empty_dst(self, dst_path: pathlib.Path) -> bool:
"""
Checks to see if the folder for a dataset config already exists in the `save_dir`
(i.e. `dst_path` is the specific folder for the config.). If the folder exists, and
Expand Down Expand Up @@ -105,22 +105,29 @@ def move_example_dataset(self, move_dir: Union[str, pathlib.Path]):

# Overwrite the existing dataset when `overwrite_existing` == `True`
# and when the `dst_path` is empty.

# `True` if `dst_path` is empty, `False` if data exists in `dst_path`
empty_dst_path = self.check_downloaded(dst_path=dst_path)
empty_dst_path = self.check_empty_dst(dst_path=dst_path)

if self.overwrite_existing:
if not empty_dst_path:
warnings.WarningMessage(f"Files exist in {dst_path}. \
They will be overwritten by the downloaded example dataset.")
shutil.copytree(src_path, dst_path, dirs_exist_ok=True,
ignore=shutil.ignore_patterns("._*"))
warnings.warn(UserWarning(f"Files exist in {dst_path}. \
They will be overwritten by the downloaded example dataset."))

# Remove files in the destination path
[f.unlink() for f in dst_path.glob("*") if f.is_file()]
# Fill destination path
shutil.copytree(src_path, dst_path, dirs_exist_ok=True,
ignore=shutil.ignore_patterns("._*"))
else:
if empty_dst_path:
warnings.WarningMessage(f"Files do not exist in {dst_path}. \
The example dataset will be added in.")
warnings.warn(UserWarning(f"Files do not exist in {dst_path}. \
The example dataset will be added in."))
shutil.copytree(src_path, dst_path, dirs_exist_ok=True,
ignore=shutil.ignore_patterns("._*"))
else:
warnings.warn(UserWarning(f"Files exist in {dst_path}. \
They will not be overwritten."))


def get_example_dataset(dataset: str, save_dir: Union[str, pathlib.Path],
Expand Down
132 changes: 103 additions & 29 deletions ark/utils/example_dataset_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from email.generator import Generator
import pathlib
from typing import Callable, Iterator
import itertools
Expand All @@ -24,13 +25,8 @@ def setup_temp_path_factory(tmp_path_factory) -> Iterator[pathlib.Path]:
yield cache_dir


# Only download the dataset configs required per tests w.r.t the notebooks.
# Will not download reused dataset configs.
data_tests = ["segment_image_data", "cluster_pixels", "cluster_cells", "post_clustering"]
# We get [(True, segment_image_data), (False, segment_image_data), (True, cluster_pixels), ...]
dataset_download_fixture_params = list(itertools.product([True, False], data_tests))

@pytest.fixture(scope="session", params=dataset_download_fixture_params)
@pytest.fixture(scope="session", params=["segment_image_data", "cluster_pixels",
"cluster_cells", "post_clustering"])
def dataset_download(setup_temp_path_factory, request) -> Iterator[ExampleDataset]:
"""
A Fixture which instantiates and downloads the dataset with respect to each
Expand All @@ -47,8 +43,7 @@ def dataset_download(setup_temp_path_factory, request) -> Iterator[ExampleDatase
"""
# Set up ExampleDataset class
example_dataset: ExampleDataset = ExampleDataset(
overwrite_existing=request.param[0],
dataset=request.param[1],
dataset=request.param,
cache_dir=setup_temp_path_factory,
revision="a3b0db4fa93c194bfcaf5d4daccbe6573c6a6f7c"
)
Expand Down Expand Up @@ -131,28 +126,83 @@ def test_download_example_dataset(self, dataset_download: ExampleDataset):
dataset_download.dataset_paths[dataset_download.dataset][ds_n][0])
self.dataset_test_fns[ds_n](dir_p=dataset_cache_path / ds_n)

@pytest.mark.parametrize("_overwrite_existing", [True, False])
def test_move_example_dataset(self, tmp_path_factory, dataset_download: ExampleDataset,
                              _overwrite_existing: bool):
    """
    Tests to make sure the proper files are moved to the correct directories.

    Four scenarios are exercised, two per `overwrite_existing` setting:
    the destination starts empty, and the destination already contains files.

    Args:
        tmp_path_factory (pytest.TempPathFactory): Factory for temporary directories under the
            common base temp directory.
        dataset_download (ExampleDataset): Fixture for the dataset, respective to each
            partition (`segment_image_data`, `cluster_pixels`, `cluster_cells`,
            `post_clustering`).
        _overwrite_existing (bool): If `True` the dataset will be overwritten. If `False` it
            will not be.
    """
    dataset_download.overwrite_existing = _overwrite_existing

    # Move data if _overwrite_existing is `True`
    if _overwrite_existing:

        # Case 1: Move Path is empty
        tmp_dir_c1 = tmp_path_factory.mktemp("move_example_data_c1")
        move_dir_c1 = tmp_dir_c1 / "example_dataset"
        dataset_download.move_example_dataset(move_dir=move_dir_c1)

        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c1):
            self.dataset_test_fns[ds_n](dir_p)

        # Case 2: Move Path contains files
        tmp_dir_c2 = tmp_path_factory.mktemp("move_example_data_c2")
        move_dir_c2 = tmp_dir_c2 / "example_dataset"

        # Add files for each config to test moving with files
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            # make directory
            dir_p.mkdir(parents=True, exist_ok=False)
            # make blank file
            test_utils._make_blank_file(dir_p, "data_test.txt")

        # Move files to directory which has existing files
        # Make sure warning is raised
        with pytest.warns(UserWarning):
            dataset_download.move_example_dataset(move_dir=move_dir_c2)
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            self.dataset_test_fns[ds_n](dir_p)

    # Move data if _overwrite_existing is `False`
    else:
        # Case 1: Move Path is empty
        tmp_dir_c1 = tmp_path_factory.mktemp("move_example_data_c1")
        move_dir_c1 = tmp_dir_c1 / "example_dataset"

        # Check that the files were moved to the empty directory
        # Make sure warning is raised
        with pytest.warns(UserWarning):
            dataset_download.move_example_dataset(move_dir=move_dir_c1)

        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c1):
            self.dataset_test_fns[ds_n](dir_p)

        # Case 2: Move Path contains files
        tmp_dir_c2 = tmp_path_factory.mktemp("move_example_data_c2")
        move_dir_c2 = tmp_dir_c2 / "example_dataset"

        # Add files for each config to test moving with files
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            # make directory
            dir_p.mkdir(parents=True, exist_ok=False)
            # make blank file
            test_utils._make_blank_file(dir_p, "data_test.txt")

        # Do not move files to directory containing files
        # Make sure warning is raised.
        with pytest.warns(UserWarning):
            dataset_download.move_example_dataset(move_dir=move_dir_c2)
        # Only the pre-existing blank file remains — nothing was overwritten
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            assert len(list(dir_p.rglob("*"))) == 1

# Will cause duplicate downloads
def test_get_example_dataset(self, tmp_path_factory):
Expand All @@ -164,9 +214,10 @@ def test_get_example_dataset(self, tmp_path_factory):
with pytest.raises(ValueError):
get_example_dataset("incorrect_dataset", save_dir=tmp_path_factory)

def test_check_downloaded(self, tmp_path):
def test_check_empty_dst(self, tmp_path):
"""
Tests to make sure that `ExampleDataset.get_example_dataset()` accurately
Tests to make sure that `ExampleDataset.check_empty_dst()` accurately
reports if a directory contains files or not.
"""

Expand All @@ -177,11 +228,11 @@ def test_check_downloaded(self, tmp_path):
packed_data_dir.mkdir(parents=True)

# Empty directory has no files
assert example_dataset.check_downloaded(empty_data_dir) is True
assert example_dataset.check_empty_dst(empty_data_dir) is True

# Directory has files
test_utils._make_blank_file(packed_data_dir, "data_test.txt")
assert example_dataset.check_downloaded(packed_data_dir) is False
assert example_dataset.check_empty_dst(packed_data_dir) is False

def _image_data_check(self, dir_p: pathlib.Path):
"""
Expand Down Expand Up @@ -321,3 +372,26 @@ def _example_cell_output_dir_check(self, dir_p: pathlib.Path):
cell_mask_names = [f.stem for f in cell_mask_files]
assert set(self._example_cell_output_dir_names["cell_masks"]) \
== set(cell_mask_names)

def _suffix_paths(self, dataset_download: "ExampleDataset",
                  parent_dir: pathlib.Path) -> Iterator:
    """
    Creates a generator where each element is a tuple of the data directory
    and the dataset name.

    Note: the return annotation previously used `Generator`, which was imported
    from `email.generator` (a MIME serializer class) rather than `typing` — the
    file already imports `Iterator` from `typing`, so that is used instead.

    Args:
        dataset_download (ExampleDataset): Fixture for the dataset, respective to each
            partition (`segment_image_data`, `cluster_pixels`, `cluster_cells`,
            `post_clustering`).
        parent_dir (pathlib.Path): The path where the example dataset will be saved.

    Yields:
        Iterator: Yields the data directory for the files to be moved, and the dataset name.
    """
    dataset_names = list(
        dataset_download.dataset_paths[dataset_download.dataset].features.keys()
    )

    # Pair each dataset name with its destination directory under `parent_dir`.
    for ds_n in dataset_names:
        yield (parent_dir / self.move_path_suffixes[ds_n], ds_n)
2 changes: 1 addition & 1 deletion templates/2_Cluster_Pixels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@
" cluster_type='pixel'\n",
")\n",
"pixel_mcd.output_mapping_filename = os.path.join(base_dir, pixel_meta_cluster_remap_name)\n",
"pixel_mcg = MetaClusterGui(pixel_mcd, width=17"
"pixel_mcg = MetaClusterGui(pixel_mcd, width=17)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion templates/3_Cluster_Cells.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@
" img_data_path=tiff_dir,\n",
" mask_output_dir=os.path.join(base_dir, \"segmentation\", cell_output_dir, \"cell_masks\"),\n",
" mapping = os.path.join(base_dir, cell_meta_cluster_remap_name),\n",
" seg_dir=os.path.join(base_dir, deepcell_output, \"deepcell_output\"),\n",
" seg_dir=os.path.join(base_dir, \"segmentation\", \"deepcell_output\"),\n",
" mask_suffix=\"_cell_mask\")"
]
},
Expand Down
12 changes: 7 additions & 5 deletions templates/example_fiber_segmentation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,8 @@
}
],
"metadata": {
"interpreter": {
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "ark-3.8",
"language": "python",
"name": "python3"
},
Expand All @@ -189,7 +186,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.8.13 (default, Sep 30 2022, 15:46:23) \n[Clang 14.0.0 (clang-1400.0.29.102)]"
},
"vscode": {
"interpreter": {
"hash": "b4883fe62ab0956c6f629c0d5453344976a44af1f953254d3dc7063b257762f9"
}
}
},
"nbformat": 4,
Expand Down

0 comments on commit eba8169

Please sign in to comment.