Skip to content

Commit

Permalink
fixed move test
Browse files Browse the repository at this point in the history
  • Loading branch information
srivarra committed Oct 13, 2022
1 parent c2d565c commit eba8169
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 46 deletions.
27 changes: 17 additions & 10 deletions ark/utils/example_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def download_example_dataset(self):
cache_dir=self.cache_dir,
use_auth_token=False)

def check_downloaded(self, dst_path: pathlib.Path) -> bool:
def check_empty_dst(self, dst_path: pathlib.Path) -> bool:
"""
Checks to see if the folder for a dataset config already exists in the `save_dir`
(i.e. `dst_path` is the specific folder for the config.). If the folder exists, and
Expand Down Expand Up @@ -105,22 +105,29 @@ def move_example_dataset(self, move_dir: Union[str, pathlib.Path]):

# Overwrite the existing dataset when `overwrite_existing` == `True`
# and when the `dst_path` is empty.

# `True` if `dst_path` is empty, `False` if data exists in `dst_path`
empty_dst_path = self.check_downloaded(dst_path=dst_path)
empty_dst_path = self.check_empty_dst(dst_path=dst_path)

if self.overwrite_existing:
if not empty_dst_path:
warnings.WarningMessage(f"Files exist in {dst_path}. \
They will be overwritten by the downloaded example dataset.")
shutil.copytree(src_path, dst_path, dirs_exist_ok=True,
ignore=shutil.ignore_patterns("._*"))
warnings.warn(UserWarning(f"Files exist in {dst_path}. \
They will be overwritten by the downloaded example dataset."))

# Remove files in the destination path
[f.unlink() for f in dst_path.glob("*") if f.is_file()]
# Fill destination path
shutil.copytree(src_path, dst_path, dirs_exist_ok=True,
ignore=shutil.ignore_patterns("._*"))
else:
if empty_dst_path:
warnings.WarningMessage(f"Files do not exist in {dst_path}. \
The example dataset will be added in.")
warnings.warn(UserWarning(f"Files do not exist in {dst_path}. \
The example dataset will be added in."))
shutil.copytree(src_path, dst_path, dirs_exist_ok=True,
ignore=shutil.ignore_patterns("._*"))
else:
warnings.warn(UserWarning(f"Files exist in {dst_path}. \
They will not be overwritten."))


def get_example_dataset(dataset: str, save_dir: Union[str, pathlib.Path],
Expand Down
132 changes: 103 additions & 29 deletions ark/utils/example_dataset_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from email.generator import Generator
import pathlib
from typing import Callable, Iterator
import itertools
Expand All @@ -24,13 +25,8 @@ def setup_temp_path_factory(tmp_path_factory) -> Iterator[pathlib.Path]:
yield cache_dir


# Only download the dataset configs required per tests w.r.t the notebooks.
# Will not download reused dataset configs.
data_tests = ["segment_image_data", "cluster_pixels", "cluster_cells", "post_clustering"]
# We get [(True, segment_image_data), (False, segment_image_data), (True, cluster_pixels), ...]
dataset_download_fixture_params = list(itertools.product([True, False], data_tests))

@pytest.fixture(scope="session", params=dataset_download_fixture_params)
@pytest.fixture(scope="session", params=["segment_image_data", "cluster_pixels",
"cluster_cells", "post_clustering"])
def dataset_download(setup_temp_path_factory, request) -> Iterator[ExampleDataset]:
"""
A Fixture which instantiates and downloads the dataset with respect to each
Expand All @@ -47,8 +43,7 @@ def dataset_download(setup_temp_path_factory, request) -> Iterator[ExampleDatase
"""
# Set up ExampleDataset class
example_dataset: ExampleDataset = ExampleDataset(
overwrite_existing=request.param[0],
dataset=request.param[1],
dataset=request.param,
cache_dir=setup_temp_path_factory,
revision="a3b0db4fa93c194bfcaf5d4daccbe6573c6a6f7c"
)
Expand Down Expand Up @@ -131,28 +126,83 @@ def test_download_example_dataset(self, dataset_download: ExampleDataset):
dataset_download.dataset_paths[dataset_download.dataset][ds_n][0])
self.dataset_test_fns[ds_n](dir_p=dataset_cache_path / ds_n)

@pytest.mark.parametrize("_overwrite_existing", [True, False])
def test_move_example_dataset(self, tmp_path_factory, dataset_download: ExampleDataset,
                              _overwrite_existing: bool):
    """
    Tests to make sure the proper files are moved to the correct directories.

    Four scenarios are exercised, two per `overwrite_existing` setting:
    the destination starts empty, and the destination already contains files.

    Args:
        tmp_path_factory (pytest.TempPathFactory): Factory for temporary directories under the
            common base temp directory.
        dataset_download (ExampleDataset): Fixture for the dataset, respective to each
            partition (`segment_image_data`, `cluster_pixels`, `cluster_cells`,
            `post_clustering`).
        _overwrite_existing (bool): If `True` the dataset will be overwritten. If `False` it
            will not be.
    """
    dataset_download.overwrite_existing = _overwrite_existing

    # Move data if _overwrite_existing is `True`
    if _overwrite_existing:

        # Case 1: Move Path is empty
        tmp_dir_c1 = tmp_path_factory.mktemp("move_example_data_c1")
        move_dir_c1 = tmp_dir_c1 / "example_dataset"
        dataset_download.move_example_dataset(move_dir=move_dir_c1)

        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c1):
            self.dataset_test_fns[ds_n](dir_p)

        # Case 2: Move Path contains files
        tmp_dir_c2 = tmp_path_factory.mktemp("move_example_data_c2")
        move_dir_c2 = tmp_dir_c2 / "example_dataset"

        # Add files for each config to test moving with files
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            # make directory
            dir_p.mkdir(parents=True, exist_ok=False)
            # make blank file
            test_utils._make_blank_file(dir_p, "data_test.txt")

        # Move files to directory which has existing files
        # Make sure warning is raised
        with pytest.warns(UserWarning):
            dataset_download.move_example_dataset(move_dir=move_dir_c2)
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            self.dataset_test_fns[ds_n](dir_p)

    # Move data if _overwrite_existing is `False`
    else:
        # Case 1: Move Path is empty
        tmp_dir_c1 = tmp_path_factory.mktemp("move_example_data_c1")
        move_dir_c1 = tmp_dir_c1 / "example_dataset"

        # Check that the files were moved to the empty directory
        # Make sure warning is raised
        with pytest.warns(UserWarning):
            dataset_download.move_example_dataset(move_dir=move_dir_c1)

        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c1):
            self.dataset_test_fns[ds_n](dir_p)

        # Case 2: Move Path contains files
        tmp_dir_c2 = tmp_path_factory.mktemp("move_example_data_c2")
        move_dir_c2 = tmp_dir_c2 / "example_dataset"

        # Add files for each config to test moving with files
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            # make directory
            dir_p.mkdir(parents=True, exist_ok=False)
            # make blank file
            test_utils._make_blank_file(dir_p, "data_test.txt")

        # Do not move files to directory containing files
        # Make sure warning is raised.
        with pytest.warns(UserWarning):
            dataset_download.move_example_dataset(move_dir=move_dir_c2)
        # Only the pre-existing blank file remains — nothing was overwritten
        for dir_p, ds_n in self._suffix_paths(dataset_download, parent_dir=move_dir_c2):
            assert len(list(dir_p.rglob("*"))) == 1

# Will cause duplicate downloads
def test_get_example_dataset(self, tmp_path_factory):
Expand All @@ -164,9 +214,10 @@ def test_get_example_dataset(self, tmp_path_factory):
with pytest.raises(ValueError):
get_example_dataset("incorrect_dataset", save_dir=tmp_path_factory)

def test_check_downloaded(self, tmp_path):
def test_check_empty_dst(self, tmp_path):
"""
Tests to make sure that `ExampleDataset.get_example_dataset()` accurately
Tests to make sure that `ExampleDataset.check_empty_dst()` accurately
reports if a directory contains files or not.
"""

Expand All @@ -177,11 +228,11 @@ def test_check_downloaded(self, tmp_path):
packed_data_dir.mkdir(parents=True)

# Empty directory has no files
assert example_dataset.check_downloaded(empty_data_dir) is True
assert example_dataset.check_empty_dst(empty_data_dir) is True

# Directory has files
test_utils._make_blank_file(packed_data_dir, "data_test.txt")
assert example_dataset.check_downloaded(packed_data_dir) is False
assert example_dataset.check_empty_dst(packed_data_dir) is False

def _image_data_check(self, dir_p: pathlib.Path):
"""
Expand Down Expand Up @@ -321,3 +372,26 @@ def _example_cell_output_dir_check(self, dir_p: pathlib.Path):
cell_mask_names = [f.stem for f in cell_mask_files]
assert set(self._example_cell_output_dir_names["cell_masks"]) \
== set(cell_mask_names)

def _suffix_paths(self, dataset_download: "ExampleDataset",
                  parent_dir: pathlib.Path) -> Iterator:
    """
    Creates a generator where each element is a tuple of the data directory
    and the dataset name.

    Note: the return annotation previously used `Generator`, which was imported
    from `email.generator` (a MIME serializer class) rather than `typing` — the
    file already imports `Iterator` from `typing`, so that is used instead.

    Args:
        dataset_download (ExampleDataset): Fixture for the dataset, respective to each
            partition (`segment_image_data`, `cluster_pixels`, `cluster_cells`,
            `post_clustering`).
        parent_dir (pathlib.Path): The path where the example dataset will be saved.

    Yields:
        Iterator: Yields the data directory for the files to be moved, and the dataset name.
    """
    dataset_names = list(
        dataset_download.dataset_paths[dataset_download.dataset].features.keys()
    )

    # Pair each dataset name with its destination directory under `parent_dir`.
    for ds_n in dataset_names:
        yield (parent_dir / self.move_path_suffixes[ds_n], ds_n)
2 changes: 1 addition & 1 deletion templates/2_Cluster_Pixels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@
" cluster_type='pixel'\n",
")\n",
"pixel_mcd.output_mapping_filename = os.path.join(base_dir, pixel_meta_cluster_remap_name)\n",
"pixel_mcg = MetaClusterGui(pixel_mcd, width=17"
"pixel_mcg = MetaClusterGui(pixel_mcd, width=17)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion templates/3_Cluster_Cells.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@
" img_data_path=tiff_dir,\n",
" mask_output_dir=os.path.join(base_dir, \"segmentation\", cell_output_dir, \"cell_masks\"),\n",
" mapping = os.path.join(base_dir, cell_meta_cluster_remap_name),\n",
" seg_dir=os.path.join(base_dir, deepcell_output, \"deepcell_output\"),\n",
" seg_dir=os.path.join(base_dir, \"segmentation\", \"deepcell_output\"),\n",
" mask_suffix=\"_cell_mask\")"
]
},
Expand Down
12 changes: 7 additions & 5 deletions templates/example_fiber_segmentation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,8 @@
}
],
"metadata": {
"interpreter": {
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "ark-3.8",
"language": "python",
"name": "python3"
},
Expand All @@ -189,7 +186,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.8.13 (default, Sep 30 2022, 15:46:23) \n[Clang 14.0.0 (clang-1400.0.29.102)]"
},
"vscode": {
"interpreter": {
"hash": "b4883fe62ab0956c6f629c0d5453344976a44af1f953254d3dc7063b257762f9"
}
}
},
"nbformat": 4,
Expand Down

0 comments on commit eba8169

Please sign in to comment.