diff --git a/docs/environment.yml b/docs/environment.yml index e65fb13ac..2ab1379ae 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -7,3 +7,5 @@ dependencies: - numpydoc - sphinx_rtd_theme - yarl + - pip: + - sphinx-design diff --git a/docs/source/conf.py b/docs/source/conf.py index d916fd0f5..5f4622802 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -37,6 +37,7 @@ "sphinx.ext.autosummary", "sphinx.ext.extlinks", "numpydoc", + "sphinx_design", ] numpydoc_show_class_members = False diff --git a/docs/source/copying.rst b/docs/source/copying.rst new file mode 100644 index 000000000..dc7035d5b --- /dev/null +++ b/docs/source/copying.rst @@ -0,0 +1,119 @@ +Copying files and directories +============================= + +This documents the expected behavior of the ``fsspec`` file and directory copying functions. +There are three functions of interest here: :meth:`~fsspec.spec.AbstractFileSystem.copy`, +:meth:`~fsspec.spec.AbstractFileSystem.get` and :meth:`~fsspec.spec.AbstractFileSystem.put`. +Each of these copies files and/or directories from a ``source`` to a ``target`` location. +If we refer to our filesystem of interest, derived from :class:`~fsspec.spec.AbstractFileSystem`, +as the remote filesystem (even though it may be local) then the difference between the three +functions is: + + - :meth:`~fsspec.spec.AbstractFileSystem.copy` copies from a remote ``source`` to a remote ``target`` + - :meth:`~fsspec.spec.AbstractFileSystem.get` copies from a remote ``source`` to a local ``target`` + - :meth:`~fsspec.spec.AbstractFileSystem.put` copies from a local ``source`` to a remote ``target`` + +The ``source`` and ``target`` are the first two arguments passed to these functions, and each +consists of one or more files, directories and/or ``glob`` (wildcard) patterns. +The behavior of the ``fsspec`` copy functions is intended to be the same as that obtained using +POSIX command line ``cp`` but ``fsspec`` functions have extra functionality because: + + - They support more than one ``target`` whereas command line ``cp`` is restricted to one. + - They can create new directories, either automatically or via the ``auto_mkdir=True`` keyword + argument, whereas command line ``cp`` only does this as part of a recursive copy. + +Expected behavior +----------------- + +There follows a comprehensive list of the expected behavior of the ``fsspec`` copying functions +that also forms the basis of a set of tests that all classes that derive from +:class:`~fsspec.spec.AbstractFileSystem` can be tested against to check that they conform. +For all scenarios the ``source`` filesystem contains the following directories and files:: + + 📁 source + ├── 📄 file1 + ├── 📄 file2 + └── 📁 subdir + ├── 📄 subfile1 + ├── 📄 subfile2 + └── 📁 nesteddir + └── 📄 nestedfile + +and before each scenario the ``target`` directory exists and is empty unless otherwise noted:: + + 📁 target + +All example code uses :meth:`~fsspec.spec.AbstractFileSystem.cp` which is an alias of +:meth:`~fsspec.spec.AbstractFileSystem.copy`; equivalent behavior is expected by +:meth:`~fsspec.spec.AbstractFileSystem.get` and :meth:`~fsspec.spec.AbstractFileSystem.put`. +Forward slashes are used for directory separators throughout. + +1. Single source to single target +--------------------------------- + +.. dropdown:: 1a. File to existing directory + + .. code-block:: python + + cp("source/subdir/subfile1", "target") + + results in:: + + 📁 target + └── 📄 subfile1 + + The same result is obtained if the target has a trailing slash: ``"target/"``. + +.. dropdown:: 1b. File to new directory + + .. code-block:: python + + cp("source/subdir/subfile1", "target/newdir/") + + results in:: + + 📁 target + └── 📁 newdir + └── 📄 subfile1 + + This fails if the ``target`` file system is not capable of creating the directory, for example + if it is write-only or if ``auto_mkdir=False``. There is no command line equivalent of this + scenario without an explicit ``mkdir`` to create the new directory. + + The trailing slash is required on the new directory otherwise it is interpreted as a filename + which is a different scenario (1d. File to file in new directory). + +.. dropdown:: 1c. File to file in existing directory + + .. code-block:: python + + cp("source/subdir/subfile1", "target/newfile") + + results in:: + + 📁 target + └── 📄 newfile + + The same result is obtained if the target has a trailing slash: ``target/newfile/``. + +.. dropdown:: 1d. File to file in new directory + + .. code-block:: python + + cp("source/subdir/subfile1", "target/newdir/newfile") + + creates the new directory and copies the file into it:: + + 📁 target + └── 📁 newdir + └── 📄 newfile + + This fails if the ``target`` file system is not capable of creating the directory, for example + if it is write-only or if ``auto_mkdir=False``. There is no command line equivalent of this + scenario without an explicit ``mkdir`` to create the new directory. + + If there is a trailing slash on the target ``target/newdir/newfile/`` then it is interpreted as + a new directory which is a different scenario (1b. File to new directory). + +2. Multiple source to single target +----------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 7836e298d..9ab3e1e64 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -107,6 +107,7 @@ The current list of known implementations can be found as follows intro.rst usage.rst features.rst + copying.rst developer.rst async.rst api.rst diff --git a/fsspec/implementations/tests/memory/memory_fixtures.py b/fsspec/implementations/tests/memory/memory_fixtures.py index 4dc279357..d30521221 100644 --- a/fsspec/implementations/tests/memory/memory_fixtures.py +++ b/fsspec/implementations/tests/memory/memory_fixtures.py @@ -19,8 +19,10 @@ def fs(): m.pseudo_dirs.clear() m.pseudo_dirs.append("") - def fs_join(self, *args): - return "/".join(args) + @staticmethod + @pytest.fixture + def fs_join(): + return lambda *args: "/".join(args) @staticmethod @pytest.fixture diff --git a/fsspec/tests/abstract/__init__.py b/fsspec/tests/abstract/__init__.py index 475014e7e..4301ac0d7 100644 --- a/fsspec/tests/abstract/__init__.py +++ b/fsspec/tests/abstract/__init__.py @@ -9,10 +9,42 @@ class AbstractFixtures: - def fs_join(self, *args): - # Most fsspec implementations join paths in a platform-dependent way, - # but some will override this to always use a forward slash. - return os.path.join(*args) + @staticmethod + @pytest.fixture + def fs_join(): + """ + Return a function that joins its arguments together into a path. + + Most fsspec implementations join paths in a platform-dependent way, + but some will override this to always use a forward slash. + """ + return os.path.join + + @staticmethod + @pytest.fixture + def fs_scenario_cp(fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used for many cp/get/put tests. + + 📁 source + ├── 📄 file1 + ├── 📄 file2 + └── 📁 subdir + ├── 📄 subfile1 + ├── 📄 subfile2 + └── 📁 nesteddir + └── 📄 nestedfile + """ + source = fs_join(fs_path, "source") + subdir = fs_join(source, "subdir") + nesteddir = fs_join(subdir, "nesteddir") + fs.makedirs(nesteddir) + fs.touch(fs_join(source, "file1")) + fs.touch(fs_join(source, "file2")) + fs.touch(fs_join(subdir, "subfile1")) + fs.touch(fs_join(subdir, "subfile2")) + fs.touch(fs_join(nesteddir, "nestedfile")) + return source @staticmethod @pytest.fixture @@ -21,10 +53,22 @@ def local_fs(): # for certain implementations. return LocalFileSystem(auto_mkdir=True) - def local_join(self, *args): - return os.path.join(*args) + @staticmethod + @pytest.fixture + def local_join(): + """ + Return a function that joins its arguments together into a path, on + the local filesystem. + """ + return os.path.join @staticmethod @pytest.fixture def local_path(tmpdir): return tmpdir + + def supports_empty_directories(self): + """ + Return whether this implementation supports empty directories. + """ + return True diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index fda1273bf..36eab3a52 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -1,16 +1,88 @@ class AbstractCopyTests: - def test_copy_two_files_new_directory(self, fs, fs_path): - source = self.fs_join(fs_path, "src") - file0 = self.fs_join(source, "file0") - file1 = self.fs_join(source, "file1") - fs.mkdir(source) - fs.touch(file0) - fs.touch(file1) - - target = self.fs_join(fs_path, "target") + def test_copy_file_to_existing_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 1a + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + if not self.supports_empty_directories(): + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) + + target_file2 = fs_join(target, "file2") + target_subfile1 = fs_join(target, "subfile1") + + # Copy from source directory + fs.cp(fs_join(source, "file2"), target) + assert fs.isfile(target_file2) + + # Copy from sub directory + fs.cp(fs_join(source, "subdir", "subfile1"), target) + assert fs.isfile(target_subfile1) + + # Remove copied files + fs.rm([target_file2, target_subfile1]) + assert not fs.exists(target_file2) + assert not fs.exists(target_subfile1) + + # Repeat with trailing slash on target + fs.cp(fs_join(source, "file2"), target + "/") + assert fs.isdir(target) + assert fs.isfile(target_file2) + + fs.cp(fs_join(source, "subdir", "subfile1"), target + "/") + assert fs.isfile(target_subfile1) + + def test_copy_file_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): + # Copy scenario 1b + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + fs.cp( + fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/") + ) # Note trailing slash + assert fs.isdir(target) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + + def test_copy_file_to_file_in_existing_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 1c + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) + assert fs.isfile(fs_join(target, "newfile")) + + def test_copy_file_to_file_in_new_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 1d + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + fs.cp( + fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile") + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "newfile")) + + def test_copy_two_files_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): + source = fs_scenario_cp + + target = fs_join(fs_path, "target") assert not fs.exists(target) - fs.cp([file0, file1], target) + fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target) assert fs.isdir(target) - assert fs.isfile(self.fs_join(target, "file0")) - assert fs.isfile(self.fs_join(target, "file1")) + assert fs.isfile(fs_join(target, "file1")) + assert fs.isfile(fs_join(target, "file2")) diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index 7b1ac330f..992e9c6ae 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -1,13 +1,15 @@ class AbstractGetTests: - def test_get_directory_recursive(self, fs, fs_path, local_fs, local_path): + def test_get_directory_recursive( + self, fs, fs_join, fs_path, local_fs, local_join, local_path + ): # https://github.com/fsspec/filesystem_spec/issues/1062 # Recursive cp/get/put of source directory into non-existent target directory. - src = self.fs_join(fs_path, "src") - src_file = self.fs_join(src, "file") + src = fs_join(fs_path, "src") + src_file = fs_join(src, "file") fs.mkdir(src) fs.touch(src_file) - target = self.local_join(local_path, "target") + target = local_join(local_path, "target") # get without slash assert not local_fs.exists(target) @@ -16,12 +18,12 @@ def test_get_directory_recursive(self, fs, fs_path, local_fs, local_path): assert local_fs.isdir(target) if loop == 0: - assert local_fs.isfile(self.local_join(target, "file")) - assert not local_fs.exists(self.local_join(target, "src")) + assert local_fs.isfile(local_join(target, "file")) + assert not local_fs.exists(local_join(target, "src")) else: - assert local_fs.isfile(self.local_join(target, "file")) - assert local_fs.isdir(self.local_join(target, "src")) - assert local_fs.isfile(self.local_join(target, "src", "file")) + assert local_fs.isfile(local_join(target, "file")) + assert local_fs.isdir(local_join(target, "src")) + assert local_fs.isfile(local_join(target, "src", "file")) local_fs.rm(target, recursive=True) @@ -30,5 +32,5 @@ def test_get_directory_recursive(self, fs, fs_path, local_fs, local_path): for loop in range(2): fs.get(src + "/", target, recursive=True) assert local_fs.isdir(target) - assert local_fs.isfile(self.local_join(target, "file")) - assert not local_fs.exists(self.local_join(target, "src")) + assert local_fs.isfile(local_join(target, "file")) + assert not local_fs.exists(local_join(target, "src")) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index d0d5b0786..cdb76a8b7 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -1,13 +1,15 @@ class AbstractPutTests: - def test_put_directory_recursive(self, fs, fs_path, local_fs, local_path): + def test_put_directory_recursive( + self, fs, fs_join, fs_path, local_fs, local_join, local_path + ): # https://github.com/fsspec/filesystem_spec/issues/1062 # Recursive cp/get/put of source directory into non-existent target directory. - src = self.local_join(local_path, "src") - src_file = self.local_join(src, "file") + src = local_join(local_path, "src") + src_file = local_join(src, "file") local_fs.mkdir(src) local_fs.touch(src_file) - target = self.fs_join(fs_path, "target") + target = fs_join(fs_path, "target") # put without slash assert not fs.exists(target) @@ -16,12 +18,12 @@ def test_put_directory_recursive(self, fs, fs_path, local_fs, local_path): assert fs.isdir(target) if loop == 0: - assert fs.isfile(self.fs_join(target, "file")) - assert not fs.exists(self.fs_join(target, "src")) + assert fs.isfile(fs_join(target, "file")) + assert not fs.exists(fs_join(target, "src")) else: - assert fs.isfile(self.fs_join(target, "file")) - assert fs.isdir(self.fs_join(target, "src")) - assert fs.isfile(self.fs_join(target, "src", "file")) + assert fs.isfile(fs_join(target, "file")) + assert fs.isdir(fs_join(target, "src")) + assert fs.isfile(fs_join(target, "src", "file")) fs.rm(target, recursive=True) @@ -30,5 +32,5 @@ def test_put_directory_recursive(self, fs, fs_path, local_fs, local_path): for loop in range(2): fs.put(src + "/", target, recursive=True) assert fs.isdir(target) - assert fs.isfile(self.fs_join(target, "file")) - assert not fs.exists(self.fs_join(target, "src")) + assert fs.isfile(fs_join(target, "file")) + assert not fs.exists(fs_join(target, "src"))