diff --git a/docs/source/copying.rst b/docs/source/copying.rst index dc7035d5b..e2309e6ba 100644 --- a/docs/source/copying.rst +++ b/docs/source/copying.rst @@ -55,14 +55,15 @@ Forward slashes are used for directory separators throughout. .. code-block:: python - cp("source/subdir/subfile1", "target") + cp("source/subdir/subfile1", "target/") results in:: πŸ“ target └── πŸ“„ subfile1 - The same result is obtained if the target has a trailing slash: ``"target/"``. + The trailing slash on ``"target/"`` is optional but recommended as it explicitly indicates that + the target is a directory. .. dropdown:: 1b. File to new directory @@ -94,7 +95,8 @@ Forward slashes are used for directory separators throughout. πŸ“ target └── πŸ“„ newfile - The same result is obtained if the target has a trailing slash: ``target/newfile/``. + The target cannot have a trailing slash as ``"newfile/"`` is interpreted as a new directory + which is a different scenario (1b. File to new directory). .. dropdown:: 1d. File to file in new directory @@ -115,5 +117,217 @@ Forward slashes are used for directory separators throughout. If there is a trailing slash on the target ``target/newdir/newfile/`` then it is interpreted as a new directory which is a different scenario (1b. File to new directory). +.. dropdown:: 1e. Directory to existing directory + + .. warning:: + + ``recursive=False`` is not correct + (`issue 1232 `_). + + ``maxdepth`` is not yet implemented for copying functions + (`issue 1231 `_). + + .. code-block:: python + + cp("source/subdir/", "target/", recursive=True) + + results in:: + + πŸ“ target + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + └── πŸ“ nesteddir + └── πŸ“„ nestedfile + + The ``recursive=True`` keyword argument is required otherwise the call does nothing. The depth + of recursion can be controlled using the ``maxdepth`` keyword argument, for example: + + .. code-block:: python + + cp("source/subdir/", "target/", recursive=True, maxdepth=1) + + results in:: + + πŸ“ target + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + + The trailing slash on ``"target/"`` is optional but recommended as it explicitly indicates that + the target is a directory. + + If the trailing slash is omitted from ``"source/subdir"`` then the ``subdir`` is also copied, + not just its contents: + + .. code-block:: python + + cp("source/subdir", "target/", recursive=True) + + results in:: + + πŸ“ target + └── πŸ“ subdir + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + └── πŸ“ nesteddir + └── πŸ“„ nestedfile + +.. dropdown:: 1f. Directory to new directory + + .. warning:: + + ``recursive=False`` is not correct + (`issue 1232 `_). + + ``maxdepth`` is not yet implemented for copying functions + (`issue 1231 `_). + + .. code-block:: python + + cp("source/subdir/", "target/newdir/", recursive=True) + + results in:: + + πŸ“ target + └── πŸ“ newdir + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + └── πŸ“ nesteddir + └── πŸ“„ nestedfile + + Trailing slashes on both ``source`` and ``target`` are optional and do not affect the result. + They are recommended to explicitly indicate both are directories. + + The ``recursive=True`` keyword argument is required otherwise the call does nothing. The depth + of recursion can be controlled using the ``maxdepth`` keyword argument. + +.. dropdown:: 1g. Glob to existing directory + + .. warning:: + + This does not currently work correctly, it creates a extra directory + (`issue 1233 `_). + + Nonrecursive + + .. code-block:: python + + cp("source/subdir/*", "target/") + + copies files from the top-level directory only and results in:: + + πŸ“ target + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + + Recursive + + .. code-block:: python + + cp("source/subdir/*", "target/", recursive=True) + + results in:: + + πŸ“ target + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + └── πŸ“ nesteddir + └── πŸ“„ nestedfile + + The depth of recursion can be controlled by the ``maxdepth`` keyword argument. + + The trailing slash on ``"target/"`` is optional but recommended as it explicitly indicates that + the target is a directory. + +.. dropdown:: 1h. Glob to new directory + + .. warning:: + + This does not currently work correctly, it creates a extra directory + (`issue 1233 `_). + + Nonrecursive + + .. code-block:: python + + cp("source/subdir/*", "target/newdir/") + + copies files from the top-level directory only and results in:: + + πŸ“ target + └── πŸ“ newdir + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + + Recursive + + .. code-block:: python + + cp("source/subdir/*", "target/newdir/", recursive=True) + + results in:: + + πŸ“ target + └── πŸ“ newdir + β”œβ”€β”€ πŸ“„ subfile1 + └── πŸ“„ subfile2 + └── πŸ“ nesteddir + └── πŸ“„ nestedfile + + The depth of recursion can be controlled by the ``maxdepth`` keyword argument. + + The trailing slash on the ``target`` is optional but recommended as it explicitly indicates that + it is a directory. + + These calls fail if the ``target`` file system is not capable of creating the directory, for + example if it is write-only or if ``auto_mkdir=False``. There is no command line equivalent of + this scenario without an explicit ``mkdir`` to create the new directory. + 2. Multiple source to single target ----------------------------------- + +.. dropdown:: 2a. List of files to existing directory + + .. warning:: + + This is not correct currently, it does not place all files in the same directory + (`issue 1234 `_). + + .. code-block:: python + + cp(["source/file1", "source/file2", "source/subdir/subfile1"], "target/") + + results in:: + + πŸ“ target + β”œβ”€β”€ πŸ“„ file1 + β”œβ”€β”€ πŸ“„ file2 + └── πŸ“„ subfile1 + + All of the files are copied to the target directory regardless of their relative paths in the + source filesystem. The trailing slash on the ``target`` is optional but recommended as it + explicitly indicates that it is a directory. + +.. dropdown:: 2b. List of files to new directory + + .. warning:: + + This is not correct currently, it does not place all files in the same directory + (`issue 1234 `_). + + .. code-block:: python + + cp(["source/file1", "source/file2", "source/subdir/subfile1"], "target/newdir/") + + results in:: + + πŸ“ target + └── πŸ“ newdir + β”œβ”€β”€ πŸ“„ file1 + β”œβ”€β”€ πŸ“„ file2 + └── πŸ“„ subfile1 + + All of the files are copied to the target directory regardless of their relative paths in the + source filesystem. + + The trailing slash is required on the new directory otherwise it is interpreted as a filename + rather than a directory. diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 36eab3a52..371d45a77 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -76,7 +76,205 @@ def test_copy_file_to_file_in_new_directory( assert fs.isdir(fs_join(target, "newdir")) assert fs.isfile(fs_join(target, "newdir", "newfile")) + def test_copy_directory_to_existing_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 1e + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = target + "/" if target_slash else target + + # Without recursive does nothing + # ERROR: erroneously creates new directory + # fs.cp(s, t) + # assert fs.ls(target) == [] + + # With recursive + fs.cp(s, t, recursive=True) + if source_slash: + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + else: + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile1")) + assert fs.isfile(fs_join(target, "subdir", "subfile2")) + assert fs.isdir(fs_join(target, "subdir", "nesteddir")) + assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) + + fs.rm(fs_join(target, "subdir"), recursive=True) + assert fs.ls(target) == [] + + # Limit by maxdepth + # ERROR: maxdepth ignored here + + def test_copy_directory_to_new_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 1f + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + for source_slash, target_slash in zip([False, True], [False, True]): + s = fs_join(source, "subdir") + if source_slash: + s += "/" + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive does nothing + # ERROR: erroneously creates new directory + # fs.cp(s, t) + # assert fs.ls(target) == [] + + # With recursive + fs.cp(s, t, recursive=True) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert fs.ls(target) == [] + + # Limit by maxdepth + # ERROR: maxdepth ignored here + + def test_copy_glob_to_existing_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 1g + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + # for target_slash in [False, True]: + for target_slash in [False]: + t = target + "/" if target_slash else target + + # Without recursive + fs.cp(fs_join(source, "subdir", "*"), t) + # ERROR: this is not correct + # assert fs.isfile(fs_join(target, "subfile1")) + # assert fs.isfile(fs_join(target, "subfile2")) + # assert not fs.isdir(fs_join(target, "subdir")) + + # With recursive + + # Limit by maxdepth + + def test_copy_glob_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): + # Copy scenario 1h + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + # for target_slash in [False, True]: + for target_slash in [False]: + t = fs_join(target, "newdir") + if target_slash: + t += "/" + + # Without recursive + fs.cp(fs_join(source, "subdir", "*"), t) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + # ERROR - do not copy empty directory + # assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert fs.ls(target) == [] + + # With recursive + fs.cp(fs_join(source, "subdir", "*"), t, recursive=True) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert fs.ls(target) == [] + + # Limit by maxdepth + # ERROR: this is not correct + + def test_copy_list_of_files_to_existing_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 2a + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + source_files = [ + fs_join(source, "file1"), + fs_join(source, "file2"), + fs_join(source, "subdir", "subfile1"), + ] + + # for target_slash in [False, True]: + for target_slash in [True]: + t = target + "/" if target_slash else target + + fs.cp(source_files, t) + assert fs.isfile(fs_join(target, "file1")) + assert fs.isfile(fs_join(target, "file2")) + # assert fs.isfile(fs_join(target, "subfile1")) # ERROR + + # fs.rm() + + def test_copy_list_of_files_to_new_directory( + self, fs, fs_join, fs_path, fs_scenario_cp + ): + # Copy scenario 2b + source = fs_scenario_cp + + target = fs_join(fs_path, "target") + fs.mkdir(target) + + source_files = [ + fs_join(source, "file1"), + fs_join(source, "file2"), + fs_join(source, "subdir", "subfile1"), + ] + + fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "file1")) + assert fs.isfile(fs_join(target, "newdir", "file2")) + # assert fs.isfile(fs_join(target, "newdir", "subfile1")) #Β ERROR + + # If no trailing slash on target it is interpreted as a filename not directory + def test_copy_two_files_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): + # This is a duplicate of test_copy_list_of_files_to_new_directory and + # can eventually be removed. source = fs_scenario_cp target = fs_join(fs_path, "target")