diff --git a/docs/source/copying.rst b/docs/source/copying.rst index ffee65419..d091f35fd 100644 --- a/docs/source/copying.rst +++ b/docs/source/copying.rst @@ -119,11 +119,6 @@ Forward slashes are used for directory separators throughout. .. dropdown:: 1e. Directory to existing directory - .. warning:: - - ``maxdepth`` is not yet implemented for copying functions - (`issue 1231 `_). - .. code-block:: python cp("source/subdir/", "target/", recursive=True) @@ -168,12 +163,21 @@ Forward slashes are used for directory separators throughout. └── 📁 nesteddir └── 📄 nestedfile -.. dropdown:: 1f. Directory to new directory + Again the depth of recursion can be controlled using the ``maxdepth`` keyword argument, for + example: + + .. code-block:: python + + cp("source/subdir", "target/", recursive=True, maxdepth=1) - .. warning:: + results in:: + + 📁 target + └── 📁 subdir + ├── 📄 subfile1 + └── 📄 subfile2 - ``maxdepth`` is not yet implemented for copying functions - (`issue 1231 `_). +.. dropdown:: 1f. Directory to new directory .. code-block:: python @@ -192,7 +196,18 @@ Forward slashes are used for directory separators throughout. They are recommended to explicitly indicate both are directories. The ``recursive=True`` keyword argument is required otherwise the call does nothing. The depth - of recursion can be controlled using the ``maxdepth`` keyword argument. + of recursion can be controlled using the ``maxdepth`` keyword argument, for example: + + .. code-block:: python + + cp("source/subdir/", "target/newdir/", recursive=True, maxdepth=1) + + results in:: + + 📁 target + └── 📁 newdir + ├── 📄 subfile1 + └── 📄 subfile2 .. dropdown:: 1g. Glob to existing directory @@ -222,11 +237,21 @@ Forward slashes are used for directory separators throughout. └── 📁 nesteddir └── 📄 nestedfile - The depth of recursion can be controlled by the ``maxdepth`` keyword argument. - The trailing slash on ``"target/"`` is optional but recommended as it explicitly indicates that the target is a directory. + The depth of recursion can be controlled by the ``maxdepth`` keyword argument, for example: + + .. code-block:: python + + cp("source/subdir/*", "target/", recursive=True, maxdepth=1) + + results in:: + + 📁 target + ├── 📄 subfile1 + └── 📄 subfile2 + .. dropdown:: 1h. Glob to new directory Nonrecursive @@ -257,11 +282,22 @@ Forward slashes are used for directory separators throughout. └── 📁 nesteddir └── 📄 nestedfile - The depth of recursion can be controlled by the ``maxdepth`` keyword argument. - The trailing slash on the ``target`` is optional but recommended as it explicitly indicates that it is a directory. + The depth of recursion can be controlled by the ``maxdepth`` keyword argument, for example: + + .. code-block:: python + + cp("source/subdir/*", "target/newdir/", recursive=True, maxdepth=1) + + results in:: + + 📁 target + └── 📁 newdir + ├── 📄 subfile1 + └── 📄 subfile2 + These calls fail if the ``target`` file system is not capable of creating the directory, for example if it is write-only or if ``auto_mkdir=False``. There is no command line equivalent of this scenario without an explicit ``mkdir`` to create the new directory. diff --git a/fsspec/spec.py b/fsspec/spec.py index 2233b224e..db401b246 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -869,7 +869,15 @@ def get_file( if not isfilelike(lpath): outfile.close() - def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs): + def get( + self, + rpath, + lpath, + recursive=False, + callback=_DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): """Copy file(s) to local. Copies a specific file or tree of files (if recursive=True). If lpath @@ -887,8 +895,8 @@ def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg ) source_is_str = isinstance(rpath, str) - rpaths = self.expand_path(rpath, recursive=recursive) - if source_is_str and not recursive: + rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): # Non-recursive glob does not copy directories rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))] if not rpaths: @@ -932,7 +940,15 @@ def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs): segment_len = len(data) callback.relative_update(segment_len) - def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs): + def put( + self, + lpath, + rpath, + recursive=False, + callback=_DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): """Copy file(s) from local. Copies a specific file or tree of files (if recursive=True). If rpath @@ -952,8 +968,8 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg lpath = make_path_posix(lpath) fs = LocalFileSystem() source_is_str = isinstance(lpath, str) - lpaths = fs.expand_path(lpath, recursive=recursive) - if source_is_str and not recursive: + lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): # Non-recursive glob does not copy directories lpaths = [p for p in lpaths if not (trailing_sep(p) or self.isdir(p))] if not lpaths: @@ -992,7 +1008,9 @@ def tail(self, path, size=1024): def cp_file(self, path1, path2, **kwargs): raise NotImplementedError - def copy(self, path1, path2, recursive=False, on_error=None, **kwargs): + def copy( + self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs + ): """Copy within two locations in the filesystem on_error : "raise", "ignore" @@ -1008,8 +1026,8 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs): on_error = "raise" source_is_str = isinstance(path1, str) - paths = self.expand_path(path1, recursive=recursive) - if source_is_str and not recursive: + paths = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): # Non-recursive glob does not copy directories paths = [p for p in paths if not (trailing_sep(p) or self.isdir(p))] if not paths: @@ -1051,11 +1069,16 @@ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): bit = set(self.glob(p, **kwargs)) out |= bit if recursive: + # glob call above expanded one depth so if maxdepth is defined + # then decrement it in expand_path call below. If it is zero + # after decrementing then avoid expand_path call. + if maxdepth is not None and maxdepth <= 1: + continue out |= set( self.expand_path( list(bit), recursive=recursive, - maxdepth=maxdepth, + maxdepth=maxdepth - 1 if maxdepth is not None else None, **kwargs, ) ) diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 6173bbfeb..eb283649b 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -102,15 +102,9 @@ def test_copy_directory_to_existing_directory( assert fs.isfile(fs_join(target, "subfile2")) assert fs.isdir(fs_join(target, "nesteddir")) assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) - fs.rm( - [ - fs_join(target, "subfile1"), - fs_join(target, "subfile2"), - fs_join(target, "nesteddir"), - ], - recursive=True, - ) + fs.rm(fs.ls(target, detail=False), recursive=True) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -121,8 +115,23 @@ def test_copy_directory_to_existing_directory( fs.rm(fs_join(target, "subdir"), recursive=True) assert fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.cp(s, t, recursive=True, maxdepth=1) + if source_slash: + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs.ls(target, detail=False), recursive=True) + else: + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile1")) + assert fs.isfile(fs_join(target, "subdir", "subfile2")) + assert not fs.exists(fs_join(target, "subdir", "nesteddir")) + + fs.rm(fs_join(target, "subdir"), recursive=True) + assert fs.ls(target) == [] def test_copy_directory_to_new_directory( self, fs, fs_join, fs_path, fs_scenario_cp @@ -152,12 +161,21 @@ def test_copy_directory_to_new_directory( assert fs.isfile(fs_join(target, "newdir", "subfile2")) assert fs.isdir(fs_join(target, "newdir", "nesteddir")) assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) fs.rm(fs_join(target, "newdir"), recursive=True) assert fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.cp(s, t, recursive=True, maxdepth=1) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert fs.ls(target) == [] def test_copy_glob_to_existing_directory( self, fs, fs_join, fs_path, fs_scenario_cp @@ -193,8 +211,15 @@ def test_copy_glob_to_existing_directory( fs.rm(fs.ls(target, detail=False), recursive=True) assert fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + + fs.rm(fs.ls(target, detail=False), recursive=True) + assert fs.ls(target) == [] def test_copy_glob_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): # Copy scenario 1h @@ -234,8 +259,17 @@ def test_copy_glob_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): fs.rm(fs_join(target, "newdir"), recursive=True) assert fs.ls(target) == [] - # Limit by maxdepth - # ERROR: this is not correct + # Limit recursive by maxdepth + fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs.ls(target, detail=False), recursive=True) + assert fs.ls(target) == [] def test_copy_list_of_files_to_existing_directory( self, fs, fs_join, fs_path, fs_scenario_cp