From 34629ccc4d7da436372775122b0de006d55d22bd Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Thu, 20 Apr 2023 11:12:49 +0200 Subject: [PATCH 1/3] Ignore directories when copying list of files --- docs/source/copying.rst | 10 ---------- fsspec/spec.py | 12 +++++++++--- fsspec/tests/abstract/copy.py | 23 ++++++++++------------- fsspec/utils.py | 22 +++++++++++++++------- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/docs/source/copying.rst b/docs/source/copying.rst index e2309e6ba..e72c628c7 100644 --- a/docs/source/copying.rst +++ b/docs/source/copying.rst @@ -287,11 +287,6 @@ Forward slashes are used for directory separators throughout. .. dropdown:: 2a. List of files to existing directory - .. warning:: - - This is not correct currently, it does not place all files in the same directory - (`issue 1234 `_). - .. code-block:: python cp(["source/file1", "source/file2", "source/subdir/subfile1"], "target/") @@ -309,11 +304,6 @@ Forward slashes are used for directory separators throughout. .. dropdown:: 2b. List of files to new directory - .. warning:: - - This is not correct currently, it does not place all files in the same directory - (`issue 1234 `_). - .. code-block:: python cp(["source/file1", "source/file2", "source/subdir/subfile1"], "target/newdir/") diff --git a/fsspec/spec.py b/fsspec/spec.py index 7bbf09099..35d8f69af 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -887,11 +887,13 @@ def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg isdir = isinstance(lpath, str) and ( lpath.endswith("/") or LocalFileSystem().isdir(lpath) ) + source_is_str = isinstance(rpath, str) lpaths = other_paths( rpaths, lpath, - exists=isdir and isinstance(rpath, str) and not rpath.endswith("/"), + exists=isdir and source_is_str and not rpath.endswith(("/", "/*")), is_dir=isdir, + flatten=not source_is_str, ) callback.set_size(len(lpaths)) @@ -940,11 +942,13 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg fs = LocalFileSystem() lpaths = fs.expand_path(lpath, recursive=recursive) isdir = isinstance(rpath, str) and (rpath.endswith("/") or self.isdir(rpath)) + source_is_str = isinstance(lpath, str) rpaths = other_paths( lpaths, rpath, - exists=isdir and isinstance(lpath, str) and not lpath.endswith("/"), + exists=isdir and source_is_str and not lpath.endswith(("/", "/*")), is_dir=isdir, + flatten=not source_is_str, ) callback.set_size(len(rpaths)) @@ -981,11 +985,13 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs): paths = self.expand_path(path1, recursive=recursive) isdir = isinstance(path2, str) and (path2.endswith("/") or self.isdir(path2)) + source_is_str = isinstance(path1, str) path2 = other_paths( paths, path2, - exists=isdir and isinstance(path1, str) and not path1.endswith("/"), + exists=isdir and source_is_str and not path1.endswith(("/", "/*")), is_dir=isdir, + flatten=not source_is_str, ) for p1, p2 in zip(paths, path2): diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 371d45a77..1460e63b4 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -176,10 +176,10 @@ def test_copy_glob_to_existing_directory( # Without recursive fs.cp(fs_join(source, "subdir", "*"), t) - # ERROR: this is not correct - # assert fs.isfile(fs_join(target, "subfile1")) - # assert fs.isfile(fs_join(target, "subfile2")) - # assert not fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + # assert not fs.isdir(fs_join(target, "nesteddir")) # ERROR + assert not fs.isdir(fs_join(target, "subdir")) # With recursive @@ -192,8 +192,7 @@ def test_copy_glob_to_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): target = fs_join(fs_path, "target") fs.mkdir(target) - # for target_slash in [False, True]: - for target_slash in [False]: + for target_slash in [False, True]: t = fs_join(target, "newdir") if target_slash: t += "/" @@ -238,16 +237,16 @@ def test_copy_list_of_files_to_existing_directory( fs_join(source, "subdir", "subfile1"), ] - # for target_slash in [False, True]: - for target_slash in [True]: + for target_slash in [False, True]: t = target + "/" if target_slash else target fs.cp(source_files, t) assert fs.isfile(fs_join(target, "file1")) assert fs.isfile(fs_join(target, "file2")) - # assert fs.isfile(fs_join(target, "subfile1")) # ERROR + assert fs.isfile(fs_join(target, "subfile1")) - # fs.rm() + fs.rm(fs.find(target)) + assert fs.ls(target) == [] def test_copy_list_of_files_to_new_directory( self, fs, fs_join, fs_path, fs_scenario_cp @@ -268,9 +267,7 @@ def test_copy_list_of_files_to_new_directory( assert fs.isdir(fs_join(target, "newdir")) assert fs.isfile(fs_join(target, "newdir", "file1")) assert fs.isfile(fs_join(target, "newdir", "file2")) - # assert fs.isfile(fs_join(target, "newdir", "subfile1")) # ERROR - - # If no trailing slash on target it is interpreted as a filename not directory + assert fs.isfile(fs_join(target, "newdir", "subfile1")) def test_copy_two_files_new_directory(self, fs, fs_join, fs_path, fs_scenario_cp): # This is a duplicate of test_copy_list_of_files_to_new_directory and diff --git a/fsspec/utils.py b/fsspec/utils.py index 01c44959f..51f8cc8d1 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -341,7 +341,7 @@ def common_prefix(paths): return "/".join(parts[0][:i]) -def other_paths(paths, path2, is_dir=None, exists=False): +def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): """In bulk file operations, construct a new file tree from a list of files Parameters @@ -358,21 +358,29 @@ def other_paths(paths, path2, is_dir=None, exists=False): exists: bool (optional) For a str destination, it is already exists (and is a dir), files should end up inside. + flatten: bool (optional) + Whether to flatten the input directory tree structure so that the output files + are in the same directory. Returns ------- list of str """ + if isinstance(path2, str): is_dir = is_dir or path2.endswith("/") path2 = path2.rstrip("/") - cp = common_prefix(paths) - if exists: - cp = cp.rsplit("/", 1)[0] - if not cp and all(not s.startswith("/") for s in paths): - path2 = ["/".join([path2, p]) for p in paths] + + if flatten: + path2 = ["/".join((path2, p.split("/")[-1])) for p in paths] else: - path2 = [p.replace(cp, path2, 1) for p in paths] + cp = common_prefix(paths) + if exists: + cp = cp.rsplit("/", 1)[0] + if not cp and all(not s.startswith("/") for s in paths): + path2 = ["/".join([path2, p]) for p in paths] + else: + path2 = [p.replace(cp, path2, 1) for p in paths] else: assert len(paths) == len(path2) return path2 From c673ac5c0cac95f873ac8477dfd28675ff3cd9e2 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Mon, 24 Apr 2023 12:07:04 +0100 Subject: [PATCH 2/3] Add trailing_sep and trailing_sep_maybe_asterisk functions --- fsspec/implementations/local.py | 23 +++++++++++++++++++++++ fsspec/spec.py | 28 ++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 034d06f84..bd0f91516 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -272,6 +272,29 @@ def make_path_posix(path, sep=os.sep): return path +def trailing_sep(path): + """Return True if the path ends with a path separator. + + A forward slash is always considered a path separator, even on Operating + Systems that normally use a backslash. + """ + return path.endswith(os.sep) or os.altsep and path.endswith(os.altsep) + + +def trailing_sep_maybe_asterisk(path): + """Return True if the path ends with a path separator and optionally an + asterisk. + + A forward slash is always considered a path separator, even on Operating + Systems that normally use a backslash. + """ + return ( + path.endswith((os.sep, os.sep + "*")) + or os.altsep + and path.endswith((os.altsep, os.altsep + "*")) + ) + + class LocalFileOpener(io.IOBase): def __init__( self, path, mode, autocommit=True, fs=None, compression=None, **kwargs diff --git a/fsspec/spec.py b/fsspec/spec.py index 35d8f69af..0eddea47f 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -879,19 +879,24 @@ def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg Calls get_file for each source. """ - from .implementations.local import LocalFileSystem, make_path_posix + from .implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + trailing_sep_maybe_asterisk, + ) if isinstance(lpath, str): lpath = make_path_posix(lpath) rpaths = self.expand_path(rpath, recursive=recursive) isdir = isinstance(lpath, str) and ( - lpath.endswith("/") or LocalFileSystem().isdir(lpath) + trailing_sep(lpath) or LocalFileSystem().isdir(lpath) ) source_is_str = isinstance(rpath, str) lpaths = other_paths( rpaths, lpath, - exists=isdir and source_is_str and not rpath.endswith(("/", "/*")), + exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(rpath), is_dir=isdir, flatten=not source_is_str, ) @@ -930,7 +935,12 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg Calls put_file for each source. """ - from .implementations.local import LocalFileSystem, make_path_posix + from .implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + trailing_sep_maybe_asterisk, + ) rpath = ( self._strip_protocol(rpath) @@ -941,12 +951,12 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg lpath = make_path_posix(lpath) fs = LocalFileSystem() lpaths = fs.expand_path(lpath, recursive=recursive) - isdir = isinstance(rpath, str) and (rpath.endswith("/") or self.isdir(rpath)) + isdir = isinstance(rpath, str) and (trailing_sep(rpath) or self.isdir(rpath)) source_is_str = isinstance(lpath, str) rpaths = other_paths( lpaths, rpath, - exists=isdir and source_is_str and not lpath.endswith(("/", "/*")), + exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(lpath), is_dir=isdir, flatten=not source_is_str, ) @@ -978,18 +988,20 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs): not-found exceptions will cause the path to be skipped; defaults to raise unless recursive is true, where the default is ignore """ + from .implementations.local import trailing_sep, trailing_sep_maybe_asterisk + if on_error is None and recursive: on_error = "ignore" elif on_error is None: on_error = "raise" paths = self.expand_path(path1, recursive=recursive) - isdir = isinstance(path2, str) and (path2.endswith("/") or self.isdir(path2)) + isdir = isinstance(path2, str) and (trailing_sep(path2) or self.isdir(path2)) source_is_str = isinstance(path1, str) path2 = other_paths( paths, path2, - exists=isdir and source_is_str and not path1.endswith(("/", "/*")), + exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(path1), is_dir=isdir, flatten=not source_is_str, ) From 3cbd55285b6a8835143c2530f45ad835a672d999 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Mon, 24 Apr 2023 21:50:27 +0100 Subject: [PATCH 3/3] Add TODO comments to explain possible future refactor --- fsspec/implementations/local.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index bd0f91516..08d0c5dec 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -278,6 +278,9 @@ def trailing_sep(path): A forward slash is always considered a path separator, even on Operating Systems that normally use a backslash. """ + # TODO: if all incoming paths were posix-compliant then separator would + # always be a forward slash, simplifying this function. + # See https://github.com/fsspec/filesystem_spec/pull/1250 return path.endswith(os.sep) or os.altsep and path.endswith(os.altsep) @@ -288,6 +291,9 @@ def trailing_sep_maybe_asterisk(path): A forward slash is always considered a path separator, even on Operating Systems that normally use a backslash. """ + # TODO: if all incoming paths were posix-compliant then separator would + # always be a forward slash, simplifying this function. + # See https://github.com/fsspec/filesystem_spec/pull/1250 return ( path.endswith((os.sep, os.sep + "*")) or os.altsep