Skip to content

Commit

Permalink
clobber is not threadsafe but order no longer seems to matter
Browse files Browse the repository at this point in the history
  • Loading branch information
chapmanjacobd committed Jun 13, 2024
1 parent 62a3f12 commit 7a38b11
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 23 deletions.
50 changes: 27 additions & 23 deletions xklb/folders/merge_mv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, shutil, sys
import concurrent.futures, os, shutil, sys

from xklb import usage
from xklb.utils import arggroups, argparse_utils, devices, file_utils, path_utils
Expand All @@ -9,6 +9,7 @@ def parse_args():
parser.add_argument("--copy", "--cp", "-c", action="store_true", help="Copy instead of move")
arggroups.clobber(parser)
arggroups.debug(parser)
parser.set_defaults(threads=1)

arggroups.paths_or_stdin(parser, destination=True)
parser.add_argument("destination", help="Destination directory")
Expand All @@ -17,10 +18,6 @@ def parse_args():
return args


class FolderExistsError(Exception):
pass


def mmv_file(args, source, destination):
if args.simulate:
print(source)
Expand All @@ -37,28 +34,16 @@ def mcp_file(args, source, destination):
shutil.copy2(source, destination)


def mmv_folders(args, mv_fn, sources, destination):
destination = os.path.realpath(destination)

if args.bsd:
# preserve trailing slash
sources = (os.path.realpath(s) + (os.sep if s.endswith(os.sep) else "") for s in sources)
else:
sources = (os.path.realpath(s) for s in sources)

def gen_src_dest(args, sources, destination):
for source in sources:
if os.path.isdir(source):
for p in sorted(
file_utils.rglob(source, args.ext or None)[0], key=lambda s: (s.count(os.sep), len(s), s), reverse=False
):
for p in file_utils.rglob_gen(source, args.ext or None):
file_dest = destination
if args.parent or (args.bsd and not source.endswith(os.sep)): # use BSD behavior
file_dest = os.path.join(file_dest, os.path.basename(source))
file_dest = os.path.join(file_dest, os.path.relpath(p, source))

p, file_dest = devices.clobber(args, p, file_dest)
if p:
mv_fn(args, p, file_dest)
yield p, file_dest

else:
file_dest = destination
Expand All @@ -67,9 +52,28 @@ def mmv_folders(args, mv_fn, sources, destination):
if path_utils.is_folder_dest(source, file_dest):
file_dest = os.path.join(file_dest, os.path.basename(source))

source, file_dest = devices.clobber(args, source, file_dest)
if source:
mv_fn(args, source, file_dest)
yield source, file_dest


def mmv_folders(args, mv_fn, sources, destination):
    """Transfer every file found under ``sources`` into ``destination``.

    Args:
        args: Parsed CLI namespace. ``args.bsd`` is read here; the namespace is
            also forwarded to ``gen_src_dest``, ``devices.clobber`` and ``mv_fn``.
        mv_fn: Callable invoked as ``mv_fn(args, src, dest)`` for each file that
            should be transferred.
        sources: Iterable of source paths.
        destination: Destination path; resolved with ``os.path.realpath``.

    Raises:
        Whatever a transfer task raised, re-raised when its future is collected.
    """
    destination = os.path.realpath(destination)
    keep_trailing_sep = args.bsd

    def resolve(path):
        resolved = os.path.realpath(path)
        if keep_trailing_sep and path.endswith(os.sep):
            # preserve trailing slash (realpath strips it)
            resolved += os.sep
        return resolved

    def transfer(src, dest):
        # devices.clobber hands back a (src, dest) pair; a falsy src means
        # there is nothing to transfer for this entry.
        src, dest = devices.clobber(args, src, dest)
        if src:
            mv_fn(args, src, dest)

    # NOTE(review): the pool is pinned to a single worker; the commit that
    # introduced it says clobber is not thread-safe - confirm before raising it.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        pending = []
        for src, dest in gen_src_dest(args, map(resolve, sources), destination):
            pending.append(pool.submit(transfer, src, dest))

        for finished in concurrent.futures.as_completed(pending):
            # Surface any exception raised inside the worker.
            finished.result()


def merge_mv():
Expand Down
1 change: 1 addition & 0 deletions xklb/folders/mergerfs_cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def parse_args():
arggroups.clobber(parser)
parser.set_defaults(file_over_file="delete-dest-hash rename-dest")
arggroups.debug(parser)
parser.set_defaults(threads=1)

arggroups.paths_or_stdin(parser, destination=True)
parser.add_argument("destination", help="Destination directory")
Expand Down
32 changes: 32 additions & 0 deletions xklb/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,38 @@ def rglob(
return files, filtered_files, folders


def rglob_gen(
base_dir: str | Path,
extensions=None, # None | Iterable[str]
exclude=None, # None | Iterable[str]
):
folders = set()
stack = [base_dir]
while stack:
current_dir = stack.pop()
try:
scanned_dir = os.scandir(current_dir)
except (FileNotFoundError, PermissionError):
pass
else:
for entry in scanned_dir:
if entry.is_dir(follow_symlinks=False):
if any(entry.name == pattern or fnmatch(entry.path, pattern) for pattern in exclude or []):
pass
else:
folders.add(entry.path)
stack.append(entry.path)
elif entry.is_symlink():
pass
else:
if extensions is None:
yield entry.path
else:
extension = entry.path.rsplit(".", 1)[-1].lower()
if extension in extensions:
yield entry.path


def file_temp_copy(src) -> str:
fo_dest = tempfile.NamedTemporaryFile(delete=False)
with open(src, "r+b") as fo_src:
Expand Down

0 comments on commit 7a38b11

Please sign in to comment.