Skip to content

Commit

Permalink
Merge pull request #8060 from ThomasWaldmann/slashdot-hack-1.4
Browse files Browse the repository at this point in the history
slashdot hack (1.4-maint)
  • Loading branch information
ThomasWaldmann committed Jan 30, 2024
2 parents e744e04 + 5b96d5a commit 376ad6d
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 37 deletions.
3 changes: 3 additions & 0 deletions docs/usage/create.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ Examples
# same, but list all files as we process them
$ borg create --list /path/to/repo::my-documents ~/Documents

# Backup /mnt/disk/docs, but strip path prefix using the slashdot hack
$ borg create /path/to/repo::docs /mnt/disk/./docs

# Backup ~/Documents and ~/src but exclude pyc files
$ borg create /path/to/repo::my-files \
~/Documents \
Expand Down
48 changes: 34 additions & 14 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,16 @@ def __init__(self, *, metadata_collector, cache, key,
self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)

@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True):
def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
if strip_prefix is not None:
assert not path.endswith(os.sep)
if strip_prefix.startswith(path + os.sep):
# still on a directory level that shall be stripped - do not create an item for this!
yield None, 'x', False, False
return
# adjust path, remove stripped directory levels
path = path.removeprefix(strip_prefix)

safe_path = make_path_safe(path)
item = Item(path=safe_path)
hardlink_master = False
Expand All @@ -1318,13 +1327,16 @@ def create_helper(self, path, st, status=None, hardlinkable=True):
if hardlink_master:
self.hard_links[(st.st_ino, st.st_dev)] = safe_path

def process_dir_with_fd(self, *, path, fd, st):
with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
def process_dir_with_fd(self, *, path, fd, st, strip_prefix):
    """Archive the directory *path* using its already-open file descriptor *fd*.

    *strip_prefix* is passed through to create_helper for the slashdot hack;
    when the helper yields item=None, this directory level is being stripped
    and no archive item is recorded for it.  Returns the one-letter status
    code provided by create_helper.
    """
    helper = self.create_helper(path, st, 'd', hardlinkable=False, strip_prefix=strip_prefix)
    with helper as (item, status, hardlinked, hardlink_master):
        # item is None while we are still inside the stripped prefix levels.
        if item is not None:
            item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
        return status

def process_dir(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
def process_dir(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, 'd', hardlinkable=False, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master):
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
noatime=True, op='dir_open') as fd:
# fd is None for directories on windows, in that case a race condition check is not possible.
Expand All @@ -1334,8 +1346,10 @@ def process_dir(self, *, path, parent_fd, name, st):
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status

def process_fifo(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo
def process_fifo(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, 'f', strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): # fifo
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
with backup_io('fstat'):
st = stat_update_check(st, os.fstat(fd))
Expand All @@ -1344,9 +1358,11 @@ def process_fifo(self, *, path, parent_fd, name, st):
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status

def process_dev(self, *, path, parent_fd, name, st, dev_type):
with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device
def process_dev(self, *, path, parent_fd, name, st, dev_type, strip_prefix):
with self.create_helper(path, st, dev_type, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): # char/block device
# looks like we can not work fd-based here without causing issues when trying to open/close the device
if item is None:
return status
with backup_io('stat'):
st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False))
item.rdev = st.st_rdev
Expand All @@ -1355,11 +1371,13 @@ def process_dev(self, *, path, parent_fd, name, st, dev_type):
item.update(self.metadata_collector.stat_attrs(st, path))
return status

def process_symlink(self, *, path, parent_fd, name, st):
def process_symlink(self, *, path, parent_fd, name, st, strip_prefix):
# note: using hardlinkable=False because we can not support hardlinked symlinks,
# due to the dual-use of item.source, see issue #2343:
# hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks.
with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
with self.create_helper(path, st, 's', hardlinkable=False, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master):
if item is None:
return status
fname = name if name is not None and parent_fd is not None else path
with backup_io('readlink'):
source = os.readlink(fname, dir_fd=parent_fd)
Expand Down Expand Up @@ -1392,8 +1410,10 @@ def process_pipe(self, *, path, cache, fd, mode, user, group):
self.add_item(item, stats=self.stats)
return status

def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal):
with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet
def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, strip_prefix):
with self.create_helper(path, st, None, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): # no status yet
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd:
with backup_io('fstat'):
st = stat_update_check(st, os.fstat(fd))
Expand Down
71 changes: 48 additions & 23 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from .helpers import safe_encode, remove_surrogates, bin_to_hex, hex_to_bin, prepare_dump_dict, eval_escapes
from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
from .helpers import timestamp, utcnow
from .helpers import get_cache_dir, os_stat
from .helpers import get_cache_dir, os_stat, get_strip_prefix
from .helpers import Manifest, AI_HUMAN_SORT_KEYS
from .helpers import hardlinkable
from .helpers import StableDict
Expand Down Expand Up @@ -565,12 +565,14 @@ def create_inner(archive, cache, fso):
pipe_bin = sys.stdin.buffer
pipe = TextIOWrapper(pipe_bin, errors='surrogateescape')
for path in iter_separated(pipe, paths_sep):
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io('stat'):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
status = self._process_any(path=path, parent_fd=None, name=None, st=st, fso=fso,
cache=cache, read_special=args.read_special, dry_run=dry_run)
cache=cache, read_special=args.read_special, dry_run=dry_run,
strip_prefix=strip_prefix)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = 'E'
Expand Down Expand Up @@ -598,6 +600,8 @@ def create_inner(archive, cache, fso):
status = '-'
self.print_file_status(status, path)
continue

strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io('stat'):
Expand All @@ -607,7 +611,8 @@ def create_inner(archive, cache, fso):
fso=fso, cache=cache, matcher=matcher,
exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run,
strip_prefix=strip_prefix)
# if we get back here, we've finished recursing into <path>,
# we do not ever want to get back in there (even if path is given twice as recursion root)
skip_inodes.add((st.st_ino, st.st_dev))
Expand Down Expand Up @@ -674,20 +679,20 @@ def create_inner(archive, cache, fso):
else:
create_inner(None, None, None)

def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix):
"""
Call the right method on the given FilesystemObjectProcessor.
"""

if dry_run:
return '-'
elif stat.S_ISREG(st.st_mode):
return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, strip_prefix=strip_prefix)
elif stat.S_ISDIR(st.st_mode):
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
elif stat.S_ISLNK(st.st_mode):
if not read_special:
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
else:
try:
st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
Expand All @@ -697,27 +702,27 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, d
special = is_special(st_target.st_mode)
if special:
return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target,
cache=cache, flags=flags_special_follow)
cache=cache, flags=flags_special_follow, strip_prefix=strip_prefix)
else:
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
elif stat.S_ISFIFO(st.st_mode):
if not read_special:
return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
else:
return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
cache=cache, flags=flags_special)
cache=cache, flags=flags_special, strip_prefix=strip_prefix)
elif stat.S_ISCHR(st.st_mode):
if not read_special:
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c')
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c', strip_prefix=strip_prefix)
else:
return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
cache=cache, flags=flags_special)
cache=cache, flags=flags_special, strip_prefix=strip_prefix)
elif stat.S_ISBLK(st.st_mode):
if not read_special:
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b')
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b', strip_prefix=strip_prefix)
else:
return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
cache=cache, flags=flags_special)
cache=cache, flags=flags_special, strip_prefix=strip_prefix)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
return
Expand All @@ -733,7 +738,7 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, d

def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher,
exclude_caches, exclude_if_present, keep_exclude_tags,
skip_inodes, restrict_dev, read_special, dry_run):
skip_inodes, restrict_dev, read_special, dry_run, strip_prefix):
"""
Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
Expand Down Expand Up @@ -781,7 +786,7 @@ def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher,
# directories cannot go in this branch because they can be excluded based on tag
# files they might contain
status = self._process_any(path=path, parent_fd=parent_fd, name=name, st=st, fso=fso, cache=cache,
read_special=read_special, dry_run=dry_run)
read_special=read_special, dry_run=dry_run, strip_prefix=strip_prefix)
else:
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
noatime=True, op='dir_open') as child_fd:
Expand All @@ -797,19 +802,19 @@ def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher,
if not recurse_excluded_dir:
if keep_exclude_tags:
if not dry_run:
fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
self._rec_walk(
path=tag_path, parent_fd=child_fd, name=tag_name, fso=fso, cache=cache,
matcher=matcher, exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run, strip_prefix=strip_prefix)
self.print_file_status('x', path)
return
if not recurse_excluded_dir:
if not dry_run:
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
else:
status = '-'
if recurse:
Expand All @@ -821,7 +826,7 @@ def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher,
path=normpath, parent_fd=child_fd, name=dirent.name, fso=fso, cache=cache, matcher=matcher,
exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, restrict_dev=restrict_dev,
read_special=read_special, dry_run=dry_run)
read_special=read_special, dry_run=dry_run, strip_prefix=strip_prefix)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = 'E'
Expand Down Expand Up @@ -3391,6 +3396,11 @@ def define_borg_mount(parser):
that means if relative paths are desired, the command has to be run from the correct
directory.
The slashdot hack in paths (recursion roots) is triggered by using ``/./``:
``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but
strip the prefix on the left side of ``./`` from the archived items (in this case,
``this/gets/archived`` will be the path in the archived item).
When giving '-' as path, borg will read data from standard input and create a
file 'stdin' in the created archive from that data. In some cases it's more
appropriate to use --content-from-command, however. See section *Reading from
Expand Down Expand Up @@ -3530,8 +3540,8 @@ def define_borg_mount(parser):
- 'x' = excluded, item was *not* backed up
- '?' = missing status code (if you see this, please file a bug report!)
Reading from stdin
++++++++++++++++++
Reading backup data from stdin
++++++++++++++++++++++++++++++
There are two methods to read from stdin. Either specify ``-`` as path and
pipe directly to borg::
Expand Down Expand Up @@ -3562,6 +3572,21 @@ def define_borg_mount(parser):
By default, the content read from stdin is stored in a file called 'stdin'.
Use ``--stdin-name`` to change the name.
Feeding all file paths from an external source
++++++++++++++++++++++++++++++++++++++++++++++
Usually, you give a starting path (recursion root) to borg and then borg
automatically recurses, finds and backs up all fs objects contained in
there (optionally considering include/exclude rules).
If you need more control and you want to give every single fs object path
to borg (maybe implementing your own recursion or your own rules), you can use
``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg will
fail to create an archive should the command fail).
Borg also supports the slashdot hack in these paths to strip path prefixes,
so be careful not to trigger it unintentionally.
""")

subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False,
Expand Down

0 comments on commit 376ad6d

Please sign in to comment.