Merge pull request #8107 from ThomasWaldmann/slashdot-hack-master
slashdot hack (master)
ThomasWaldmann committed Feb 20, 2024
2 parents f94cdee + e7bd18d commit 514cef5
Showing 6 changed files with 173 additions and 28 deletions.
3 changes: 3 additions & 0 deletions docs/usage/create.rst
@@ -10,6 +10,9 @@ Examples
# same, but list all files as we process them
$ borg create --list my-documents ~/Documents

# Backup /mnt/disk/docs, but strip path prefix using the slashdot hack
$ borg create docs /mnt/disk/./docs

# Backup ~/Documents and ~/src but exclude pyc files
$ borg create my-files \
~/Documents \
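
A sketch of the effect of the new example (the archive name and the listing output below are illustrative, not part of the commit): everything to the left of ``/./`` is used to locate the files on disk, but only the part to the right of it becomes the item path in the archive.

    $ borg create docs /mnt/disk/./docs
    $ borg list docs        # hypothetical listing; paths start below the ./ marker
    drwxr-xr-x ... docs
    -rw-r--r-- ... docs/report.pdf

Without the hack, the same items would be stored as ``mnt/disk/docs/...`` (borg always strips leading slashes).
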
78 changes: 64 additions & 14 deletions src/borg/archive.py
@@ -1361,7 +1361,16 @@ def __init__(
self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)

@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True):
def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
if strip_prefix is not None:
assert not path.endswith(os.sep)
if strip_prefix.startswith(path + os.sep):
# still on a directory level that shall be stripped - do not create an item for this!
yield None, "x", False, None
return
# adjust path, remove stripped directory levels
path = path.removeprefix(strip_prefix)

sanitized_path = remove_dotdot_prefixes(path)
item = Item(path=sanitized_path)
hardlinked = hardlinkable and st.st_nlink > 1
@@ -1384,13 +1393,26 @@ def create_helper(self, path, st, status=None, hardlinkable=True):
chunks = item.chunks if "chunks" in item else None
self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks)

def process_dir_with_fd(self, *, path, fd, st):
with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks):
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
def process_dir_with_fd(self, *, path, fd, st, strip_prefix):
with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
):
if item is not None:
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status

def process_dir(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks):
def process_dir(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
):
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open") as fd:
# fd is None for directories on windows, in that case a race condition check is not possible.
if fd is not None:
@@ -1399,25 +1421,46 @@ def process_dir(self, *, path, parent_fd, name, st):
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status

def process_fifo(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, "f") as (item, status, hardlinked, hl_chunks): # fifo
def process_fifo(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, "f", strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
): # fifo
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(fd))
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status

def process_dev(self, *, path, parent_fd, name, st, dev_type):
with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hl_chunks): # char/block device
def process_dev(self, *, path, parent_fd, name, st, dev_type, strip_prefix):
with self.create_helper(path, st, dev_type, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
): # char/block device
# looks like we can not work fd-based here without causing issues when trying to open/close the device
if item is None:
return status
with backup_io("stat"):
st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False))
item.rdev = st.st_rdev
item.update(self.metadata_collector.stat_attrs(st, path))
return status

def process_symlink(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, "s", hardlinkable=True) as (item, status, hardlinked, hl_chunks):
def process_symlink(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, "s", hardlinkable=True, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
):
if item is None:
return status
fname = name if name is not None and parent_fd is not None else path
with backup_io("readlink"):
target = os.readlink(fname, dir_fd=parent_fd)
@@ -1466,8 +1509,15 @@ def process_pipe(self, *, path, cache, fd, mode, user=None, group=None):
self.add_item(item, stats=self.stats)
return status

def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False):
with self.create_helper(path, st, None) as (item, status, hardlinked, hl_chunks): # no status yet
def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False, strip_prefix):
with self.create_helper(path, st, None, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
): # no status yet
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(fd))
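
The create_helper change above implements the stripping in one place for all item types: a path at or above the ``./`` boundary yields no item at all (status ``x``), anything below the boundary gets the prefix removed. A minimal standalone sketch of that logic (the function name and example paths are made up, not part of the diff):

    import os

    def strip_path(path, strip_prefix):
        # sketch of the new create_helper prefix handling
        if strip_prefix is not None:
            assert not path.endswith(os.sep)
            if strip_prefix.startswith(path + os.sep):
                # still on a directory level that shall be stripped -> no item
                return None
            # remove the stripped directory levels from the archived path
            path = path.removeprefix(strip_prefix)  # Python >= 3.9
        return path

    assert strip_path("/mnt", "/mnt/disk/") is None             # above the ./ boundary
    assert strip_path("/mnt/disk", "/mnt/disk/") is None        # the boundary dir itself
    assert strip_path("/mnt/disk/docs", "/mnt/disk/") == "docs"
    assert strip_path("/mnt/disk/docs/a.txt", "/mnt/disk/") == "docs/a.txt"
    assert strip_path("/etc/hosts", None) == "/etc/hosts"       # no slashdot hack given
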
77 changes: 64 additions & 13 deletions src/borg/archiver/create_cmd.py
@@ -20,7 +20,7 @@
from ..helpers import archivename_validator, FilesCacheMode
from ..helpers import eval_escapes
from ..helpers import timestamp, archive_ts_now
from ..helpers import get_cache_dir, os_stat
from ..helpers import get_cache_dir, os_stat, get_strip_prefix
from ..helpers import dir_is_tagged
from ..helpers import log_multi
from ..helpers import basic_json_data, json_print
@@ -107,6 +107,7 @@ def create_inner(archive, cache, fso):
pipe_bin = sys.stdin.buffer
pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
for path in iter_separated(pipe, paths_sep):
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
@@ -120,6 +121,7 @@ def create_inner(archive, cache, fso):
cache=cache,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
@@ -157,6 +159,8 @@ def create_inner(archive, cache, fso):
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1
continue

strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
@@ -176,6 +180,7 @@ def create_inner(archive, cache, fso):
restrict_dev=restrict_dev,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
# if we get back here, we've finished recursing into <path>,
# we do not ever want to get back in there (even if path is given twice as recursion root)
@@ -274,7 +279,7 @@ def create_inner(archive, cache, fso):
else:
create_inner(None, None, None)

def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix):
"""
Call the right method on the given FilesystemObjectProcessor.
"""
@@ -287,13 +292,21 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
try:
if stat.S_ISREG(st.st_mode):
return fso.process_file(
path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, last_try=last_try
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISDIR(st.st_mode):
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
elif stat.S_ISLNK(st.st_mode):
if not read_special:
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
try:
st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
@@ -310,12 +323,17 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
cache=cache,
flags=flags_special_follow,
last_try=last_try,
strip_prefix=strip_prefix,
)
else:
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
elif stat.S_ISFIFO(st.st_mode):
if not read_special:
return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_fifo(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
Expand All @@ -325,10 +343,13 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, d
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISCHR(st.st_mode):
if not read_special:
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c")
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
Expand All @@ -338,10 +359,13 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, d
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISBLK(st.st_mode):
if not read_special:
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b")
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
Expand All @@ -351,6 +375,7 @@ def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, d
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
@@ -401,6 +426,7 @@ def _rec_walk(
restrict_dev,
read_special,
dry_run,
strip_prefix,
):
"""
Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
@@ -457,6 +483,7 @@ def _rec_walk(
cache=cache,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
else:
with OsOpen(
@@ -474,7 +501,9 @@
if not recurse_excluded_dir:
if keep_exclude_tags:
if not dry_run:
fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
fso.process_dir_with_fd(
path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
self._rec_walk(
@@ -491,12 +520,13 @@
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
self.print_file_status("-", path) # excluded
return
if not recurse_excluded_dir:
if not dry_run:
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
else:
status = "+" # included (dir)
if recurse:
@@ -518,6 +548,7 @@
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)

except BackupError as e:
@@ -541,6 +572,11 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
that means if relative paths are desired, the command has to be run from the correct
directory.

The slashdot hack in paths (recursion roots) is triggered by using ``/./``:
``/this/gets/stripped/./this/gets/archived`` means: process that fs object, but
strip the prefix to the left of ``./`` from the archived items (in this case,
``this/gets/archived`` will be the path stored in the archived item).
When giving '-' as path, borg will read data from standard input and create a
file 'stdin' in the created archive from that data. In some cases it's more
appropriate to use --content-from-command, however. See section *Reading from
@@ -680,8 +716,8 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
- 'i' = backup data was read from standard input (stdin)
- '?' = missing status code (if you see this, please file a bug report!)
Reading from stdin
++++++++++++++++++
Reading backup data from stdin
++++++++++++++++++++++++++++++
There are two methods to read from stdin. Either specify ``-`` as path and
pipe directly to borg::
@@ -712,6 +748,21 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
By default, the content read from stdin is stored in a file called 'stdin'.
Use ``--stdin-name`` to change the name.
Feeding all file paths from an external source
++++++++++++++++++++++++++++++++++++++++++++++

Usually, you give a starting path (recursion root) to borg and borg then
automatically recurses, finds and backs up all fs objects contained in
there (optionally considering include/exclude rules).

If you need more control and want to give every single fs object path to
borg yourself (maybe implementing your own recursion or your own rules), you
can use ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter,
borg will fail to create an archive if the command fails).

Borg also supports the slashdot hack in paths given this way, so be careful
not to trigger the prefix stripping unintentionally.
"""
)

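
One ordering detail in ``create_inner`` is easy to miss: ``get_strip_prefix(path)`` runs on the raw user-supplied path *before* ``os.path.normpath(path)``, because normpath collapses the ``/./`` marker and would make the hack undetectable afterwards. A small illustration (a sketch, assuming a POSIX platform and that borg's helpers are importable; the path is made up):

    import os
    from borg.helpers import get_strip_prefix

    raw = "/mnt/disk/./docs"
    assert get_strip_prefix(raw) == "/mnt/disk/"   # marker still present
    clean = os.path.normpath(raw)                  # -> "/mnt/disk/docs"
    assert get_strip_prefix(clean) is None         # marker gone after normpath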
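
The warning added at the end of the help text matters because tools like ``find`` reproduce the recursion root verbatim in every path they print. A sketch of combining the two features deliberately (the archive name is made up):

    $ find /mnt/disk/./docs -type f | borg create --paths-from-stdin docs

Every path emitted by ``find`` starts with ``/mnt/disk/./``, so each item is archived under ``docs/...``; conversely, a stray ``/./`` in a generated path list would silently strip prefixes you meant to keep.
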
2 changes: 1 addition & 1 deletion src/borg/helpers/__init__.py
@@ -20,7 +20,7 @@
from .fs import ensure_dir, join_base_dir, get_socket_filename
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount
from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
from .fs import HardLinkManager
from .misc import sysinfo, log_multi, consume
15 changes: 15 additions & 0 deletions src/borg/helpers/fs.py
@@ -233,6 +233,21 @@ def make_path_safe(path):
return path


def get_strip_prefix(path):
# similar to how rsync does it, we allow users to give paths like:
# /this/gets/stripped/./this/is/kept
# the whole path is what is used to read from the fs,
# the strip_prefix will be /this/gets/stripped/ and
# this/is/kept is the path being archived.
pos = path.find("/./") # detect slashdot hack
if pos > 0:
# found a prefix to strip! make sure it ends with one "/"!
return os.path.normpath(path[:pos]) + os.sep
else:
# no or empty prefix, nothing to strip!
return None


_dotdot_re = re.compile(r"^(\.\./)+")


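
The helper's behavior follows directly from the code above; a few illustrative cases (assuming a POSIX platform where ``os.sep == "/"``; the example paths are made up):

    import os

    def get_strip_prefix(path):  # copied from the new helper above
        pos = path.find("/./")  # detect slashdot hack
        if pos > 0:
            return os.path.normpath(path[:pos]) + os.sep
        else:
            return None

    assert get_strip_prefix("/this/gets/stripped/./this/is/kept") == "/this/gets/stripped/"
    assert get_strip_prefix("mnt/disk/./docs") == "mnt/disk/"  # relative roots work, too
    assert get_strip_prefix("/mnt/disk/docs") is None          # no marker at all
    assert get_strip_prefix("/./docs") is None                 # pos == 0: empty prefix, nothing to strip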
