Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitize paths during archive creation and extraction #7108

Merged
merged 4 commits into from Jun 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/borg/archive.py
Expand Up @@ -32,7 +32,7 @@
from .platform import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, archive_ts_now
from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd, remove_dotdot_prefixes
from .helpers import StableDict
from .helpers import bin_to_hex
from .helpers import safe_ns
Expand Down Expand Up @@ -853,8 +853,6 @@ def same_item(item, st):
return

dest = self.cwd
if item.path.startswith(("/", "../")):
raise Exception("Path should be relative and local")
ThomasWaldmann marked this conversation as resolved.
Show resolved Hide resolved
path = os.path.join(dest, item.path)
# Attempt to remove existing files, ignore errors on failure
try:
Expand Down Expand Up @@ -1376,8 +1374,8 @@ def __init__(

@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True):
safe_path = make_path_safe(path)
item = Item(path=safe_path)
sanitized_path = remove_dotdot_prefixes(path)
item = Item(path=sanitized_path)
hardlinked = hardlinkable and st.st_nlink > 1
hl_chunks = None
update_map = False
Expand Down
15 changes: 1 addition & 14 deletions src/borg/archiver/_common.py
Expand Up @@ -9,6 +9,7 @@
from ..cache import Cache, assert_secure
from ..helpers import Error
from ..helpers import SortBySpec, positive_int_validator, location_validator, Location, relative_time_marker_validator
from ..helpers import Highlander
from ..helpers.nanorst import rst_to_terminal
from ..manifest import Manifest, AI_HUMAN_SORT_KEYS
from ..patterns import PatternMatcher
Expand Down Expand Up @@ -246,20 +247,6 @@ def wrapper(self, args, repository, manifest, **kwargs):
return wrapper


class Highlander(argparse.Action):
    """Argparse action that stores an option's value and rejects a second occurrence."""

    def __init__(self, *args, **kwargs):
        # Records whether __call__ has already run for this action instance.
        self.__called = False
        super().__init__(*args, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Store `values` under `self.dest`; raise ArgumentError if called before."""
        seen_before, self.__called = self.__called, True
        if seen_before:
            raise argparse.ArgumentError(self, "There can be only one.")
        setattr(namespace, self.dest, values)


# You can use :ref:`xyz` in the following usage pages. However, for plain-text view,
# e.g. through "borg ... --help", define a substitution for the reference here.
# It will replace the entire :ref:`foo` verbatim.
Expand Down
3 changes: 2 additions & 1 deletion src/borg/archiver/create_cmd.py
Expand Up @@ -27,6 +27,7 @@
from ..helpers import flags_root, flags_dir, flags_special_follow, flags_special
from ..helpers import sig_int, ignore_sigint
from ..helpers import iter_separated
from ..helpers import MakePathSafeAction
from ..manifest import Manifest
from ..patterns import PatternMatcher
from ..platform import is_win32
Expand Down Expand Up @@ -766,7 +767,7 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
metavar="NAME",
dest="stdin_name",
default="stdin",
action=Highlander,
action=MakePathSafeAction,
help="use NAME in archive for stdin data (default: %(default)r)",
)
subparser.add_argument(
Expand Down
3 changes: 2 additions & 1 deletion src/borg/helpers/__init__.py
Expand Up @@ -13,7 +13,7 @@
from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError
from .fs import ensure_dir, join_base_dir, get_socket_filename
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
from .fs import dir_is_tagged, dir_is_cachedir, make_path_safe, scandir_inorder
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount
from .fs import O_, flags_root, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
from .fs import HardLinkManager
Expand All @@ -31,6 +31,7 @@
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
from .parseformat import swidth_slice, ellipsis_truncate
from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
from .parseformat import Highlander, MakePathSafeAction
from .process import daemonize, daemonizing
from .process import signal_handler, raising_signal_handler, sig_int, ignore_sigint, SigHup, SigTerm
from .process import popen_with_error_handling, is_terminal, prepare_subprocess_env, create_filter_process
Expand Down
62 changes: 58 additions & 4 deletions src/borg/helpers/fs.py
Expand Up @@ -217,12 +217,66 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
return tag_names


_safe_re = re.compile(r"^((\.\.)?/+)+")
def make_path_safe(path):
"""
Make path safe by making it relative and normalized.

`path` is sanitized by making it relative, removing
consecutive slashes (e.g. '//'), removing '.' elements,
and removing trailing slashes.

def make_path_safe(path):
"""Make path safe by making it relative and local"""
return _safe_re.sub("", path) or "."
For reasons of security, a ValueError is raised should
`path` contain any '..' elements.
"""
path = path.lstrip("/")
if "\\" in path: # borg always wants slashes, never backslashes.
raise ValueError(f"unexpected backslash(es) in path {path!r}")
if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..":
raise ValueError(f"unexpected '..' element in path {path!r}")
path = os.path.normpath(path)
ThomasWaldmann marked this conversation as resolved.
Show resolved Hide resolved
return path


_dotdot_re = re.compile(r"^(\.\./)+")


def remove_dotdot_prefixes(path):
    """
    Strip leading slashes and any run of leading '../' elements from `path`.

    The result is therefore always relative. If nothing (or only a bare '..')
    is left after stripping, "." is returned instead.

    `path` is expected to be normalized already (e.g. via `os.path.normpath()`).
    """
    relative = path.lstrip("/")
    remainder = _dotdot_re.sub("", relative)
    # "" and ".." both mean that no usable path element is left.
    return "." if remainder in ("", "..") else remainder
Comment on lines +251 to +254
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, i am asking myself whether this is useful, it might completely change the path so it points somewhere else.

maybe rather reject than modify-and-accept?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which kind of paths should be accepted/rejected? I assume we still want to allow users to specify absolute paths and simple relative paths like a/b/c or ./a/b/c. So, are you suggesting to refuse any ../some/path?

Note that the regex starts with ^ and we only remove prefixes from the path.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Guess we can approach this from 2 perspectives:

a) what does your PR change compared to the existing code

for archiving items the new remove_dotdot_prefixes is quite close to what make_path_safe did, except that it removes leading slashes (which we must do somewhere) and handles the special cases of "nothing left" ("" and ".."), which is also fine (except that a mere "." as archived path is not really useful for extraction)

b) what do we really want it to be

usually borg recurses starting from the recursion roots (which we can normalize first) and then only pretty normal paths will be generated by the recursion. for this, we do not need special path processing per item.

when fed with a paths list via stdin (and not using borg's recursor), borg does not have control over what's coming in from there, but borg is also not required to accept too crappy stuff and still make great sense from it (the admin or tool feeding that list into borg can be expected to provide reasonable paths), borg instead could skip invalid paths with an error msg.

>>> normpath("root/foo/../bar")
'root/bar'  # definitely valid, it's inside the root

>>> normpath("root/foo/../../bar")
'bar'  # slightly crappy, but we could archive this correctly (sibling of root)

>>> normpath("root/foo/../../../bar")
'../bar'  # we can not archive this as is (../ unwanted due to security) and if we strip the ../ we change the meaning.

So: if normpath(p).startswith("../"): reject.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

except that a mere "." as archived path is not really useful for extraction

I'm not sure I agree with it not being useful. When I create an archive for '.', I'd also expect that it contains '.' (user, permissions, attributes), so that I can extract it again, move it to its original location, and the permissions are correct again.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when fed with a paths list via stdin (and not using borg's recursor), borg does not have control over what's coming in from there, but borg is also not required to accept too crappy stuff and still make great sense from it (the admin or tool feeding that list into borg can be expected to provide reasonable paths), borg instead could skip invalid paths with an error msg.

I, personally, wouldn't give this too much consideration. Seems unlikely to me that people will end up providing such crappy paths to Borg. Paths with './' or '//', sure, but something like "root/foo/../../../bar" seems rather unlikely to me.

Copy link
Contributor Author

@pgerber pgerber Nov 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Handling of '.' should perhaps be looked at separately. There are some cases where it isn't handled ideally:

user@dev-borg:~$ borg list 85
-rw-r--r-- root   root        905 Sun, 2022-04-10 12:08:45  etc/fstab
user@dev-borg:~$ borg create --paths-from-stdin 86 <<<$'/etc/fstab\n'
user@dev-borg:~$ borg list 86
-rw-r--r-- root   root        905 Sun, 2022-04-10 12:08:45  etc/fstab
drwx------ user   user          0 Sun, 2022-11-13 18:19:19  .

Empty line at end is interpreted as '.' somehow.

Probably related to this:

In [3]: os.path.normpath('')
Out[3]: '.'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, likely.

about archiving "." directory making sense: yes, i somehow agree, but how would you ever extract that? borg usually expects nothing being in the way, so it usually rmdirs target and then extracts target. not sure whether that would work with target == ".".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Guess the main issue remaining here is that the code removes arbitrary amounts of leading ../.
So we basically archive something and lose information about its original path.

We won't remember whether it was ../foo or ../../foo or whatever other upwards directory level.

The original code before your PR also had this issue, but guess if we clean it up, we should do it right.

I guess the only context where this could make sense is if we want "no warnings / no errors" borg1 archive transfer and accept that information loss (preferring it over introducing some security issue into borg2 archives).

So, in case this is only meant for borg transfer, maybe it could be put there so it won't stay here forever?

For borg2 "borg create", I guess we rather want to reject if something starts with ../.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, thought more about this, and guess I have to correct myself:

  • "losing information" --> yes, we remove "../../../", so we lose that. But: that is not super useful as borg does not track the cwd. The admin might know and it might be reconstructable from scripting etc., but it is not much we lose.
  • Also, and maybe more important: by using some "../" to point to parent dirs, the resulting paths usually get longer, so they gain information. e.g. if cwd is "/etc", if we refer to "passwd" we just have that. if we refer to "../etc/passwd" and then remove "../" from that, we have "etc/passwd", which is more.



def assert_sanitized_path(path):
    """
    Return `path` unchanged if it is already in sanitized form.

    Raises ValueError if `make_path_safe(path)` differs from `path`; a
    ValueError raised by `make_path_safe` itself propagates as well.
    """
    assert isinstance(path, str)
    # Callers are expected to have sanitized `path` already. Features such as
    # pattern matching rely on that, so it is verified once more here as a
    # precaution.
    if make_path_safe(path) == path:
        return path
    raise ValueError(f"path {path!r} is not sanitized")


def to_sanitized_path(path):
    """Return the sanitized form of `path`, as produced by `make_path_safe`."""
    assert isinstance(path, str)
    # Legacy versions of Borg still allowed non-sanitized paths
    # to be stored, so they are sanitized when reading.
    #
    # Borg 2 ensures paths are safe before storing them. Once support
    # for reading Borg 1 archives is dropped, this should become a plain
    # check that paths aren't malicious: absolute paths and paths
    # containing '..' elements must be rejected.
    #
    # For reasons of security, '..' elements in `path` are checked for, too.
    sanitized = make_path_safe(path)
    return sanitized


class HardLinkManager:
Expand Down
29 changes: 27 additions & 2 deletions src/borg/helpers/parseformat.py
Expand Up @@ -19,7 +19,7 @@
logger = create_logger()

from .errors import Error
from .fs import get_keys_dir
from .fs import get_keys_dir, make_path_safe
from .msgpack import Timestamp
from .time import OutputTimestamp, format_time, safe_timestamp
from .. import __version__ as borg_version
Expand Down Expand Up @@ -840,7 +840,7 @@ class FakeArchive:

from ..item import Item

fake_item = Item(mode=0, path="", user="", group="", mtime=0, uid=0, gid=0)
fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
formatter = cls(FakeArchive, "")
keys = []
keys.extend(formatter.call_keys.keys())
Expand Down Expand Up @@ -1147,3 +1147,28 @@ def decode(d):
return res

return decode(d)


class Highlander(argparse.Action):
    """Argparse action that stores an option's value and rejects a second occurrence."""

    def __init__(self, *args, **kwargs):
        # Records whether __call__ has already run for this action instance.
        self.__called = False
        super().__init__(*args, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Store `values` under `self.dest`; raise ArgumentError if called before."""
        seen_before, self.__called = self.__called, True
        if seen_before:
            raise argparse.ArgumentError(self, "There can be only one.")
        setattr(namespace, self.dest, values)


class MakePathSafeAction(Highlander):
    """
    Highlander variant that sanitizes the option value via `make_path_safe()`.

    The sanitized path is stored under `self.dest`. An `argparse.ArgumentError`
    is raised if `make_path_safe()` rejects the value with a ValueError, if the
    value sanitizes to "." (not usable as a file name), or if the option is
    given more than once.
    """

    def __call__(self, parser, namespace, path, option_string=None):
        try:
            sanitized_path = make_path_safe(path)
        except ValueError as e:
            raise argparse.ArgumentError(self, str(e)) from e
        if sanitized_path == ".":
            raise argparse.ArgumentError(self, f"{path!r} is not a valid file name")
        # Store via Highlander so that its "only given once" check still applies;
        # a plain setattr() here would silently accept a repeated option.
        super().__call__(parser, namespace, sanitized_path, option_string)
3 changes: 2 additions & 1 deletion src/borg/item.pyx
Expand Up @@ -7,6 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize
from .constants import ITEM_KEYS, ARCHIVE_KEYS
from .helpers import StableDict
from .helpers import format_file_size
from .helpers.fs import assert_sanitized_path, to_sanitized_path
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
from .helpers.time import OutputTimestamp, safe_timestamp

Expand Down Expand Up @@ -262,7 +263,7 @@ cdef class Item(PropDict):

# properties statically defined, so that IDEs can know their names:

path = PropDictProperty(str, 'surrogate-escaped str')
path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path)
ThomasWaldmann marked this conversation as resolved.
Show resolved Hide resolved
source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target
target = PropDictProperty(str, 'surrogate-escaped str')
user = PropDictProperty(str, 'surrogate-escaped str')
Expand Down
19 changes: 19 additions & 0 deletions src/borg/testsuite/__init__.py
Expand Up @@ -23,6 +23,7 @@
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from .. import platform


# Note: this is used by borg.selftest, do not use or import py.test functionality here.

from ..fuse_impl import llfuse, has_pyfuse3, has_llfuse
Expand Down Expand Up @@ -61,6 +62,24 @@ def same_ts_ns(ts_ns1, ts_ns2):
return diff_ts <= diff_max


# Paths containing a '..' element in some position (leading, trailing, in the
# middle, with absolute prefixes and/or doubled slashes).
rejected_dotdot_paths = (
    # '..' as the first element
    "..",
    "../",
    "../etc/shadow",
    "/..",
    "/../",
    "/../etc",
    "/../etc/",
    # '..' after a regular element
    "etc/..",
    "/etc/..",
    "/etc/../etc/shadow",
    "//etc/..",
    "etc//..",
    "etc/..//",
    "foo/../bar",
)


@contextmanager
def unopened_tempfile():
with tempfile.TemporaryDirectory() as tempdir:
Expand Down
7 changes: 7 additions & 0 deletions src/borg/testsuite/archive.py
Expand Up @@ -8,6 +8,7 @@
import pytest

from . import BaseTestCase
from . import rejected_dotdot_paths
from ..crypto.key import PlaintextKey
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
Expand Down Expand Up @@ -394,3 +395,9 @@ def test_get_item_uid_gid():
# as there is nothing, it'll fall back to uid_default/gid_default.
assert uid == 0
assert gid == 16


def test_reject_non_sanitized_item():
    # Constructing an Item with any of the '..' paths must raise ValueError.
    expected_msg = "unexpected '..' element in path"
    for dotdot_path in rejected_dotdot_paths:
        with pytest.raises(ValueError, match=expected_msg):
            Item(path=dotdot_path, user="root", group="root")
31 changes: 29 additions & 2 deletions src/borg/testsuite/archiver/create_cmd.py
Expand Up @@ -279,6 +279,33 @@ def test_create_no_permission_file(self):
assert "input/file2" not in out # it skipped file2
assert "input/file3" in out

def test_sanitized_stdin_name(self):
    # A --stdin-name containing './' and '//' must be listed in sanitized form.
    repo_arg = f"--repo={self.repository_location}"
    self.cmd(repo_arg, "rcreate", RK_ENCRYPTION)
    self.cmd(repo_arg, "create", "--stdin-name", "./a//path", "test", "-", input=b"")
    listing = self.cmd(repo_arg, "list", "test", "--json-lines")
    item = json.loads(listing)
    assert item["path"] == "a/path"

def test_dotdot_stdin_name(self):
    # A --stdin-name containing a '..' element must make "create" exit with code 2.
    repo_arg = f"--repo={self.repository_location}"
    self.cmd(repo_arg, "rcreate", RK_ENCRYPTION)
    create_args = (repo_arg, "create", "--stdin-name", "foo/../bar", "test", "-")
    output = self.cmd(*create_args, input=b"", exit_code=2)
    expected_tail = "'..' element in path 'foo/../bar'" + os.linesep
    assert output.endswith(expected_tail)

def test_dot_stdin_name(self):
    # A --stdin-name of './' must make "create" exit with code 2.
    repo_arg = f"--repo={self.repository_location}"
    self.cmd(repo_arg, "rcreate", RK_ENCRYPTION)
    create_args = (repo_arg, "create", "--stdin-name", "./", "test", "-")
    output = self.cmd(*create_args, input=b"", exit_code=2)
    expected_tail = "'./' is not a valid file name" + os.linesep
    assert output.endswith(expected_tail)

def test_create_content_from_command(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
input_data = "some test content"
Expand Down Expand Up @@ -586,7 +613,7 @@ def test_exclude_keep_tagged(self):
)
self._assert_test_keep_tagged()

def test_path_normalization(self):
def test_path_sanitation(self):
ThomasWaldmann marked this conversation as resolved.
Show resolved Hide resolved
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
self.create_regular_file("dir1/dir2/file", size=1024 * 80)
with changedir("input/dir1/dir2"):
Expand All @@ -595,7 +622,7 @@ def test_path_normalization(self):
self.assert_not_in("..", output)
self.assert_in(" input/dir1/dir2/file", output)

def test_exclude_normalization(self):
def test_exclude_sanitation(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
self.create_regular_file("file1", size=1024 * 80)
self.create_regular_file("file2", size=1024 * 80)
Expand Down
Binary file added src/borg/testsuite/archiver/dotdot_path.tar
Binary file not shown.
28 changes: 28 additions & 0 deletions src/borg/testsuite/archiver/tar_cmds.py
Expand Up @@ -129,6 +129,29 @@ def test_import_tar(self, tar_format="PAX"):
self.cmd(f"--repo={self.repository_location}", "extract", "dst")
self.assert_dirs_equal("input", "output/input", ignore_ns=True, ignore_xattrs=True)

def test_import_unusual_tar(self):
    # The fixture tar contains these unusual entries:
    # /foobar
    # ./bar
    # ./foo2/
    # ./foo//bar
    # ./
    fixture_dir = os.path.dirname(__file__)
    tar_archive = os.path.join(fixture_dir, "unusual_paths.tar")
    repo_arg = f"--repo={self.repository_location}"

    self.cmd(repo_arg, "rcreate", "--encryption=none")
    self.cmd(repo_arg, "import-tar", "dst", tar_archive)
    listing = self.cmd(repo_arg, "list", "dst", "--format", "{path}{NL}")
    files = listing.splitlines()
    self.assert_equal(set(files), {"foobar", "bar", "foo2", "foo/bar", "."})

def test_import_tar_with_dotdot(self):
    # The fixture tar contains this file:
    # ../../../../etc/shadow
    fixture_dir = os.path.dirname(__file__)
    tar_archive = os.path.join(fixture_dir, "dotdot_path.tar")
    repo_arg = f"--repo={self.repository_location}"

    self.cmd(repo_arg, "rcreate", "--encryption=none")
    expected_msg = "unexpected '..' element in path '../../../../etc/shadow'"
    with pytest.raises(ValueError, match=expected_msg):
        self.cmd(repo_arg, "import-tar", "dst", tar_archive, exit_code=2)

@requires_gzip
def test_import_tar_gz(self, tar_format="GNU"):
if not shutil.which("gzip"):
Expand Down Expand Up @@ -212,3 +235,8 @@ class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
@unittest.skipUnless("binary" in BORG_EXES, "no borg.exe available")
class ArchiverTestCaseBinary(ArchiverTestCaseBinaryBase, ArchiverTestCase):
"""runs the same tests, but via the borg binary"""

@unittest.skip("does not work with binaries")
def test_import_tar_with_dotdot(self):
# the test checks for a raised exception. that can't work if the code runs in a separate process.
pass
Binary file added src/borg/testsuite/archiver/unusual_paths.tar
Binary file not shown.