Skip to content

Commit

Permalink
Sanitize paths during archive creation/extraction/...
Browse files Browse the repository at this point in the history
Paths are not always sanitized when creating an archive and,
more importantly, never when extracting one. The following example
shows how this can be used to attempt to write a file outside the
extraction directory:

$ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 -
$ borg list -r ~/borg/a archive-1
-rw-rw---- root   root          7 Sun, 2022-10-23 19:14:27  x/../../../../../etc/shadow
$ mkdir borg/target
$ cd borg/target
$ borg extract -r ~/borg/a archive-1
x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc'

Note that Borg tries to extract the file to /etc/shadow and the
permission error is a result of the user not having access.

This patch ensures file names are sanitized before archiving.
As for files extracted from the archive, paths are sanitized
by making all paths relative, removing '.' elements, and removing
superfluous slashes (as in '//'). '..' elements, however, are
rejected outright. The reasoning here is that it is easy to start
a path with './' or insert a '//' by accident (e.g. via --stdin-name
or import-tar). '..', however, seem unlikely to be the result
of an accident and could indicate a tampered repository.

With paths being sanitized as they are being read, this "errors"
will be corrected during the `borg transfer` required when upgrading
to Borg 2. Hence, the sanitation, when reading the archive,
can be removed once support for reading v1 repositories is dropped.
V2 repository will not contain non-sanitized paths. Of course,
a check for absolute paths and '..' elements needs to kept in
place to detect tempered archives.

I recommend treating this as a security issue. I see the following
cases where extracting a file outside the extraction path could
constitute a security risk:

a) When extraction is done as a different user than archive
creation. The user that created the archive may be able to
get a file overwritten as a different user.
b) When the archive is created on one host and extracted on
another. The user that created the archive may be able to
get a file overwritten on another host.
c) When an archive is created and extracted after a OS reinstall.
When a host is suspected compromised, it is common to reinstall
(or set up a new machine), extract the backups and then evaluate
their integrity. A user that manipulates the archive before such
a reinstall may be able to get a file overwritten outside the
extraction path and may evade integrity checks.

Notably absent is the creation and extraction on the same host as
the same user. In such case, an adversary must be assumed to be able
to replace any file directly.

This also (partially) fixes #7099.
  • Loading branch information
pgerber authored and ThomasWaldmann committed Jun 7, 2023
1 parent ac4337a commit 438cf2e
Show file tree
Hide file tree
Showing 14 changed files with 198 additions and 36 deletions.
8 changes: 3 additions & 5 deletions src/borg/archive.py
Expand Up @@ -32,7 +32,7 @@
from .platform import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, archive_ts_now
from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd, remove_dotdot_prefixes
from .helpers import StableDict
from .helpers import bin_to_hex
from .helpers import safe_ns
Expand Down Expand Up @@ -853,8 +853,6 @@ def same_item(item, st):
return

dest = self.cwd
if item.path.startswith(("/", "../")):
raise Exception("Path should be relative and local")
path = os.path.join(dest, item.path)
# Attempt to remove existing files, ignore errors on failure
try:
Expand Down Expand Up @@ -1376,8 +1374,8 @@ def __init__(

@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True):
safe_path = make_path_safe(path)
item = Item(path=safe_path)
sanitized_path = remove_dotdot_prefixes(path)
item = Item(path=sanitized_path)
hardlinked = hardlinkable and st.st_nlink > 1
hl_chunks = None
update_map = False
Expand Down
3 changes: 2 additions & 1 deletion src/borg/archiver/create_cmd.py
Expand Up @@ -27,6 +27,7 @@
from ..helpers import flags_root, flags_dir, flags_special_follow, flags_special
from ..helpers import sig_int, ignore_sigint
from ..helpers import iter_separated
from ..helpers import MakePathSafeAction
from ..manifest import Manifest
from ..patterns import PatternMatcher
from ..platform import is_win32
Expand Down Expand Up @@ -766,7 +767,7 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
metavar="NAME",
dest="stdin_name",
default="stdin",
action=Highlander,
action=MakePathSafeAction, # MakePathSafeAction maybe should subclass from Highlander.
help="use NAME in archive for stdin data (default: %(default)r)",
)
subparser.add_argument(
Expand Down
3 changes: 2 additions & 1 deletion src/borg/helpers/__init__.py
Expand Up @@ -13,7 +13,7 @@
from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError
from .fs import ensure_dir, join_base_dir, get_socket_filename
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
from .fs import dir_is_tagged, dir_is_cachedir, make_path_safe, scandir_inorder
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount
from .fs import O_, flags_root, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
from .fs import HardLinkManager
Expand All @@ -31,6 +31,7 @@
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
from .parseformat import swidth_slice, ellipsis_truncate
from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
from .parseformat import MakePathSafeAction
from .process import daemonize, daemonizing
from .process import signal_handler, raising_signal_handler, sig_int, ignore_sigint, SigHup, SigTerm
from .process import popen_with_error_handling, is_terminal, prepare_subprocess_env, create_filter_process
Expand Down
60 changes: 56 additions & 4 deletions src/borg/helpers/fs.py
Expand Up @@ -217,12 +217,64 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
return tag_names


_safe_re = re.compile(r"^((\.\.)?/+)+")
def make_path_safe(path):
"""
Make path safe by making it relative and normalized.
`path` is sanitized by making it relative, removing
consecutive slashes (e.g. '//'), removing '.' elements,
and removing trailing slashes.
def make_path_safe(path):
"""Make path safe by making it relative and local"""
return _safe_re.sub("", path) or "."
For reasons of security, a ValueError is raised should
`path` contain any '..' elements.
"""
path = path.lstrip("/")
if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..":
raise ValueError(f"unexpected '..' element in path {path!r}")
path = os.path.normpath(path)
return path


_dotdot_re = re.compile(r"^(\.\./)+")


def remove_dotdot_prefixes(path):
"""
Remove '../'s at the beginning of `path`. Additionally,
the path is made relative.
`path` is expected to be normalized already (e.g. via `os.path.normpath()`).
"""
path = path.lstrip("/")
path = _dotdot_re.sub("", path)
if path in ["", ".."]:
return "."
return path


def assert_sanitized_path(path):
assert isinstance(path, str)
# `path` should have been sanitized earlier. Some features,
# like pattern matching rely on a sanitized path. As a
# precaution we check here again.
if make_path_safe(path) != path:
raise ValueError(f"path {path!r} is not sanitized")
return path


def to_sanitized_path(path):
assert isinstance(path, str)
# Legacy versions of Borg still allowed non-sanitized paths
# to be stored. So, we sanitize them when reading.
#
# Borg 2 ensures paths are safe before storing them. Thus, when
# support for reading Borg 1 archives is dropped, this should be
# changed to a simple check to verify paths aren't malicious.
# Namely, absolute paths and paths containing '..' elements must
# be rejected.
#
# Also checks for '..' elements in `path` for reasons of security.
return make_path_safe(path)


class HardLinkManager:
Expand Down
15 changes: 13 additions & 2 deletions src/borg/helpers/parseformat.py
Expand Up @@ -19,7 +19,7 @@
logger = create_logger()

from .errors import Error
from .fs import get_keys_dir
from .fs import get_keys_dir, make_path_safe
from .msgpack import Timestamp
from .time import OutputTimestamp, format_time, safe_timestamp
from .. import __version__ as borg_version
Expand Down Expand Up @@ -840,7 +840,7 @@ class FakeArchive:

from ..item import Item

fake_item = Item(mode=0, path="", user="", group="", mtime=0, uid=0, gid=0)
fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
formatter = cls(FakeArchive, "")
keys = []
keys.extend(formatter.call_keys.keys())
Expand Down Expand Up @@ -1147,3 +1147,14 @@ def decode(d):
return res

return decode(d)


class MakePathSafeAction(argparse.Action):
def __call__(self, parser, namespace, path, option_string=None):
try:
sanitized_path = make_path_safe(path)
except ValueError as e:
raise argparse.ArgumentError(self, e)
if sanitized_path == ".":
raise argparse.ArgumentError(self, f"{path!r} is not a valid file name")
setattr(namespace, self.dest, sanitized_path)
3 changes: 2 additions & 1 deletion src/borg/item.pyx
Expand Up @@ -7,6 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize
from .constants import ITEM_KEYS, ARCHIVE_KEYS
from .helpers import StableDict
from .helpers import format_file_size
from .helpers.fs import assert_sanitized_path, to_sanitized_path
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
from .helpers.time import OutputTimestamp, safe_timestamp

Expand Down Expand Up @@ -262,7 +263,7 @@ cdef class Item(PropDict):

# properties statically defined, so that IDEs can know their names:

path = PropDictProperty(str, 'surrogate-escaped str')
path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path)
source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target
target = PropDictProperty(str, 'surrogate-escaped str')
user = PropDictProperty(str, 'surrogate-escaped str')
Expand Down
19 changes: 19 additions & 0 deletions src/borg/testsuite/__init__.py
Expand Up @@ -23,6 +23,7 @@
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from .. import platform


# Note: this is used by borg.selftest, do not use or import py.test functionality here.

from ..fuse_impl import llfuse, has_pyfuse3, has_llfuse
Expand Down Expand Up @@ -61,6 +62,24 @@ def same_ts_ns(ts_ns1, ts_ns2):
return diff_ts <= diff_max


rejected_dotdot_paths = (
"..",
"../",
"../etc/shadow",
"/..",
"/../",
"/../etc",
"/../etc/",
"etc/..",
"/etc/..",
"/etc/../etc/shadow",
"//etc/..",
"etc//..",
"etc/..//",
"foo/../bar",
)


@contextmanager
def unopened_tempfile():
with tempfile.TemporaryDirectory() as tempdir:
Expand Down
7 changes: 7 additions & 0 deletions src/borg/testsuite/archive.py
Expand Up @@ -8,6 +8,7 @@
import pytest

from . import BaseTestCase
from . import rejected_dotdot_paths
from ..crypto.key import PlaintextKey
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
Expand Down Expand Up @@ -394,3 +395,9 @@ def test_get_item_uid_gid():
# as there is nothing, it'll fall back to uid_default/gid_default.
assert uid == 0
assert gid == 16


def test_reject_non_sanitized_item():
for path in rejected_dotdot_paths:
with pytest.raises(ValueError, match="unexpected '..' element in path"):
Item(path=path, user="root", group="root")
31 changes: 29 additions & 2 deletions src/borg/testsuite/archiver/create_cmd.py
Expand Up @@ -279,6 +279,33 @@ def test_create_no_permission_file(self):
assert "input/file2" not in out # it skipped file2
assert "input/file3" in out

def test_sanitized_stdin_name(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
self.cmd(f"--repo={self.repository_location}", "create", "--stdin-name", "./a//path", "test", "-", input=b"")
item = json.loads(self.cmd(f"--repo={self.repository_location}", "list", "test", "--json-lines"))
assert item["path"] == "a/path"

def test_dotdot_stdin_name(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
output = self.cmd(
f"--repo={self.repository_location}",
"create",
"--stdin-name",
"foo/../bar",
"test",
"-",
input=b"",
exit_code=2,
)
assert output.endswith("'..' element in path 'foo/../bar'" + os.linesep)

def test_dot_stdin_name(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
output = self.cmd(
f"--repo={self.repository_location}", "create", "--stdin-name", "./", "test", "-", input=b"", exit_code=2
)
assert output.endswith("'./' is not a valid file name" + os.linesep)

def test_create_content_from_command(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
input_data = "some test content"
Expand Down Expand Up @@ -586,7 +613,7 @@ def test_exclude_keep_tagged(self):
)
self._assert_test_keep_tagged()

def test_path_normalization(self):
def test_path_sanitation(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
self.create_regular_file("dir1/dir2/file", size=1024 * 80)
with changedir("input/dir1/dir2"):
Expand All @@ -595,7 +622,7 @@ def test_path_normalization(self):
self.assert_not_in("..", output)
self.assert_in(" input/dir1/dir2/file", output)

def test_exclude_normalization(self):
def test_exclude_sanitation(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
self.create_regular_file("file1", size=1024 * 80)
self.create_regular_file("file2", size=1024 * 80)
Expand Down
Binary file added src/borg/testsuite/archiver/dotdot_path.tar
Binary file not shown.
23 changes: 23 additions & 0 deletions src/borg/testsuite/archiver/tar_cmds.py
Expand Up @@ -129,6 +129,29 @@ def test_import_tar(self, tar_format="PAX"):
self.cmd(f"--repo={self.repository_location}", "extract", "dst")
self.assert_dirs_equal("input", "output/input", ignore_ns=True, ignore_xattrs=True)

def test_import_unusual_tar(self):
# Contains these, unusual entries:
# /foobar
# ./bar
# ./foo2/
# ./foo//bar
# ./
tar_archive = os.path.join(os.path.dirname(__file__), "unusual_paths.tar")

self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")
self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", tar_archive)
files = self.cmd(f"--repo={self.repository_location}", "list", "dst", "--format", "{path}{NL}").splitlines()
self.assert_equal(set(files), {"foobar", "bar", "foo2", "foo/bar", "."})

def test_import_tar_with_dotdot(self):
# Contains this file:
# ../../../../etc/shadow
tar_archive = os.path.join(os.path.dirname(__file__), "dotdot_path.tar")

self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")
with pytest.raises(ValueError, match="unexpected '..' element in path '../../../../etc/shadow'"):
self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", tar_archive, exit_code=2)

@requires_gzip
def test_import_tar_gz(self, tar_format="GNU"):
if not shutil.which("gzip"):
Expand Down
Binary file added src/borg/testsuite/archiver/unusual_paths.tar
Binary file not shown.
38 changes: 30 additions & 8 deletions src/borg/testsuite/helpers.py
Expand Up @@ -25,7 +25,7 @@
PlaceholderError,
replace_placeholders,
)
from ..helpers import make_path_safe, clean_lines
from ..helpers import remove_dotdot_prefixes, make_path_safe, clean_lines
from ..helpers import interval
from ..helpers import get_base_dir, get_cache_dir, get_keys_dir, get_security_dir, get_config_dir, get_runtime_dir
from ..helpers import is_slow_msgpack
Expand All @@ -48,6 +48,7 @@
from ..platform import is_cygwin, is_win32, is_darwin, swidth

from . import BaseTestCase, FakeInputs, are_hardlinks_supported
from . import rejected_dotdot_paths


def test_bin_to_hex():
Expand Down Expand Up @@ -393,16 +394,37 @@ def test_chunkerparams():
ChunkerParams("fixed,%d,%d" % (4096, MAX_DATA_SIZE + 1)) # too big header size


class RemoveDotdotPrefixesTestCase(BaseTestCase):
def test(self):
self.assert_equal(remove_dotdot_prefixes("."), ".")
self.assert_equal(remove_dotdot_prefixes(".."), ".")
self.assert_equal(remove_dotdot_prefixes("/"), ".")
self.assert_equal(remove_dotdot_prefixes("//"), ".")
self.assert_equal(remove_dotdot_prefixes("foo"), "foo")
self.assert_equal(remove_dotdot_prefixes("foo/bar"), "foo/bar")
self.assert_equal(remove_dotdot_prefixes("/foo/bar"), "foo/bar")
self.assert_equal(remove_dotdot_prefixes("../foo/bar"), "foo/bar")


class MakePathSafeTestCase(BaseTestCase):
def test(self):
self.assert_equal(make_path_safe("."), ".")
self.assert_equal(make_path_safe("./"), ".")
self.assert_equal(make_path_safe("./foo"), "foo")
self.assert_equal(make_path_safe(".//foo"), "foo")
self.assert_equal(make_path_safe(".//foo//bar//"), "foo/bar")
self.assert_equal(make_path_safe("/foo/bar"), "foo/bar")
self.assert_equal(make_path_safe("/foo/bar"), "foo/bar")
self.assert_equal(make_path_safe("/f/bar"), "f/bar")
self.assert_equal(make_path_safe("fo/bar"), "fo/bar")
self.assert_equal(make_path_safe("../foo/bar"), "foo/bar")
self.assert_equal(make_path_safe("../../foo/bar"), "foo/bar")
self.assert_equal(make_path_safe("/"), ".")
self.assert_equal(make_path_safe("/"), ".")
self.assert_equal(make_path_safe("//foo/bar"), "foo/bar")
self.assert_equal(make_path_safe("//foo/./bar"), "foo/bar")
self.assert_equal(make_path_safe(".test"), ".test")
self.assert_equal(make_path_safe(".test."), ".test.")
self.assert_equal(make_path_safe("..test.."), "..test..")
self.assert_equal(make_path_safe("/te..st/foo/bar"), "te..st/foo/bar")
self.assert_equal(make_path_safe("/..test../abc//"), "..test../abc")

for path in rejected_dotdot_paths:
with pytest.raises(ValueError, match="unexpected '..' element in path"):
make_path_safe(path)


class MockArchive:
Expand Down

0 comments on commit 438cf2e

Please sign in to comment.