From c2761a9950ae3bb404d1270bf0ff15803bf2c542 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 27 Mar 2023 20:02:47 +0200 Subject: [PATCH] extract: --skip-errors ignores corrupted chunks (w/ log message), see #840 Forward port of a change implemented by @enkore back in 2016: https://github.com/enkore/borg/commit/09b21b117c6d34032a9483dd82086a4fae532cb6 --- setup.cfg | 1 + src/borg/archive.py | 55 ++++++++++++++++------ src/borg/archiver/extract_cmd.py | 14 +++++- src/borg/testsuite/archiver/extract_cmd.py | 20 ++++++++ 4 files changed, 74 insertions(+), 16 deletions(-) diff --git a/setup.cfg b/setup.cfg index dbe6e9778b1..a5f965e9443 100644 --- a/setup.cfg +++ b/setup.cfg @@ -126,6 +126,7 @@ per_file_ignores = src/borg/archiver/debug_cmd.py:F405 src/borg/archiver/delete_cmd.py:F405 src/borg/archiver/diff_cmd.py:F405 + src/borg/archiver/extract_cmd.py:F405 src/borg/archiver/help_cmd.py:E501,F405 src/borg/archiver/key_cmds.py:F405 src/borg/archiver/prune_cmd.py:F405 diff --git a/src/borg/archive.py b/src/borg/archive.py index ecbbea5a138..5247ff597f2 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -791,6 +791,7 @@ def extract_item( stripped_components=0, original_path=None, pi=None, + skip_integrity_errors=False, ): """ Extract archive item. @@ -804,6 +805,8 @@ def extract_item( :param stripped_components: stripped leading path components to correct hard link extraction :param original_path: 'path' key as stored in archive :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes) + :param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError + (ignored for dry_run and stdout) """ has_damaged_chunks = "chunks_healthy" in item if dry_run or stdout: @@ -832,7 +835,7 @@ def extract_item( ) if has_damaged_chunks: raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.") - return + return True original_path = original_path or item.path dest = self.cwd @@ -862,20 +865,43 @@ def make_parent(path): make_parent(path) with self.extract_helper(item, path, hlm) as hardlink_set: if hardlink_set: - return + return True with backup_io("open"): fd = open(path, "wb") with fd: ids = [c.id for c in item.chunks] - for data in self.pipeline.fetch_many(ids, is_preloaded=True): + chunk_index = -1 + chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True) + skipped_errors = False + while True: + try: + chunk_index += 1 + data = next(chunk_iterator) + except StopIteration: + break + except IntegrityError as err: + if not skip_integrity_errors: + raise + c = item.chunks[chunk_index] + size = c.size + logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err) + with backup_io("seek"): + fd.seek(size, 1) + skipped_errors = True + # restart chunk data generator + ids = [c.id for c in item.chunks[chunk_index + 1 :]] + chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True) + else: + with backup_io("write"): + size = len(data) + if sparse and zeros.startswith(data): + # all-zero chunk: create a hole in a sparse file + fd.seek(size, 1) + else: + fd.write(data) if pi: - pi.show(increase=len(data), info=[remove_surrogates(item.path)]) - with backup_io("write"): - if sparse and zeros.startswith(data): - # all-zero chunk: create a hole in a sparse file - fd.seek(len(data), 1) - else: - fd.write(data) + pi.show(increase=size, info=[remove_surrogates(item.path)]) + with backup_io("truncate_and_attrs"): pos = item_chunks_size = fd.tell() fd.truncate(pos) @@ -889,7 +915,7 @@ def make_parent(path): ) if has_damaged_chunks: raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.") - return + return not skipped_errors with backup_io: # No repository access beyond this point. if stat.S_ISDIR(mode): @@ -903,7 +929,7 @@ def make_parent(path): with self.extract_helper(item, path, hlm) as hardlink_set: if hardlink_set: # unusual, but possible: this is a hardlinked symlink. - return + return True target = item.target try: os.symlink(target, path) @@ -914,18 +940,19 @@ def make_parent(path): make_parent(path) with self.extract_helper(item, path, hlm) as hardlink_set: if hardlink_set: - return + return True os.mkfifo(path) self.restore_attrs(path, item) elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): make_parent(path) with self.extract_helper(item, path, hlm) as hardlink_set: if hardlink_set: - return + return True os.mknod(path, item.mode, item.rdev) self.restore_attrs(path, item) else: raise Exception("Unknown archive item type %r" % item.mode) + return True def restore_attrs(self, path, item, symlink=False, fd=None): """ diff --git a/src/borg/archiver/extract_cmd.py b/src/borg/archiver/extract_cmd.py index a3fed094ee6..65ccf32898d 100644 --- a/src/borg/archiver/extract_cmd.py +++ b/src/borg/archiver/extract_cmd.py @@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive): progress = args.progress output_list = args.output_list dry_run = args.dry_run + skip_errors = args.skip_errors stdout = args.stdout sparse = args.sparse strip_components = args.strip_components @@ -75,7 +76,7 @@ def do_extract(self, args, repository, manifest, archive): dirs.append(item) archive.extract_item(item, stdout=stdout, restore_attrs=False) else: - archive.extract_item( + if not archive.extract_item( item, stdout=stdout, sparse=sparse, @@ -83,7 +84,9 @@ def do_extract(self, args, repository, manifest, archive): stripped_components=strip_components, original_path=orig_path, pi=pi, - ) + skip_integrity_errors=skip_errors, + ): + self.exit_code = EXIT_WARNING except (BackupOSError, BackupError) as e: self.print_warning("%s: %s", remove_surrogates(orig_path), e) @@ -174,6 +177,13 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser): action="store_true", help="create holes in output sparse file from all-zero chunks", ) + subparser.add_argument( + "--skip-errors", + dest="skip_errors", + action="store_true", + help="skip corrupted chunks with a log message (exit 1) instead of aborting " + "(no effect for --dry-run and --stdout)", + ) subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name") subparser.add_argument( "paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported" diff --git a/src/borg/testsuite/archiver/extract_cmd.py b/src/borg/testsuite/archiver/extract_cmd.py index f3266d011fc..18f5b56dd0e 100644 --- a/src/borg/testsuite/archiver/extract_cmd.py +++ b/src/borg/testsuite/archiver/extract_cmd.py @@ -585,6 +585,26 @@ def test_overwrite(self): with changedir("output"): self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=1) + def test_extract_skip_errors(self): + self.create_regular_file("file1", contents=b"a" * 280 + b"b" * 280) + self.cmd(f"--repo={self.repository_location}", "rcreate", "-e" "none") + self.cmd(f"--repo={self.repository_location}", "create", "--chunker-params", "7,9,8,128", "test", "input") + segment_files = sorted(os.listdir(os.path.join(self.repository_path, "data", "0")), reverse=True) + print( + ", ".join( + f"{fn}: {os.stat(os.path.join(self.repository_path, 'data', '0', fn)).st_size}b" for fn in segment_files + ) + ) + name = segment_files[3] # must be the segment file that has the file's chunks + with open(os.path.join(self.repository_path, "data", "0", name), "r+b") as fd: + fd.seek(100) + fd.write(b"XXXX") + with changedir("output"): + output = self.cmd(f"--repo={self.repository_location}", "extract", "--skip-errors", "test", exit_code=1) + assert "input/file1: chunk" in output + assert os.stat("input/file1").st_size == 560 + self.cmd(f"--repo={self.repository_location}", "check", exit_code=1) + # derived from test_extract_xattrs_errors() @pytest.mark.skipif( not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system or on this version of fakeroot"