extract: --skip-errors ignores corrupted chunks (w/ log message), see #840

Forward port of a change implemented by @enkore back in 2016:

enkore@09b21b1
ThomasWaldmann committed Nov 5, 2023
1 parent 1f48e50 commit ec1937d
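
In practice: borg extract --skip-errors NAME logs each corrupted chunk (file path, chunk id, error), seeks past it so an all-zero gap of the chunk's stored size takes its place, and ends the run with exit code 1 (warning) instead of aborting at the first IntegrityError. Per the help text below, the flag has no effect for --dry-run and --stdout.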
Showing 3 changed files with 82 additions and 18 deletions.
57 changes: 42 additions & 15 deletions src/borg/archive.py
@@ -788,6 +788,7 @@ def extract_item(
         hlm=None,
         pi=None,
         continue_extraction=False,
+        skip_integrity_errors=False,
     ):
         """
         Extract archive item.
@@ -800,6 +801,8 @@ def extract_item(
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         :param continue_extraction: continue a previously interrupted extraction of same archive
+        :param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError
+            (ignored for dry_run and stdout)
         """
 
         def same_item(item, st):
@@ -849,15 +852,15 @@ def same_item(item, st):
                             )
                         if has_damaged_chunks:
                             raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
-            return
+            return True
 
         dest = self.cwd
         path = os.path.join(dest, item.path)
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.stat(path, follow_symlinks=False)
             if continue_extraction and same_item(item, st):
-                return  # done! we already have fully extracted this file in a previous run.
+                return True  # done! we already have fully extracted this file in a previous run.
             elif stat.S_ISDIR(st.st_mode):
                 os.rmdir(path)
             else:
@@ -878,20 +881,43 @@ def make_parent(path):
             make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 with backup_io("open"):
                     fd = open(path, "wb")
                 with fd:
                     ids = [c.id for c in item.chunks]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM):
+                    chunk_index = -1
+                    chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
+                    skipped_errors = False
+                    while True:
+                        try:
+                            chunk_index += 1
+                            data = next(chunk_iterator)
+                        except StopIteration:
+                            break
+                        except IntegrityError as err:
+                            if not skip_integrity_errors:
+                                raise
+                            c = item.chunks[chunk_index]
+                            size = c.size
+                            logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err)
+                            with backup_io("seek"):
+                                fd.seek(size, 1)
+                            skipped_errors = True
+                            # restart chunk data generator
+                            ids = [c.id for c in item.chunks[chunk_index + 1 :]]
+                            chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
+                        else:
+                            with backup_io("write"):
+                                size = len(data)
+                                if sparse and zeros.startswith(data):
+                                    # all-zero chunk: create a hole in a sparse file
+                                    fd.seek(size, 1)
+                                else:
+                                    fd.write(data)
                         if pi:
-                            pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                        with backup_io("write"):
-                            if sparse and zeros.startswith(data):
-                                # all-zero chunk: create a hole in a sparse file
-                                fd.seek(len(data), 1)
-                            else:
-                                fd.write(data)
+                            pi.show(increase=size, info=[remove_surrogates(item.path)])
+
                     with backup_io("truncate_and_attrs"):
                         pos = item_chunks_size = fd.tell()
                         fd.truncate(pos)
@@ -905,7 +931,7 @@ def make_parent(path):
                 )
             if has_damaged_chunks:
                 raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
-            return
+            return not skipped_errors
         with backup_io:
             # No repository access beyond this point.
             if stat.S_ISDIR(mode):
@@ -919,7 +945,7 @@ def make_parent(path):
                 with self.extract_helper(item, path, hlm) as hardlink_set:
                     if hardlink_set:
                         # unusual, but possible: this is a hardlinked symlink.
-                        return
+                        return True
                     target = item.target
                     try:
                         os.symlink(target, path)
@@ -930,18 +956,19 @@ def make_parent(path):
                 make_parent(path)
                 with self.extract_helper(item, path, hlm) as hardlink_set:
                     if hardlink_set:
-                        return
+                        return True
                     os.mkfifo(path)
                 self.restore_attrs(path, item)
             elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
                 make_parent(path)
                 with self.extract_helper(item, path, hlm) as hardlink_set:
                     if hardlink_set:
-                        return
+                        return True
                     os.mknod(path, item.mode, item.rdev)
                 self.restore_attrs(path, item)
             else:
                 raise Exception("Unknown archive item type %r" % item.mode)
+        return True
 
     def restore_attrs(self, path, item, symlink=False, fd=None):
         """
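
The subtle point in the archive.py hunk above is that a Python generator which raised an exception is finished for good, so after a bad chunk the fetch pipeline has to be re-created for the remaining chunk ids, while the chunk's stored size (taken from metadata, since the data itself is unusable) tells the extractor how far to seek. A minimal standalone sketch of that skip-and-restart pattern; ChunkError, fetch_many and the in-memory CHUNKS store are hypothetical stand-ins for borg's IntegrityError and chunk pipeline:

import io

class ChunkError(Exception):
    pass

# hypothetical in-memory chunk store standing in for borg's repository
CHUNKS = {1: b"aaaa", 2: b"bbbb", 3: b"cccc"}
CORRUPTED = {2}

def fetch_many(ids):
    # like pipeline.fetch_many(): yields chunk data, raises on a corrupted chunk
    for cid in ids:
        if cid in CORRUPTED:
            raise ChunkError(f"chunk {cid} failed integrity check")
        yield CHUNKS[cid]

def extract(chunks, fd, skip_errors=True):
    # chunks: list of (id, size) pairs; returns False if any chunk was skipped
    index = -1
    it = fetch_many([cid for cid, _ in chunks])
    skipped = False
    while True:
        try:
            index += 1
            data = next(it)
        except StopIteration:
            break
        except ChunkError as err:
            if not skip_errors:
                raise
            _, size = chunks[index]
            print(f"warning: {err}")
            fd.seek(size, 1)  # leave a zero-filled gap of the chunk's stored size
            skipped = True
            # a generator that raised is finished; restart it after the bad chunk
            it = fetch_many([cid for cid, _ in chunks[index + 1:]])
        else:
            fd.write(data)
    return not skipped

fd = io.BytesIO()
ok = extract([(1, 4), (2, 4), (3, 4)], fd)
assert not ok
assert fd.getvalue() == b"aaaa\x00\x00\x00\x00cccc"

This is also why the hunk counts chunk_index by hand instead of using enumerate(): the counter has to keep running across generator restarts so that item.chunks[chunk_index] always names the chunk that just failed.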
21 changes: 18 additions & 3 deletions src/borg/archiver/extract_cmd.py
@@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive):
         progress = args.progress
         output_list = args.output_list
         dry_run = args.dry_run
+        skip_errors = args.skip_errors
         stdout = args.stdout
         sparse = args.sparse
         strip_components = args.strip_components
@@ -76,9 +77,16 @@ def do_extract(self, args, repository, manifest, archive):
                         dirs.append(item)
                         archive.extract_item(item, stdout=stdout, restore_attrs=False)
                     else:
-                        archive.extract_item(
-                            item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
-                        )
+                        if not archive.extract_item(
+                            item,
+                            stdout=stdout,
+                            sparse=sparse,
+                            hlm=hlm,
+                            pi=pi,
+                            continue_extraction=continue_extraction,
+                            skip_integrity_errors=skip_errors,
+                        ):
+                            self.exit_code = EXIT_WARNING
             except (BackupOSError, BackupError) as e:
                 self.print_warning("%s: %s", remove_surrogates(orig_path), e)
 
@@ -175,6 +183,13 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
             action="store_true",
             help="continue a previously interrupted extraction of same archive",
         )
+        subparser.add_argument(
+            "--skip-errors",
+            dest="skip_errors",
+            action="store_true",
+            help="skip corrupted chunks with a log message (exit 1) instead of aborting "
+            "(no effect for --dry-run and --stdout)",
+        )
         subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
         subparser.add_argument(
             "paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
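
Note the contract this relies on: extract_item() now returns True only for a fully restored item, so a False return (some chunks were skipped) downgrades the whole run to EXIT_WARNING (exit code 1) while extraction of the remaining items continues.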
22 changes: 22 additions & 0 deletions src/borg/testsuite/archiver/extract_cmd.py
@@ -625,6 +625,28 @@ def test_overwrite(archivers, request):
     cmd(archiver, "extract", "test", exit_code=1)
 
 
+def test_extract_skip_errors(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", contents=b"a" * 280 + b"b" * 280)
+    cmd(archiver, "rcreate", "-e" "none")
+    cmd(archiver, "create", "--chunker-params", "7,9,8,128", "test", "input")
+    segment_files = sorted(os.listdir(os.path.join(archiver.repository_path, "data", "0")), reverse=True)
+    print(
+        ", ".join(
+            f"{fn}: {os.stat(os.path.join(archiver.repository_path, 'data', '0', fn)).st_size}b" for fn in segment_files
+        )
+    )
+    name = segment_files[3]  # must be the segment file that has the file's chunks
+    with open(os.path.join(archiver.repository_path, "data", "0", name), "r+b") as fd:
+        fd.seek(100)
+        fd.write(b"XXXX")
+    with changedir("output"):
+        output = cmd(archiver, "extract", "--skip-errors", "test", exit_code=1)
+        assert "input/file1: chunk" in output
+        assert os.stat("input/file1").st_size == 560
+    cmd(archiver, "check", exit_code=1)
+
+
 # derived from test_extract_xattrs_errors()
 @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system, or this version of fakeroot")
 def test_do_not_fail_when_percent_is_in_xattr_name(archivers, request):
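
The test forges the corruption directly in the repository: with encryption disabled and tiny chunker params, the 560-byte file is cut into several small chunks inside one segment file, and overwriting four bytes there breaks at least one of them. Extraction with --skip-errors must then still produce a 560-byte output file (with zeros where the bad chunk was), print the chunk warning, exit with code 1, and leave a repository that borg check still flags as damaged.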
