Skip to content

Commit

Permalink
extract: --skip-errors ignores corrupted chunks (w/ log message)
Browse files Browse the repository at this point in the history
  • Loading branch information
enkore committed Apr 11, 2016
1 parent 378140a commit 09b21b1
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 8 deletions.
37 changes: 31 additions & 6 deletions borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
parse_timestamp, to_localtime, format_time, format_timedelta, \
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, IntegrityError
from .repository import Repository
from .platform import acl_get, acl_set
from .chunker import Chunker
Expand Down Expand Up @@ -304,7 +304,7 @@ def add_file_chunks(chunks):
return stats

def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
hardlink_masters=None, original_path=None):
hardlink_masters=None, original_path=None, skip_integrity_errors=False):
"""
Extract archive item.
Expand All @@ -315,15 +315,18 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
:param original_path: b'path' key as stored in archive
:param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError (ignored for
dry_run and stdout)
"""

if dry_run or stdout:
if b'chunks' in item:
for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
if stdout:
sys.stdout.buffer.write(data)
if stdout:
sys.stdout.buffer.flush()
return
return True

original_path = original_path or item[b'path']
dest = self.cwd
Expand Down Expand Up @@ -353,16 +356,36 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
os.unlink(path)
if not hardlink_masters:
os.link(source, path)
return
return True
item[b'chunks'], link_target = hardlink_masters[item[b'source']]
if link_target:
# Hard link was extracted previously, just link
os.link(link_target, path)
return
return True
# Extract chunks, since the item which had the chunks was not extracted
with open(path, 'wb') as fd:
ids = [c[0] for c in item[b'chunks']]
for data in self.pipeline.fetch_many(ids, is_preloaded=True):
chunk_index = -1
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True)
skipped_errors = False
while True:
try:
chunk_index += 1
data = next(chunk_iterator)
except StopIteration:
break
except IntegrityError as ie:
if not skip_integrity_errors:
raise
chunk_id, size, _ = item[b'chunks'][chunk_index]
chunk_id = hexlify(chunk_id).decode('ascii')
logger.warning('%s: chunk %s: %s', remove_surrogates(item[b'path']), chunk_id, ie)
fd.seek(size, 1)
skipped_errors = True
# restart chunk data generator
ids = [c[0] for c in item[b'chunks'][chunk_index + 1:]]
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True)
continue
if sparse and self.zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
Expand All @@ -375,6 +398,7 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
if hardlink_masters:
# Update master entry with extracted file path, so that following hardlinks don't extract twice.
hardlink_masters[item.get(b'source') or original_path] = (None, path)
return not skipped_errors
elif stat.S_ISDIR(mode):
if not os.path.exists(path):
os.makedirs(path)
Expand All @@ -401,6 +425,7 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp
self.restore_attrs(path, item)
else:
raise Exception('Unknown archive item type %r' % item[b'mode'])
return True

def restore_attrs(self, path, item, symlink=False, fd=None):
xattrs = item.get(b'xattrs', {})
Expand Down
11 changes: 9 additions & 2 deletions borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ def do_extract(self, args, repository, manifest, key, archive):

output_list = args.output_list
dry_run = args.dry_run
skip_errors = args.skip_errors
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
Expand Down Expand Up @@ -420,8 +421,10 @@ def item_is_hardlink_master(item):
dirs.append(item)
archive.extract_item(item, restore_attrs=False)
else:
archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
original_path=orig_path)
if not archive.extract_item(item, stdout=stdout, sparse=sparse,
hardlink_masters=hardlink_masters, original_path=orig_path,
skip_integrity_errors=skip_errors):
self.exit_code = EXIT_WARNING
except OSError as e:
self.print_warning('%s: %s', remove_surrogates(orig_path), e)

Expand Down Expand Up @@ -1359,6 +1362,10 @@ def build_parser(self, args=None, prog=None):
subparser.add_argument('--sparse', dest='sparse',
action='store_true', default=False,
help='create holes in output sparse file from all-zero chunks')
subparser.add_argument('--skip-errors', dest='skip_errors',
action='store_true', default=False,
help='skip corrupted chunks with a log message (exit 1) instead of aborting (no effect '
'for --dry-run and --stdout)')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to extract')
Expand Down
16 changes: 16 additions & 0 deletions borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,22 @@ def test_overwrite(self):
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', exit_code=1)

def test_extract_ignore_error(self):
    """Extraction with --skip-errors warns about a corrupted chunk but keeps going.

    Creates a file large enough to span several small chunks, flips bytes
    inside one repository segment, then checks that extraction emits a
    per-chunk warning, still produces a file of the full original size,
    and that both extract and check report failure via exit code 1.
    """
    # Two distinct byte runs so the tiny chunker params split the file
    # into multiple chunks (280 + 280 = 560 bytes total).
    source_file = os.path.join(self.input_path, 'file1')
    with open(source_file, 'wb') as f:
        f.write(b'a' * 280)
        f.write(b'b' * 280)
    self.cmd('init', self.repository_location)
    self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test', 'input')
    # Corrupt the newest segment file in the repository data store.
    segment_dir = os.path.join(self.tmpdir, 'repository', 'data', '0')
    newest_segment = sorted(os.listdir(segment_dir), reverse=True)[0]
    with open(os.path.join(segment_dir, newest_segment), 'r+b') as f:
        f.seek(100)
        f.write(b'XXXX')
    with changedir('output'):
        output = self.cmd('extract', '--skip-errors', self.repository_location + '::test', exit_code=1)
    # A warning naming the file and the bad chunk must have been logged.
    assert 'input/file1: chunk' in output
    # The skipped chunk leaves a hole, but the file size is preserved.
    assert os.stat('input/file1').st_size == 560
    self.cmd('check', self.repository_location, exit_code=1)

def test_rename(self):
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('dir2/file2', size=1024 * 80)
Expand Down

0 comments on commit 09b21b1

Please sign in to comment.