Skip to content

Commit

Permalink
Improvements to syncutil and repo-cp
Browse files (browse the repository at this point in the history)
* check the overwrite setting first and bail out right away if the object
  already exists in the destination and should not be overwritten
* improve logging configuration and handling
* ignore leading and trailing whitespace when reading pids from a file
  • Loading branch information
rlskoeser committed Aug 15, 2016
1 parent 0c491df commit 03b5cd6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 15 deletions.
22 changes: 12 additions & 10 deletions eulfedora/syncutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ def sync_object(src_obj, dest_repo, export_context='migrate',
# NOTE: currently exceptions are expected to be handled by the
# calling method; see repo-cp script for an example

# if overwrite is not requested, check first and bail out
dest_obj = dest_repo.get_object(src_obj.pid)
if not overwrite and dest_obj.exists:
logger.info('%s exists in destination repo and no overwrite; skipping',
src_obj.pid)
return False

if show_progress and progressbar:
# calculate rough estimate of object size
size_estimate = estimate_object_size(src_obj,
Expand Down Expand Up @@ -121,14 +128,9 @@ def data(self):
export_data = (re.sub(checksum_re, '', chunk)
for chunk in export_data)


dest_obj = dest_repo.get_object(src_obj.pid)
if dest_obj.exists:
if overwrite:
dest_repo.purge_object(src_obj.pid)
else:
# exception ?
return False
if overwrite and dest_obj.exists:
print 'overwriting'
dest_repo.purge_object(src_obj.pid)

result = dest_repo.ingest(export_data)
if pbar:
Expand Down Expand Up @@ -462,8 +464,6 @@ def encoded_datastream(self):
yield decoded_content




def binarycontent_sections(chunk):
'''Split a chunk of data into sections by start and end binary
content tags.'''
Expand Down Expand Up @@ -515,6 +515,7 @@ def estimate_object_size(obj, archive=True):

return size_estimate


def base64_size(input_size):
# from http://stackoverflow.com/questions/1533113/calculate-the-size-to-a-base-64-encoded-message
adjustment = 3 - (input_size % 3) if (input_size % 3) else 0
Expand All @@ -533,6 +534,7 @@ def humanize_file_size(size):
p = math.floor(math.log(size, 2)/10)
return "%.2f%s" % (size/math.pow(1024, p), units[int(p)])


def endswith_partial(text, partial_str):
'''Check if the text ends with any partial version of the
specified string.'''
Expand Down
24 changes: 19 additions & 5 deletions scripts/repo-cp
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ def repo_copy():
pids = args.pids
elif args.file:
with open(args.file) as pidlistfile:
pids = pidlistfile.read().splitlines()
# strip leading and trailing whitespace from each pid, for convenience
pids = [p.strip() for p in pidlistfile.read().splitlines()]
else:
print('Specify either one or more pids or a file with a list of pids')
parser.print_help()
Expand All @@ -147,6 +148,7 @@ def repo_copy():
allow_overwrite = cfg.has_option(args.dest, 'allow_overwrite') and \
cfg.getboolean(args.dest, 'allow_overwrite')

# configure logging based on verbosity level requested
logging.config.dictConfig(get_logging_config(args.verbosity))

for pid in pids:
Expand All @@ -156,7 +158,13 @@ def repo_copy():
if isinstance(src_repo, Airlock):
with open(pid, 'rb') as export:
result = dest_repo.ingest(export)
print('%s copied' % result)
if result:
print('%s copied' % result)
else:
# a false result means not copied, but not an error; currently this
# means skipped because the object exists and overwrite is not allowed
print('%s skipped' % pid)

continue

src_obj = src_repo.get_object(pid)
Expand All @@ -178,7 +186,12 @@ def repo_copy():
overwrite=allow_overwrite, show_progress=args.progress,
requires_auth=args.requires_auth, omit_checksums=args.omit_checksums,
verify=args.verify)
print('%s copied' % result)
if result:
print('%s copied' % result)
else:
# a false result means not copied, but not an error; currently this
# means skipped because the object exists and overwrite is not allowed
print('%s skipped' % pid)
except ChecksumMismatch:
print('ChecksumMismatch on %s' % pid)

Expand All @@ -199,12 +212,12 @@ def get_logging_config(level):
'disable_existing_loggers': True,
'formatters': {
'simple': {
'format': '%(levelname)s: %(message)s',
'format': '%(message)s',
},
},
'handlers': {
'console': {
'level': level,
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'simple'
},
Expand All @@ -218,6 +231,7 @@ def get_logging_config(level):
}
}


def is_airlock(cfg, name):
# if a name is not in the config file and is a directory,
# consider it an airlock
Expand Down

0 comments on commit 03b5cd6

Please sign in to comment.