Skip to content

Commit

Permalink
Use utf-8 codec in premisrw
Browse files Browse the repository at this point in the history
This avoids the following issue:

    > return ' '.join([str(x) for x in texts])
                       ^^^^^^
    E UnicodeEncodeError: 'ascii' codec can't encode characters in
      position 42-44: ordinal not in range(128)

This fix applies only to the Python 2 code path; the Python 3 code path is unchanged and will need more work overall.
  • Loading branch information
sevein committed Mar 8, 2019
1 parent f7ff13f commit f06008d
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions metsrw/plugins/premisrw/premis.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,12 +690,16 @@ def data_find_text(data, path):
simplified XPath ``path``.
"""
el = data_find(data, path)
if isinstance(el, (list, tuple)):
texts = [child for child in el[1:]
if not isinstance(child, (tuple, list, dict))]
if texts:
return ' '.join([str(x) for x in texts])
return None
if not isinstance(el, (list, tuple)):
return None
texts = [child for child in el[1:]
if not isinstance(child, (tuple, list, dict))]
if not texts:
return None
if six.PY2:
return ' '.join(
[x.encode('utf-8', errors='ignore') for x in texts])
return ' '.join([str(x) for x in texts])


def data_find_text_or_all(data, path, dyn_cls=False):
Expand Down

0 comments on commit f06008d

Please sign in to comment.