diff --git a/fixtures/mets_dir_with_fptrs.xml b/fixtures/mets_dir_with_fptrs.xml
new file mode 100644
index 0000000..a4f3ffc
--- /dev/null
+++ b/fixtures/mets_dir_with_fptrs.xml
@@ -0,0 +1,1309 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 3b822067-8878-418b-881e-96303a6f60d3
+
+ ingestion
+ 2012-01-17T20:16:38
+
+
+
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 60f8651a-ddd0-4048-85a2-dcf4f828e3a5
+
+ message digest calculation
+ 2012-01-17T20:16:38
+ program="python"; module="hashlib.sha256()"
+
+
+
+ 7bab5874a44f22e9fb7240cb70e674d12ae4d61a96db15e7cf0755d55db33d81
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ f9c5af55-e4a7-4e34-acf4-10a57e74cf70
+
+ virus check
+ 2012-01-17T20:16:58
+ program="Clam AV"; version="ClamAV 0.96.5"; virusDefinitions="14142/Mon Dec 19 09:17:31 2011
+"
+
+ Pass
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 2744faaf-2e9b-4efe-aec2-4a1e28a652d4
+
+ format identification
+ 2012-01-17T20:16:58
+ program="Droid"; version="3.0"
+
+ Positive
+
+ x-fmt/385
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ b828efeb-d285-496b-90e0-68e46213e4a0
+
+ format identification
+ 2012-01-17T20:16:58
+ program="Droid"; version="3.0"
+
+ Positive
+
+ x-fmt/386
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 111ff912-2590-4b66-ace9-24e58ea8be20
+
+ validation
+ 2012-01-17T20:16:58
+ program="Jhove"; version="1.5"
+
+ pass
+
+ format="bytestream"; result="Well-Formed and valid"
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 165ea4af-269b-40ca-9fa1-a3e042daafba
+
+ fixity check
+ 2012-01-17T20:20:16
+ program="python"; module="hashlib.sha256()"
+
+ Pass
+
+ 7bab5874a44f22e9fb7240cb70e674d12ae4d61a96db15e7cf0755d55db33d81verified
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+ Archivematica
+ software
+
+
+
+
+
+
+
+
+
+ repository code
+ ORG
+
+ Your Organization Name Here
+ organization
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UUID
+ df824ad3-3e35-4d90-be9c-f88d164814ce
+
+ ingestion
+ 2012-01-17T20:16:54
+
+
+
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ d8114ef7-5317-4351-bb3d-6d9081f977a9
+
+ message digest calculation
+ 2012-01-17T20:16:54
+ program="python"; module="hashlib.sha256()"
+
+
+
+ 7c91d344e65c549b2fcba47fb28adaaec3fa45354c5d2714e7a36e97ac72bf3f
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ b455b0cc-8b44-4b9a-b7c6-f73f34c458e4
+
+ virus check
+ 2012-01-17T20:16:41
+ program="Clam AV"; version="ClamAV 0.96.5"; virusDefinitions="14130/Fri Dec 16 17:32:22 2011
+"
+
+ Pass
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ d67cdd67-1469-4242-a07a-abf6aa8ffdc7
+
+ format identification
+ 2012-01-17T20:16:58
+ program="Droid"; version="3.0"
+
+ Positive
+
+ x-fmt/385
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ bdbf2da0-fa93-4a74-936c-65fc36aafe75
+
+ format identification
+ 2012-01-17T20:16:58
+ program="Droid"; version="3.0"
+
+ Positive
+
+ x-fmt/386
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ fe7539de-9abf-4e92-b1e2-5e0f4292cf3e
+
+ validation
+ 2012-01-17T20:16:58
+ program="Jhove"; version="1.5"
+
+ pass
+
+ format="bytestream"; result="Well-Formed and valid"
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 6c710d31-ac3e-45e0-8bde-fe0477b9463c
+
+ fixity check
+ 2012-01-17T20:20:00
+ program="python"; module="hashlib.sha256()"
+
+ Pass
+
+ 7c91d344e65c549b2fcba47fb28adaaec3fa45354c5d2714e7a36e97ac72bf3fverified
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+ Archivematica
+ software
+
+
+
+
+
+
+
+
+
+ repository code
+ ORG
+
+ Your Organization Name Here
+ organization
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 9d744150-2ea2-4995-97ad-ac9e6fcfa5d0
+
+ ingestion
+ 2012-01-17T20:16:54
+
+
+
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 8295cf03-44b6-4dbe-b1b1-b083c372ce28
+
+ message digest calculation
+ 2012-01-17T20:16:54
+ program="python"; module="hashlib.sha256()"
+
+
+
+ 0dff86ef77411a8f6bcaf453fab5f1c9c88bd4ed7da526827d982cf767eaacfb
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 2a5d15cb-9c1e-4ba9-9f31-c2e67d41f852
+
+ virus check
+ 2012-01-17T20:16:58
+ program="Clam AV"; version="ClamAV 0.96.5"; virusDefinitions="14142/Mon Dec 19 09:17:31 2011
+"
+
+ Pass
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 889ed287-e376-4ed8-bcf4-458cc8f699d7
+
+ format identification
+ 2012-01-17T20:16:42
+ program="Droid"; version="3.0"
+
+ Tentative
+
+ x-fmt/18
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ 0bd4f32b-6d7c-49f6-b7e1-cbe624cc5450
+
+ validation
+ 2012-01-17T20:16:42
+ program="Jhove"; version="1.5"
+
+ pass
+
+ format="ASCII"; result="Well-Formed and valid"
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ UUID
+ c46c6d84-4ba2-4c0e-95f1-6ba05178f335
+
+ fixity check
+ 2012-01-17T20:20:00
+ program="python"; module="hashlib.sha256()"
+
+ Pass
+
+ 0dff86ef77411a8f6bcaf453fab5f1c9c88bd4ed7da526827d982cf767eaacfbverified
+
+
+
+ preservation system
+ Archivematica-0.8
+
+
+ repository code
+ ORG
+
+
+
+
+
+
+
+
+
+
+ preservation system
+ Archivematica-0.8
+
+ Archivematica
+ software
+
+
+
+
+
+
+
+
+
+ repository code
+ ORG
+
+ Your Organization Name Here
+ organization
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/metsrw/__init__.py b/metsrw/__init__.py
index 7b3e56e..716a0c2 100644
--- a/metsrw/__init__.py
+++ b/metsrw/__init__.py
@@ -43,7 +43,7 @@
LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())
-__version__ = '0.2.4'
+__version__ = '0.3.0'
__all__ = [
'AMDSec',
diff --git a/metsrw/fsentry.py b/metsrw/fsentry.py
index 0103146..d609f62 100644
--- a/metsrw/fsentry.py
+++ b/metsrw/fsentry.py
@@ -105,10 +105,16 @@ def __init__(self, path=None, label=None, use='original', type=u'Item',
# path can validly be any encoding; if this value needs
# to be spliced later on, it's better to treat it as a
# bytestring than as actually being encoded text.
- # TODO update this with six and bytes
- if path:
- path = str(path)
- self.path = path
+ if six.PY2:
+ if isinstance(path, six.text_type):
+ self.path = path.encode('utf-8')
+ else:
+ self.path = path
+ else: # TODO: Py3 is still using Unicode.
+ if isinstance(path, six.binary_type):
+ self.path = path.decode('utf-8', errors="strict")
+ else:
+ self.path = path
if label is None and path is not None:
label = os.path.basename(path)
self.label = label
@@ -135,6 +141,20 @@ def __init__(self, path=None, label=None, use='original', type=u'Item',
self.amdsecs = []
self.dmdsecs = []
+    @classmethod
+    def dir(cls, label, children):
+        """Return a directory entry with the given label and children.
+
+        The entry is created through ``cls`` so that a subclass calling
+        this constructor gets an instance of its own type. ``type`` is set
+        to ``u"Directory"`` and ``use`` to ``None``; no ``path`` is passed.
+
+        :param label: value stored as the entry's ``label``.
+        :param children: child entries handed to the constructor.
+        """
+        return cls(
+            label=label, children=children, type=u"Directory", use=None)
+
+    @classmethod
+    def from_fptr(cls, label, type_, fptr):
+        """Return an entry populated from the attributes of ``fptr``.
+
+        The entry is created through ``cls`` so that a subclass calling
+        this constructor gets an instance of its own type. Only ``path``,
+        ``use``, ``file_uuid``, ``derived_from``, ``checksum`` and
+        ``checksumtype`` are read from ``fptr``; its ``amdids`` are not
+        passed on and must be attached by the caller.
+
+        :param label: value stored as the entry's ``label``.
+        :param type_: value passed as the entry's ``type``.
+        :param fptr: object exposing the attributes listed above.
+        """
+        return cls(
+            label=label, type=type_, path=fptr.path, use=fptr.use,
+            file_uuid=fptr.file_uuid, derived_from=fptr.derived_from,
+            checksum=fptr.checksum, checksumtype=fptr.checksumtype)
+
def __str__(self):
return '{s.type}: {s.path}'.format(s=self)
diff --git a/metsrw/mets.py b/metsrw/mets.py
index a927396..5eb776b 100755
--- a/metsrw/mets.py
+++ b/metsrw/mets.py
@@ -330,14 +330,27 @@ def _parse_tree_structmap(self, tree, parent_elem,
continue # Only handle divs, not fptrs
entry_type = elem.get('TYPE')
label = elem.get('LABEL')
- fptr = self._analyze_fptr(elem, tree, entry_type)
- children = self._parse_tree_structmap(
- tree, elem, normative_parent_elem=normative_elem)
- fs_entry = fsentry.FSEntry(
- path=fptr.path, label=label, use=fptr.use, type=entry_type,
- children=children, file_uuid=fptr.file_uuid,
- derived_from=fptr.derived_from, checksum=fptr.checksum,
- checksumtype=fptr.checksumtype)
+ fptr_elems = elem.findall('mets:fptr', namespaces=utils.NAMESPACES)
+ # Directories are walked recursively. Additionally, they may
+ # contain direct fptrs.
+ if entry_type.lower() == "directory":
+ children = self._parse_tree_structmap(
+ tree, elem, normative_parent_elem=normative_elem)
+ fs_entry = fsentry.FSEntry.dir(label, children)
+ self._add_dmdsecs_to_fs_entry(elem, fs_entry, tree)
+ siblings.append(fs_entry)
+ for fptr_elem in fptr_elems:
+ fptr = self._analyze_fptr(fptr_elem, tree, entry_type)
+ fs_entry = fsentry.FSEntry.from_fptr(
+ label=None, type_=u"Item", fptr=fptr)
+ self._add_amdsecs_to_fs_entry(fptr.amdids, fs_entry, tree)
+ siblings.append(fs_entry)
+ continue
+ # Other types, e.g.: items, aips...
+ if not len(fptr_elems):
+ continue
+ fptr = self._analyze_fptr(fptr_elems[0], tree, entry_type)
+ fs_entry = fsentry.FSEntry.from_fptr(label, entry_type, fptr)
self._add_dmdsecs_to_fs_entry(elem, fs_entry, tree)
self._add_amdsecs_to_fs_entry(fptr.amdids, fs_entry, tree)
siblings.append(fs_entry)
@@ -369,48 +382,49 @@ def _get_el_to_normative(parent_elem, normative_parent_elem):
return el_to_normative
@staticmethod
- def _analyze_fptr(elem, tree, entry_type):
- fptr = elem.find('mets:fptr', namespaces=utils.NAMESPACES)
- if fptr is None:
- return FPtr(*[None] * 7)
- else:
- file_uuid = derived_from = use = path = amdids = checksum = \
- checksumtype = None
- file_id = fptr.get('FILEID')
- file_elem = tree.find(
- 'mets:fileSec//mets:file[@ID="' + file_id + '"]',
- namespaces=utils.NAMESPACES)
- if file_elem is None:
- raise exceptions.ParseError(
- '%s exists in structMap but not fileSec' % file_id)
- use = file_elem.getparent().get('USE')
- path = file_elem.find(
- 'mets:FLocat', namespaces=utils.NAMESPACES).get(
- utils.lxmlns('xlink') + 'href')
- try:
- path = utils.urldecode(path)
- except ValueError:
- raise exceptions.ParseError(
- 'Value "{}" (of attribute xlink:href) is not a valid'
- ' URL.'.format(path))
- amdids = file_elem.get('ADMID')
- checksum = file_elem.get('CHECKSUM')
- checksumtype = file_elem.get('CHECKSUMTYPE')
- file_id_prefix = utils.FILE_ID_PREFIX
- # If the file is an AIP, then its prefix is not "file-" but the
- # name of the AIP. Therefore we need to get the extension-less
- # basename of the AIP's path and remove its UUID suffix to ge
- # the prefix to remove from the FILEID attribute value.
- if entry_type.lower() == 'archival information package':
- file_id_prefix = os.path.splitext(
- os.path.basename(path))[0][:-36]
- file_uuid = file_id.replace(file_id_prefix, '', 1)
- group_uuid = file_elem.get('GROUPID', '').replace(
- utils.GROUP_ID_PREFIX, '', 1)
- if group_uuid != file_uuid:
- derived_from = group_uuid # Use group_uuid as placeholder
- return FPtr(file_uuid, derived_from, use, path, amdids,
- checksum, checksumtype)
+    def _analyze_fptr(fptr_elem, tree, entry_type):
+        """Resolve a structMap ``fptr`` element against the fileSec.
+
+        :param fptr_elem: ``fptr`` element whose ``FILEID`` attribute is
+            looked up among the ``mets:file`` elements of ``tree``.
+        :param tree: parsed METS document that is searched.
+        :param entry_type: ``TYPE`` of the enclosing ``div``; compared
+            case-insensitively and allowed to be ``None``.
+        :returns: ``FPtr`` holding the file UUID, the derived-from
+            placeholder, the use, the decoded path, the ``ADMID`` value,
+            the checksum and the checksum type.
+        :raises ParseError: if ``FILEID`` is missing, if no ``mets:file``
+            has that ID, if the file has no ``FLocat`` child, or if the
+            ``xlink:href`` value cannot be URL-decoded.
+        """
+        file_id = fptr_elem.get('FILEID')
+        if file_id is None:
+            # Without this guard the XPath concatenation below would fail
+            # with a ``TypeError`` instead of a parse error.
+            raise exceptions.ParseError(
+                'fptr element has no FILEID attribute')
+        file_elem = tree.find(
+            'mets:fileSec//mets:file[@ID="' + file_id + '"]',
+            namespaces=utils.NAMESPACES)
+        if file_elem is None:
+            raise exceptions.ParseError(
+                '%s exists in structMap but not fileSec' % file_id)
+        # USE is declared on the element that contains the file element.
+        use = file_elem.getparent().get('USE')
+        flocat = file_elem.find('mets:FLocat', namespaces=utils.NAMESPACES)
+        if flocat is None:
+            raise exceptions.ParseError(
+                '%s has no FLocat element in fileSec' % file_id)
+        path = flocat.get(utils.lxmlns('xlink') + 'href')
+        try:
+            path = utils.urldecode(path)
+        except ValueError:
+            raise exceptions.ParseError(
+                'Value "{}" (of attribute xlink:href) is not a valid'
+                ' URL.'.format(path))
+        amdids = file_elem.get('ADMID')
+        checksum = file_elem.get('CHECKSUM')
+        checksumtype = file_elem.get('CHECKSUMTYPE')
+        file_id_prefix = utils.FILE_ID_PREFIX
+        # TYPE may be absent on a div, so normalise ``None`` to ''.
+        normalized_type = (entry_type or '').lower()
+        if normalized_type == 'archival information package':
+            # If the file is an AIP, then its prefix is not "file-" but the
+            # name of the AIP. Therefore we need to get the extension-less
+            # basename of the AIP's path and remove its UUID suffix (36
+            # characters) to get the prefix to remove from the FILEID
+            # attribute value.
+            file_id_prefix = os.path.splitext(os.path.basename(path))[0][:-36]
+        elif normalized_type == 'directory' and not file_id.startswith(
+                'file-'):
+            # If the file is part of a directory (with no intermediate
+            # item), then its prefix *may not* be "file-" but the name of
+            # the file. This pattern is found in old Archivematica METS
+            # files, e.g. see ``fixtures/mets_dir_with_fptrs.xml``.
+            file_id_prefix = os.path.basename(path) + "-"
+        file_uuid = file_id.replace(file_id_prefix, '', 1)
+        group_uuid = file_elem.get('GROUPID', '').replace(
+            utils.GROUP_ID_PREFIX, '', 1)
+        derived_from = None
+        if group_uuid != file_uuid:
+            derived_from = group_uuid  # Use group_uuid as placeholder
+        return FPtr(file_uuid, derived_from, use, path, amdids,
+                    checksum, checksumtype)
@staticmethod
def _add_dmdsecs_to_fs_entry(elem, fs_entry, tree):
@@ -445,10 +459,15 @@ def _parse_tree(self, tree=None):
tree = self.tree
# self._validate()
# Check CREATEDATE < now
- createdate = self.tree.find('mets:metsHdr', namespaces=utils.NAMESPACES).get('CREATEDATE')
+ try:
+ createdate = self.tree.find(
+ 'mets:metsHdr', namespaces=utils.NAMESPACES).get('CREATEDATE')
+ except AttributeError:
+ createdate = None
now = datetime.utcnow().isoformat('T')
if createdate and createdate > now:
- raise exceptions.ParseError('CREATEDATE more recent than now (%s)' % now)
+ raise exceptions.ParseError(
+ 'CREATEDATE more recent than now (%s)' % now)
self.createdate = createdate
# Parse structMap
diff --git a/metsrw/plugins/premisrw/__init__.py b/metsrw/plugins/premisrw/__init__.py
index 69d2bc2..e356bc3 100644
--- a/metsrw/plugins/premisrw/__init__.py
+++ b/metsrw/plugins/premisrw/__init__.py
@@ -18,6 +18,12 @@
)
from .utils import (
XSI_NAMESPACE,
+ PREMIS_2_1_VERSION,
+ PREMIS_2_1_NAMESPACE,
+ PREMIS_2_1_XSD,
+ PREMIS_2_1_SCHEMA_LOCATION,
+ PREMIS_2_1_NAMESPACES,
+ PREMIS_2_1_META,
PREMIS_2_2_VERSION,
PREMIS_2_2_NAMESPACE,
PREMIS_2_2_XSD,
@@ -48,6 +54,9 @@
__all__ = ['PREMISElement', 'PREMISObject', 'PREMISEvent', 'PREMISAgent',
'data_to_premis', 'premis_to_data', 'data_find', 'data_find_all',
'data_find_text', 'data_find_text_or_all', 'XSI_NAMESPACE',
+ 'PREMIS_2_1_VERSION', 'PREMIS_2_1_NAMESPACE', 'PREMIS_2_1_XSD',
+ 'PREMIS_2_1_SCHEMA_LOCATION', 'PREMIS_2_1_NAMESPACES',
+ 'PREMIS_2_1_META',
'PREMIS_2_2_VERSION', 'PREMIS_2_2_NAMESPACE', 'PREMIS_2_2_XSD',
'PREMIS_2_2_SCHEMA_LOCATION', 'PREMIS_2_2_NAMESPACES',
'PREMIS_2_2_META', 'PREMIS_3_0_VERSION', 'PREMIS_3_0_NAMESPACE',
diff --git a/metsrw/plugins/premisrw/premis.py b/metsrw/plugins/premisrw/premis.py
index ee25dda..a615a71 100644
--- a/metsrw/plugins/premisrw/premis.py
+++ b/metsrw/plugins/premisrw/premis.py
@@ -694,7 +694,8 @@ def data_find_text(data, path):
texts = [child for child in el[1:]
if not isinstance(child, (tuple, list, dict))]
if texts:
- return ' '.join([str(x) for x in texts])
+ return ' '.join(
+ [x.encode('utf-8', errors='ignore') for x in texts])
return None
diff --git a/metsrw/plugins/premisrw/utils.py b/metsrw/plugins/premisrw/utils.py
index c997143..ad4dc58 100644
--- a/metsrw/plugins/premisrw/utils.py
+++ b/metsrw/plugins/premisrw/utils.py
@@ -3,6 +3,21 @@
XSI_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-instance'
+# PREMIS v. 2.1
+# PREMIS 2.1 uses the same ``info:lc/xmlns/premis-v2`` namespace as 2.2;
+# only the XSD location differs between the two minor versions.
+PREMIS_2_1_VERSION = '2.1'
+PREMIS_2_1_NAMESPACE = 'info:lc/xmlns/premis-v2'
+PREMIS_2_1_XSD = 'http://www.loc.gov/standards/premis/v2/premis-v2-1.xsd'
+PREMIS_2_1_SCHEMA_LOCATION = '{} {}'.format(
+    PREMIS_2_1_NAMESPACE, PREMIS_2_1_XSD)
+# Prefix-to-URI map for PREMIS 2.1 documents.
+PREMIS_2_1_NAMESPACES = {
+    'premis': PREMIS_2_1_NAMESPACE,
+    'xsi': XSI_NAMESPACE
+}
+# Attribute values describing a PREMIS 2.1 root element.
+PREMIS_2_1_META = {
+    'xsi:schema_location': PREMIS_2_1_SCHEMA_LOCATION,
+    'version': PREMIS_2_1_VERSION
+}
+
# PREMIS v. 2.2
PREMIS_2_2_VERSION = '2.2'
PREMIS_2_2_NAMESPACE = 'info:lc/xmlns/premis-v2'
@@ -34,6 +49,10 @@
}
PREMIS_VERSIONS_MAP = {
+    # PREMIS 2.1 is paired with its own namespace map and meta attributes.
+    PREMIS_2_1_VERSION: {
+        'namespaces': PREMIS_2_1_NAMESPACES,
+        'meta': PREMIS_2_1_META
+    },
PREMIS_2_2_VERSION: {
'namespaces': PREMIS_2_2_NAMESPACES,
'meta': PREMIS_2_2_META
diff --git a/requirements/dev.txt b/requirements/dev.txt
index d3a09bb..5b6f08e 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -1,4 +1,5 @@
-r base.txt
+mock
pytest
pytest-cov
sphinx>=1.3
diff --git a/tests/test_fsentry.py b/tests/test_fsentry.py
index 3968e23..12ed751 100644
--- a/tests/test_fsentry.py
+++ b/tests/test_fsentry.py
@@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
+
import pytest
from unittest import TestCase
import uuid
+import six
import metsrw
@@ -9,6 +11,15 @@
class TestFSEntry(TestCase):
""" Test FSEntry class. """
+    @pytest.mark.skipif(six.PY3, reason="metsrw still uses Unicode in python3")
+    def test_path_is_binary(self):
+        """The ``path`` attribute should always be kept as a bytestring."""
+        text_path = u'💜🎑💜'
+        # Check the text form first, then its UTF-8 encoded form.
+        for candidate in (text_path, text_path.encode('utf-8')):
+            entry = metsrw.FSEntry(candidate, type='Directory')
+            assert isinstance(entry.path, six.binary_type)
+
def test_create_invalid_checksum_type(self):
""" It should only accept METS valid checksum types. """
metsrw.FSEntry(
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 2daaa5d..18c5717 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -52,8 +52,8 @@ def test_replacement_techmd(self):
techmd_old = metsrw.SubSection('techMD', self.STUB_MDWRAP)
techmd_new = metsrw.SubSection('techMD', self.STUB_MDWRAP)
techmd_old.replace_with(techmd_new)
- assert techmd_old.get_status() is 'superseded'
- assert techmd_new.get_status() is 'current'
+ assert techmd_old.get_status() == 'superseded'
+ assert techmd_new.get_status() == 'current'
def test_replacement_sourcemd(self):
""" It should have no special behaviour replacing sourceMDs. """
diff --git a/tests/test_mets.py b/tests/test_mets.py
index 6acb119..15dc2de 100644
--- a/tests/test_mets.py
+++ b/tests/test_mets.py
@@ -4,6 +4,7 @@
from lxml import etree
from lxml.builder import ElementMaker
import os
+import mock
import pytest
from unittest import TestCase
import uuid
@@ -158,6 +159,46 @@ def test_fromfile_invalid_xlink_href(self):
metsrw.METSDocument.fromfile(
'fixtures/mets_invalid_xlink_hrefs.xml')
+    def test_analyze_fptr(self):
+        """It should raise on bad fptrs and return an ``FPtr`` otherwise."""
+        parser = etree.XMLParser(remove_blank_text=True)
+        tree = etree.parse('fixtures/mets_dir_with_fptrs.xml', parser=parser)
+        mw = metsrw.METSDocument()
+
+        # Test that exception is raised when fileSec cannot be found.
+        # The FILEID value is the one quoted in the expected error message.
+        fptr_elem = etree.fromstring('<fptr FILEID="12345"/>')
+        with pytest.raises(metsrw.exceptions.ParseError,
+                           match='12345 exists in structMap but not fileSec'):
+            metsrw.METSDocument._analyze_fptr(fptr_elem, tree, 'directory')
+
+        # Test that exception is raised when the path cannot be decoded.
+        # NOTE(review): this FILEID is reconstructed from the UUID and path
+        # asserted below (file name + "-" + UUID); confirm it matches the
+        # ID used in the fixture's fileSec.
+        fptr_elem = etree.fromstring(
+            '<fptr FILEID="AM68.csv-fc0e52ca-a688-41c0-a10b-c1d36e21e804"/>')
+        with mock.patch('metsrw.utils.urldecode') as urldecode:
+            urldecode.side_effect = ValueError()
+            with pytest.raises(metsrw.exceptions.ParseError,
+                               match='is not a valid URL'):
+                metsrw.METSDocument._analyze_fptr(
+                    fptr_elem, tree, 'directory')
+
+        # Test the integrity of the ``FPtr`` object returned. This runs
+        # outside the patch so that the real ``urldecode`` is used.
+        fptr = mw._analyze_fptr(fptr_elem, tree, 'directory')
+        assert fptr == metsrw.mets.FPtr(
+            file_uuid='fc0e52ca-a688-41c0-a10b-c1d36e21e804',
+            derived_from=None, use='original', path='objects/AM68.csv',
+            amdids='amdSec_3', checksum=None, checksumtype=None)
+
+    def test_analyze_fptr_from_aip(self):
+        """An fptr taken from a pointer file should resolve to the AIP."""
+        pointer_tree = etree.parse(
+            'fixtures/production-pointer-file.xml',
+            parser=etree.XMLParser(remove_blank_text=True))
+        document = metsrw.METSDocument()
+
+        # Analyze the first fptr found in the pointer file as an AIP entry.
+        first_fptr = pointer_tree.find(
+            '//mets:fptr[1]', namespaces=metsrw.utils.NAMESPACES)
+        result = document._analyze_fptr(
+            first_fptr, pointer_tree, 'Archival Information Package')
+
+        assert result.file_uuid == '7327b00f-d83a-4ae8-bb89-84fce994e827'
+        assert result.use == 'Archival Information Package'
+
class TestWholeMETS(TestCase):
""" Test integration between classes. """
@@ -587,6 +628,18 @@ def test_parse_production_pointer_file(self):
aip_uuid = '7327b00f-d83a-4ae8-bb89-84fce994e827'
assert mw.get_file(file_uuid=aip_uuid)
+    def test_parse_dir_with_fptrs(self):
+        """Files pointed to directly from a directory div should be parsed."""
+        document = metsrw.METSDocument.fromfile(
+            'fixtures/mets_dir_with_fptrs.xml')
+        assert len(document.all_files()) == 5
+        assert document.get_file(type='Directory', label='objects')
+        # (file UUID, path) pairs expected to be retrievable as items.
+        expected_items = (
+            ('3a6a182a-40a0-4c2b-9752-fc7e91ac1edf', 'objects/V00154.MPG'),
+            ('431913ba-4379-4373-8798-cc5f2b9dd769', 'objects/V00158.MPG'),
+            ('fc0e52ca-a688-41c0-a10b-c1d36e21e804', 'objects/AM68.csv'),
+        )
+        for file_uuid, path in expected_items:
+            assert document.get_file(
+                type='Item', file_uuid=file_uuid, path=path)
+
# Helper methods
def assert_mets_valid(self, mets_doc, schematron=metsrw.AM_SCT_PATH):