diff --git a/metsrw/__init__.py b/metsrw/__init__.py index 6b1606c..32e860e 100644 --- a/metsrw/__init__.py +++ b/metsrw/__init__.py @@ -6,7 +6,7 @@ from .exceptions import MetsError, ParseError from .fsentry import FSEntry -from .metadata import Agent, AMDSec, SubSection, MDRef, MDWrap +from .metadata import Agent, AltRecordID, AMDSec, SubSection, MDRef, MDWrap from .mets import METSDocument from .utils import ( NAMESPACES, @@ -47,6 +47,7 @@ __all__ = [ "Agent", + "AltRecordID", "AMDSec", "AM_PNTR_SCT_PATH", "AM_SCT_PATH", diff --git a/metsrw/metadata.py b/metsrw/metadata.py index 726d5d5..69fbb90 100644 --- a/metsrw/metadata.py +++ b/metsrw/metadata.py @@ -98,6 +98,58 @@ def serialize(self, now=None): return el +class AltRecordID(object): + """ + An object representing an alternative record identifier in the METS document + (alternatives to the OBJID). + + This is ordinarily created by :class:`metsrw.mets.METSDocument` instances and + does not have to be instantiated directly. + + :param str id: Optional unique identifier for the identifier. + :param str type: Optional identifier type, e.g. 'Accession number'. + """ + + ALT_RECORD_ID_TAG = etree.QName(utils.NAMESPACES[u"mets"], u"altRecordID") + + def __init__(self, alt_record_id, **kwargs): + self.text = alt_record_id + # We use kwargs here to avoid shadowing builtins (id and type). + self.id = kwargs.get("id", None) + self.type = kwargs.get("type", None) + + @classmethod + def parse(cls, element): + """ + Create a new AltRecordID by parsing element. + + :param element: Element to be parsed into an AltRecordID. + :raises exceptions.ParseError: If element is not a valid altRecordID. 
+ """ + if element.tag != cls.ALT_RECORD_ID_TAG: + raise exceptions.ParseError( + u"AltRecordID got unexpected tag {}; expected {}".format( + element.tag, cls.ALT_RECORD_ID_TAG + ) + ) + + return cls(element.text, id=element.get(u"ID"), type=element.get(u"TYPE")) + + def serialize(self): + attrs = {} + + if self.id: + attrs[u"ID"] = self.id + + if self.type: + attrs[u"TYPE"] = self.type + + element = etree.Element(self.ALT_RECORD_ID_TAG, **attrs) + element.text = self.text + + return element + + class Agent(object): """ An object representing an agent with a relationship to the METS record. diff --git a/metsrw/mets.py b/metsrw/mets.py index e93416f..62e2ed6 100755 --- a/metsrw/mets.py +++ b/metsrw/mets.py @@ -34,6 +34,7 @@ def __init__(self): # can be inferred via their #children attribute self.createdate = None self.objid = None + self.alternate_ids = [] self._root_elements = [] self._all_files = None self._iter = None @@ -192,10 +193,10 @@ def _mets_header(self, now): header_attrs[u"LASTMODDATE"] = now header_element = etree.Element(header_tag, **header_attrs) - for agent in self.agents: - agent_element = agent.serialize() - header_element.append(agent_element) + header_element.append(agent.serialize()) + for alternate_id in self.alternate_ids: + header_element.append(alternate_id.serialize()) return header_element @@ -519,11 +520,20 @@ def _parse_header(self, tree): self.createdate = createdate if header is not None: - agent_elements = header.findall(u"mets:agent", namespaces=utils.NAMESPACES) + agent_elements = header.findall( + metadata.Agent.AGENT_TAG, namespaces=utils.NAMESPACES + ) for agent_element in agent_elements: agent = metadata.Agent.parse(agent_element) self.agents.append(agent) + alternate_ids = header.findall( + metadata.AltRecordID.ALT_RECORD_ID_TAG, namespaces=utils.NAMESPACES + ) + for alternate_id_element in alternate_ids: + alternate_id = metadata.AltRecordID.parse(alternate_id_element) + self.alternate_ids.append(alternate_id) + def 
_validate(self): raise NotImplementedError() diff --git a/tests/test_metadata.py b/tests/test_metadata.py index b05234e..a573795 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -72,6 +72,33 @@ def test_serialize_with_other_role(self): assert element.get("OTHERROLE") == agent.role +class TestAltRecordId(TestCase): + def test_parse_exception_on_wrong_tag(self): + element = etree.Element("test") + with pytest.raises(metsrw.ParseError): + metsrw.AltRecordID.parse(element) + + def test_parse(self): + element = etree.Element( + metsrw.AltRecordID.ALT_RECORD_ID_TAG, ID="543", TYPE="Test" + ) + element.text = "a-unique-id" + alt_record_id = metsrw.AltRecordID.parse(element) + + assert alt_record_id.text == element.text + assert alt_record_id.id == element.get("ID") + assert alt_record_id.type == element.get("TYPE") + + def test_serialize(self): + alt_record_id = metsrw.AltRecordID("12345", id="1", type="Accession Id") + element = alt_record_id.serialize() + + assert element.get("ID") == alt_record_id.id + assert element.get("TYPE") == alt_record_id.type + + assert element.text == alt_record_id.text + + class TestAMDSec(TestCase): """ Test AMDSec class. 
""" diff --git a/tests/test_mets.py b/tests/test_mets.py index afdf019..e09707b 100644 --- a/tests/test_mets.py +++ b/tests/test_mets.py @@ -223,6 +223,36 @@ def test_parse_header_with_agent(self): assert mets.agents[0].name == u"39461beb-22eb-4942-88af-848cfc3462b2" assert mets.agents[0].notes[0] == u"Archivematica dashboard UUID" + def test_mets_header_with_alt_record_id(self): + mets = metsrw.METSDocument() + alt_record_id = metsrw.AltRecordID( + "39461beb-22eb-4942-88af-848cfc3462b2", type="Accession ID" + ) + mets.alternate_ids.append(alt_record_id) + + header_element = mets._mets_header("2014-07-16T22:52:02.480108") + alt_record_id_element = header_element.find( + "{http://www.loc.gov/METS/}altRecordID" + ) + + assert alt_record_id_element.get("TYPE") == alt_record_id.type + assert alt_record_id_element.text == alt_record_id.text + + def test_parse_header_with_alt_record_id(self): + mets = metsrw.METSDocument.fromstring( + b""" + + + 39461beb-22eb-4942-88af-848cfc3462b2 + + +""" + ) + + assert len(mets.alternate_ids) == 1 + assert mets.alternate_ids[0].type == u"Accession Id" + assert mets.alternate_ids[0].text == u"39461beb-22eb-4942-88af-848cfc3462b2" + def test_fromfile_invalid_xlink_href(self): """Test that ``fromfile`` raises ``ParseError`` if an xlink:href value in the source METS contains an unparseable URL.