Skip to content

Commit

Permalink
Add header altRecordID support
Browse files Browse the repository at this point in the history
This (and all the METS elements) could be made a bit more general to
save some code, but leaving that as a future enhancement for now.
  • Loading branch information
cole committed Mar 26, 2019
1 parent 9623688 commit dc06b07
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 5 deletions.
3 changes: 2 additions & 1 deletion metsrw/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from .exceptions import MetsError, ParseError
from .fsentry import FSEntry
from .metadata import Agent, AMDSec, SubSection, MDRef, MDWrap
from .metadata import Agent, AltRecordID, AMDSec, SubSection, MDRef, MDWrap
from .mets import METSDocument
from .utils import (
NAMESPACES,
Expand Down Expand Up @@ -47,6 +47,7 @@

__all__ = [
"Agent",
"AltRecordID",
"AMDSec",
"AM_PNTR_SCT_PATH",
"AM_SCT_PATH",
Expand Down
52 changes: 52 additions & 0 deletions metsrw/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,58 @@ def serialize(self, now=None):
return el


class AltRecordID(object):
    """
    An alternative record identifier in the METS document, i.e. an
    identifier for the record other than its OBJID.

    This is ordinarily created by :class:`metsrw.mets.METSDocument` instances
    and does not have to be instantiated directly.

    :param str alt_record_id: The identifier value; stored as ``text`` and
        written out as the element's text content.
    :param str id: Optional unique identifier for the identifier.
    :param str type: Optional identifier type, e.g. 'Accession number'.
    """

    ALT_RECORD_ID_TAG = etree.QName(utils.NAMESPACES[u"mets"], u"altRecordID")

    def __init__(self, alt_record_id, **kwargs):
        # ``id`` and ``type`` arrive through kwargs so that the signature
        # does not shadow the builtins of the same names.
        self.id = kwargs.get("id")
        self.type = kwargs.get("type")
        self.text = alt_record_id

    @classmethod
    def parse(cls, element):
        """
        Create a new AltRecordID from an XML element.

        :param element: Element to be parsed into an AltRecordID.
        :raises exceptions.ParseError: If the element's tag is not the
            altRecordID tag.
        """
        expected_tag = cls.ALT_RECORD_ID_TAG
        if element.tag != expected_tag:
            message = u"AltRecordID got unexpected tag {}; expected {}".format(
                element.tag, expected_tag
            )
            raise exceptions.ParseError(message)

        identifier_id = element.get(u"ID")
        identifier_type = element.get(u"TYPE")
        return cls(element.text, id=identifier_id, type=identifier_type)

    def serialize(self):
        """
        Return this identifier as an altRecordID element.

        The ID and TYPE attributes are only written when the corresponding
        instance attribute holds a truthy value.
        """
        candidates = ((u"ID", self.id), (u"TYPE", self.type))
        attrs = {name: value for name, value in candidates if value}

        element = etree.Element(self.ALT_RECORD_ID_TAG, **attrs)
        element.text = self.text
        return element


class Agent(object):
"""
An object representing an agent with a relationship to the METS record.
Expand Down
18 changes: 14 additions & 4 deletions metsrw/mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(self):
# can be inferred via their #children attribute
self.createdate = None
self.objid = None
self.alternate_ids = []
self._root_elements = []
self._all_files = None
self._iter = None
Expand Down Expand Up @@ -192,10 +193,10 @@ def _mets_header(self, now):
header_attrs[u"LASTMODDATE"] = now

header_element = etree.Element(header_tag, **header_attrs)

for agent in self.agents:
agent_element = agent.serialize()
header_element.append(agent_element)
header_element.append(agent.serialize())
for alternate_id in self.alternate_ids:
header_element.append(alternate_id.serialize())

return header_element

Expand Down Expand Up @@ -519,11 +520,20 @@ def _parse_header(self, tree):
self.createdate = createdate

if header is not None:
agent_elements = header.findall(u"mets:agent", namespaces=utils.NAMESPACES)
agent_elements = header.findall(
metadata.Agent.AGENT_TAG, namespaces=utils.NAMESPACES
)
for agent_element in agent_elements:
agent = metadata.Agent.parse(agent_element)
self.agents.append(agent)

alternate_ids = header.findall(
metadata.AltRecordID.ALT_RECORD_ID_TAG, namespaces=utils.NAMESPACES
)
for alternate_id_element in alternate_ids:
alternate_id = metadata.AltRecordID.parse(alternate_id_element)
self.alternate_ids.append(alternate_id)

def _validate(self):
raise NotImplementedError()

Expand Down
27 changes: 27 additions & 0 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,33 @@ def test_serialize_with_other_role(self):
assert element.get("OTHERROLE") == agent.role


class TestAltRecordId(TestCase):
    """ Test AltRecordID class. """

    def test_parse_exception_on_wrong_tag(self):
        # An element with any other tag must be rejected by parse().
        wrong_element = etree.Element("test")
        with pytest.raises(metsrw.ParseError):
            metsrw.AltRecordID.parse(wrong_element)

    def test_parse(self):
        # Text, ID and TYPE should all be carried over from the element.
        tag = metsrw.AltRecordID.ALT_RECORD_ID_TAG
        element = etree.Element(tag, ID="543", TYPE="Test")
        element.text = "a-unique-id"

        parsed = metsrw.AltRecordID.parse(element)

        assert parsed.text == element.text
        assert parsed.id == element.get("ID")
        assert parsed.type == element.get("TYPE")

    def test_serialize(self):
        # Attributes and text set on the object should appear on the element.
        source = metsrw.AltRecordID("12345", id="1", type="Accession Id")

        serialized = source.serialize()

        assert serialized.get("ID") == source.id
        assert serialized.get("TYPE") == source.type
        assert serialized.text == source.text


class TestAMDSec(TestCase):
""" Test AMDSec class. """

Expand Down
30 changes: 30 additions & 0 deletions tests/test_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,36 @@ def test_parse_header_with_agent(self):
assert mets.agents[0].name == u"39461beb-22eb-4942-88af-848cfc3462b2"
assert mets.agents[0].notes[0] == u"Archivematica dashboard UUID"

def test_mets_header_with_alt_record_id(self):
    """Test that ``_mets_header`` output contains an altRecordID element
    for an identifier appended to ``alternate_ids``.
    """
    document = metsrw.METSDocument()
    accession = metsrw.AltRecordID(
        "39461beb-22eb-4942-88af-848cfc3462b2", type="Accession ID"
    )
    document.alternate_ids.append(accession)

    header = document._mets_header("2014-07-16T22:52:02.480108")
    serialized = header.find("{http://www.loc.gov/METS/}altRecordID")

    assert serialized.get("TYPE") == accession.type
    assert serialized.text == accession.text

def test_parse_header_with_alt_record_id(self):
    """Test that an altRecordID inside metsHdr is parsed into
    ``alternate_ids``.
    """
    source = b"""<?xml version='1.0' encoding='ASCII'?>
<mets xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.loc.gov/METS/" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version18/mets.xsd">
<metsHdr CREATEDATE="2015-12-16T22:38:48">
<altRecordID TYPE="Accession Id">39461beb-22eb-4942-88af-848cfc3462b2</altRecordID>
</metsHdr>
<structMap ID="structMap_1" LABEL="Archivematica default" TYPE="physical"/>
</mets>"""
    document = metsrw.METSDocument.fromstring(source)

    assert len(document.alternate_ids) == 1
    parsed = document.alternate_ids[0]
    assert parsed.type == u"Accession Id"
    assert parsed.text == u"39461beb-22eb-4942-88af-848cfc3462b2"

def test_fromfile_invalid_xlink_href(self):
"""Test that ``fromfile`` raises ``ParseError`` if an xlink:href value
in the source METS contains an unparseable URL.
Expand Down

0 comments on commit dc06b07

Please sign in to comment.