diff --git a/metsrw/mets.py b/metsrw/mets.py index 3627455..91648ee 100755 --- a/metsrw/mets.py +++ b/metsrw/mets.py @@ -447,7 +447,10 @@ def _analyze_fptr(fptr_elem, tree, entry_type): # plus `file-` on 1.10+. Therefore we need to get the extension-less # basename of the AIP's path and remove its UUID suffix to ge # the prefix to remove from the FILEID attribute value. - if entry_type.lower() == "archival information package": + if entry_type.lower() in ( + "archival information package", + "archival information collection", + ): aip_name = os.path.splitext(os.path.basename(path))[0][:-36] if file_id.startswith(file_id_prefix): file_id_prefix = file_id_prefix + aip_name diff --git a/tests/test_mets.py b/tests/test_mets.py index a84e567..be9bff2 100644 --- a/tests/test_mets.py +++ b/tests/test_mets.py @@ -376,6 +376,39 @@ def test_analyze_fptr_sets_uuid_from_aip_with_file_id_prefix(self): assert fptr.file_uuid == "9b9f129c-8062-471b-a009-9ee0ad655f08" + def test_analyze_fptr_sets_uuid_from_aic_with_file_id_prefix(self): + """ + Test that AIC FILEIDs with a leading `file-` are parsed properly. + """ + tree = etree.fromstring( + b""" + + + + + + + + + + + + + + + + + + + """ + ) + fptr_elem = tree.find(".//mets:fptr[1]", namespaces=metsrw.utils.NAMESPACES) + fptr = metsrw.METSDocument()._analyze_fptr( + fptr_elem, tree, "Archival Information Collection" + ) + + assert fptr.file_uuid == "258f73d1-08fe-4ade-9770-1d8812cde545" + def test_duplicate_ids(self): """ We don't want duplicate ids to be generated, but if specified, they shouldn't break