From 8db53bb0951450d029056d404bc96849504c06a2 Mon Sep 17 00:00:00 2001 From: Daniel Harvey Date: Thu, 20 Oct 2016 00:38:30 -0700 Subject: [PATCH] Added ArcGISParser, test data, and full test coverage. --- docs/arcgis-metadata.dtd | 10 +- gis_metadata/arcgis_metadata_parser.py | 379 ++++++++++++++++++++ gis_metadata/fgdc_metadata_parser.py | 4 + gis_metadata/iso_metadata_parser.py | 4 + gis_metadata/metadata_parser.py | 43 +-- gis_metadata/tests/data/arcgis_metadata.xml | 310 ++++++++++++++++ gis_metadata/tests/data/fgdc_metadata.xml | 3 - gis_metadata/tests/data/test_arcgis.xml | 2 + gis_metadata/tests/tests.py | 232 ++++++++++-- gis_metadata/utils.py | 85 ++--- 10 files changed, 955 insertions(+), 117 deletions(-) create mode 100644 gis_metadata/arcgis_metadata_parser.py create mode 100644 gis_metadata/tests/data/arcgis_metadata.xml create mode 100644 gis_metadata/tests/data/test_arcgis.xml diff --git a/docs/arcgis-metadata.dtd b/docs/arcgis-metadata.dtd index 0fcba69..0f9cc7c 100644 --- a/docs/arcgis-metadata.dtd +++ b/docs/arcgis-metadata.dtd @@ -71,8 +71,8 @@ - - + + @@ -435,7 +435,7 @@ - + @@ -690,7 +690,7 @@ - + @@ -768,7 +768,7 @@ - + diff --git a/gis_metadata/arcgis_metadata_parser.py b/gis_metadata/arcgis_metadata_parser.py new file mode 100644 index 0000000..4a2f9ea --- /dev/null +++ b/gis_metadata/arcgis_metadata_parser.py @@ -0,0 +1,379 @@ +""" A module to contain utility ArcGIS metadata parsing helpers """ + +import six + +from six import iteritems + +from gis_metadata.exceptions import ParserError +from gis_metadata.metadata_parser import MetadataParser + +from gis_metadata.utils import DATE_TYPE, DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE +from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END +from gis_metadata.utils import ATTRIBUTES +from gis_metadata.utils import BOUNDING_BOX +from gis_metadata.utils import CONTACTS +from gis_metadata.utils import DATES +from gis_metadata.utils import DIGITAL_FORMS +from 
gis_metadata.utils import KEYWORDS_PLACE +from gis_metadata.utils import KEYWORDS_THEME +from gis_metadata.utils import LARGER_WORKS +from gis_metadata.utils import PROCESS_STEPS +from gis_metadata.utils import ParserProperty + +from gis_metadata.utils import format_xpaths, get_complex_definitions +from gis_metadata.utils import parse_complex_list, update_complex_list + +from parserutils.collections import flatten_items, reduce_value, wrap_value +from parserutils.elements import get_elements, get_element_name, get_element_attributes +from parserutils.elements import clear_element, element_to_dict, insert_element, remove_element, remove_empty_element + + +xrange = getattr(six.moves, 'xrange') + + +ARCGIS_ROOTS = ('metadata', 'Metadata') +ARCGIS_NODES = ('dataIdInfo', 'distInfo', 'dqInfo') + +_agis_definitions = get_complex_definitions() + +_agis_tag_formats = { + '_attribute_accuracy_root': 'dqInfo/report', + '_attributes_root': 'eainfo/detailed/attr', + '_bounding_box_root': 'dataIdInfo/dataExt/geoEle', + '_contacts_root': 'dataIdInfo/idPoC', + '_dataset_completeness_root': 'dqInfo/report', + '_dates_root': 'dataIdInfo/dataExt/tempEle', + '_digital_forms_root': 'distInfo/distFormat', + '_dist_liability_root': 'dataIdInfo/resConst', + '_transfer_options_root': 'distInfo/distTranOps/onLineSrc', + '_larger_works_root': 'dataIdInfo/aggrInfo/aggrDSName', + '_process_steps_root': 'dqInfo/dataLineage/prcStep', + '_use_constraints_root': 'dataIdInfo/resConst', + + 'title': 'dataIdInfo/idCitation/resTitle', + 'abstract': 'dataIdInfo/idAbs', + 'purpose': 'dataIdInfo/idPurp', + 'supplementary_info': 'dataIdInfo/suppInfo', + 'online_linkages': 'dataIdInfo/idCitation/citRespParty/rpCntInfo/cntOnlineRes/linkage', + '_online_linkages': 'dataIdInfo/idCitation/citOnlineRes/linkage', # If not in citRespParty + 'originators': 'dataIdInfo/idCitation/citRespParty/rpOrgName', + 'publish_date': 'dataIdInfo/idCitation/date/pubDate', + 'data_credits': 'dataIdInfo/idCredit', + CONTACTS: 
'dataIdInfo/idPoC/{ct_path}', + 'dist_contact_org': 'distInfo/distributor/distorCont/rpOrgName', + 'dist_contact_person': 'distInfo/distributor/distorCont/rpIndName', + 'dist_address_type': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/@addressType', + 'dist_address': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/delPoint', + 'dist_city': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/city', + 'dist_state': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/adminArea', + 'dist_postal': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/postCode', + 'dist_country': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/country', + 'dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/cntPhone/voiceNum', + '_dist_phone': 'distInfo/distributor/distorCont/rpCntInfo/voiceNum', # If not in cntPhone + 'dist_email': 'distInfo/distributor/distorCont/rpCntInfo/cntAddress/eMailAdd', + 'dist_liability': 'dataIdInfo/resConst/LegConsts/othConsts', + 'processing_fees': 'distInfo/distributor/distorOrdPrc/resFees', + 'processing_instrs': 'distInfo/distributor/distorOrdPrc/ordInstr', + 'resource_desc': 'dataIdInfo/idSpecUse/specUsage', + 'tech_prerequisites': 'dataIdInfo/envirDesc', + ATTRIBUTES: 'eainfo/detailed/attr/{ad_path}', # Same as in FGDC (and for good reason) + 'attribute_accuracy': 'dqInfo/report/measDesc', + BOUNDING_BOX: 'dataIdInfo/dataExt/geoEle/GeoBndBox/{bbox_path}', + 'dataset_completeness': 'dqInfo/report/measDesc', + DIGITAL_FORMS: 'distInfo/distFormat/{df_path}', + '_access_desc': 'distInfo/distTranOps/onLineSrc/orDesc', + '_access_instrs': 'distInfo/distTranOps/onLineSrc/protocol', + '_network_resource': 'distInfo/distTranOps/onLineSrc/linkage', + PROCESS_STEPS: 'dqInfo/dataLineage/prcStep/{ps_path}', + LARGER_WORKS: 'dataIdInfo/aggrInfo/aggrDSName/{lw_path}', + 'other_citation_info': 'dataIdInfo/idCitation/otherCitDet', + 'use_constraints': 'dataIdInfo/resConst/Consts/useLimit', + DATES: 
'dataIdInfo/dataExt/tempEle/TempExtent/exTemp/{type_path}', + KEYWORDS_PLACE: 'dataIdInfo/placeKeys/keyword', + KEYWORDS_THEME: 'dataIdInfo/themeKeys/keyword' +} + + +class ArcGISParser(MetadataParser): + """ A class to parse metadata files generated by ArcGIS """ + + def _init_data_map(self): + """ OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """ + + if self._data_map is not None: + return # Initiation happens once + + # Parse and validate the ArcGIS metadata root + + if self._xml_tree is None: + agis_root = ARCGIS_ROOTS[0] # Default to uncapitalized + else: + agis_root = get_element_name(self._xml_tree) + + if agis_root not in ARCGIS_ROOTS: + raise ParserError('Invalid XML root for ArcGIS metadata: {root}', root=agis_root) + + agis_data_map = {'_root': agis_root} + agis_data_map.update(_agis_tag_formats) + + agis_data_structures = {} + + # Capture and format complex XPATHs + + ad_format = agis_data_map[ATTRIBUTES] + agis_data_structures[ATTRIBUTES] = format_xpaths( + _agis_definitions[ATTRIBUTES], + label=ad_format.format(ad_path='attrlabl'), + aliases=ad_format.format(ad_path='attalias'), + definition=ad_format.format(ad_path='attrdef'), + definition_src=ad_format.format(ad_path='attrdefs') + ) + + bb_format = agis_data_map[BOUNDING_BOX] + agis_data_structures[BOUNDING_BOX] = format_xpaths( + _agis_definitions[BOUNDING_BOX], + east=bb_format.format(bbox_path='eastBL'), + south=bb_format.format(bbox_path='southBL'), + west=bb_format.format(bbox_path='westBL'), + north=bb_format.format(bbox_path='northBL') + ) + + ct_format = agis_data_map[CONTACTS] + agis_data_structures[CONTACTS] = format_xpaths( + _agis_definitions[CONTACTS], + name=ct_format.format(ct_path='rpIndName'), + organization=ct_format.format(ct_path='rpOrgName'), + position=ct_format.format(ct_path='rpPosName'), + email=ct_format.format(ct_path='rpCntInfo/cntAddress/eMailAdd') + ) + + dt_format = agis_data_map[DATES] + agis_data_structures[DATES] = { + 
DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition'), + '_' + DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition/@date'), + DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin'), + '_' + DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin/@date'), + DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd'), + '_' + DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd/@date'), + # Same as multiple dates, but will contain only one + DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition'), + '_' + DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition/@date') + } + agis_data_structures[DATES][DATE_TYPE_RANGE] = [ + agis_data_structures[DATES][DATE_TYPE_RANGE_BEGIN], + agis_data_structures[DATES][DATE_TYPE_RANGE_END] + ] + agis_data_structures[DATES]['_' + DATE_TYPE_RANGE] = [ + agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_BEGIN], + agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_END] + ] + + df_format = agis_data_map[DIGITAL_FORMS] + agis_data_structures[DIGITAL_FORMS] = format_xpaths( + _agis_definitions[DIGITAL_FORMS], + name=df_format.format(df_path='formatName'), + content=df_format.format(df_path='formatInfo'), + decompression=df_format.format(df_path='fileDecmTech'), + version=df_format.format(df_path='formatVer'), + specification=df_format.format(df_path='formatSpec'), + access_desc=agis_data_map['_access_desc'], + access_instrs=agis_data_map['_access_instrs'], + network_resource=agis_data_map['_network_resource'] + ) + + lw_format = agis_data_map[LARGER_WORKS] + agis_data_structures[LARGER_WORKS] = format_xpaths( + _agis_definitions[LARGER_WORKS], + title=lw_format.format(lw_path='resTitle'), + edition=lw_format.format(lw_path='resEd'), + origin=lw_format.format(lw_path='citRespParty/rpIndName'), + online_linkage=lw_format.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'), + 
other_citation=lw_format.format(lw_path='otherCitDet'), + date=lw_format.format(lw_path='date/pubDate'), + place=lw_format.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'), + info=lw_format.format(lw_path='citRespParty/rpOrgName') + ) + + ps_format = agis_data_map[PROCESS_STEPS] + agis_data_structures[PROCESS_STEPS] = format_xpaths( + _agis_definitions[PROCESS_STEPS], + description=ps_format.format(ps_path='stepDesc'), + date=ps_format.format(ps_path='stepDateTm'), + sources=ps_format.format(ps_path='stepSrc/srcDesc') + ) + + # Assign XPATHS and gis_metadata.utils.ParserProperties to data map + + for prop, xpath in iteritems(dict(agis_data_map)): + if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS): + agis_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list) + + elif prop in (BOUNDING_BOX, LARGER_WORKS): + agis_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex) + + elif prop in ('attribute_accuracy', 'dataset_completeness'): + agis_data_map[prop] = ParserProperty(self._parse_report_item, self._update_report_item) + + elif prop == DATES: + agis_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates) + + elif prop == DIGITAL_FORMS: + agis_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms) + + else: + agis_data_map[prop] = xpath + + self._data_map = agis_data_map + self._data_structures = agis_data_structures + + def _parse_digital_forms(self, prop=DIGITAL_FORMS): + """ Concatenates a list of Digital Form data structures parsed from the metadata """ + + xpath_map = self._data_structures[prop] + + # Parse base digital form fields: 'name', 'content', 'decompression', 'version', 'specification' + xpath_root = self._data_map['_digital_forms_root'] + digital_forms = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop) + + # Parse digital form transfer option fields: 'access_desc', 'access_instrs', 'network_resource' + xpath_root = 
self._data_map['_transfer_options_root'] + transfer_opts = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop) + + # Combine digital forms and transfer options into a single complex struct + + df_len = len(digital_forms) + to_len = len(transfer_opts) + parsed_forms = [] + + for idx in xrange(0, max(df_len, to_len)): + digital_form = {}.fromkeys(_agis_definitions[prop], u'') + + if idx < df_len: + digital_form.update(i for i in digital_forms[idx].items() if i[1]) + if idx < to_len: + digital_form.update(i for i in transfer_opts[idx].items() if i[1]) + + if any(digital_form.values()): + parsed_forms.append(digital_form) + + return parsed_forms + + def _parse_report_item(self, prop): + """ :return: the text for each element at the configured path if type attribute matches""" + + if prop == 'attribute_accuracy': + item_type = 'DQQuanAttAcc' + elif prop == 'dataset_completeness': + item_type = 'DQCompOm' + else: + return u'' + + xroot = self._get_xroot_for(prop) + + parsed = (element_to_dict(e) for e in get_elements(self._xml_tree, xroot)) + parsed = flatten_items(e['children'] for e in parsed if e['attributes'].get('type') == item_type) + + return reduce_value([p['text'] for p in parsed if p['name'] == 'measDesc']) + + def _update_digital_forms(self, **update_props): + """ + Update operation for ArcGIS Digital Forms metadata + :see: gis_metadata.utils._complex_definitions[DIGITAL_FORMS] + """ + + digital_forms = wrap_value(update_props['values']) + + # Update all Digital Form properties: distFormat* + + xpath_map = self._data_structures[update_props['prop']] + + dist_format_props = ('name', 'content', 'decompression', 'version', 'specification') + dist_format_xroot = self._data_map['_digital_forms_root'] + dist_format_xmap = {prop: xpath_map[prop] for prop in dist_format_props} + dist_formats = [] + + for digital_form in digital_forms: + dist_formats.append({prop: digital_form[prop] for prop in dist_format_props}) + + update_props['values'] = dist_formats + 
dist_formats = update_complex_list( + xpath_root=dist_format_xroot, xpath_map=dist_format_xmap, **update_props + ) + + # Update all Network Resources: distTranOps+ + + trans_option_props = ('access_desc', 'access_instrs', 'network_resource') + trans_option_xroot = self._data_map['_transfer_options_root'] + trans_option_xmap = {prop: self._data_map['_' + prop] for prop in trans_option_props} + + trans_options = [] + for digital_form in digital_forms: + trans_options.append({prop: digital_form[prop] for prop in trans_option_props}) + + update_props['values'] = trans_options + trans_options = update_complex_list( + xpath_root=trans_option_xroot, xpath_map=trans_option_xmap, **update_props + ) + + return { + 'distribution_formats': dist_formats, + 'transfer_options': trans_options + } + + def _update_dates(self, **update_props): + """ + Update operation for ArcGIS Dates metadata + :see: gis_metadata.utils._complex_definitions[DATES] + """ + + tree_to_update = update_props['tree_to_update'] + xpath_root = self._data_map['_dates_root'] + + if self.dates: + date_type = self.dates[DATE_TYPE] + + # First remove all date info from common root + remove_element(tree_to_update, xpath_root) + + if date_type == DATE_TYPE_MULTIPLE: + xpath_root += '/TempExtent/TM_Instant' + elif date_type == DATE_TYPE_RANGE: + xpath_root += '/TempExtent/TM_Period' + + return super(ArcGISParser, self)._update_dates(xpath_root, **update_props) + + def _update_report_item(self, **update_props): + """ Update the text for each element at the configured path if attribute matches """ + + tree_to_update = update_props['tree_to_update'] + prop = update_props['prop'] + values = wrap_value(update_props['values']) + xroot = self._get_xroot_for(prop) + + attr_key = 'type' + + if prop == 'attribute_accuracy': + attr_val = 'DQQuanAttAcc' + elif prop == 'dataset_completeness': + attr_val = 'DQCompOm' + else: + return [] + + # Clear (make empty) all elements of the appropriate type + for elem in 
get_elements(tree_to_update, xroot): + if get_element_attributes(elem).get(attr_key) == attr_val: + clear_element(elem) + + # Remove all empty elements, including those previously cleared + remove_empty_element(tree_to_update, xroot) + + # Insert elements with correct attributes for each new value + + attrs = {attr_key: attr_val} + updated = [] + + for idx, value in enumerate(values): + elem = insert_element(tree_to_update, idx, xroot, **attrs) + updated.append(insert_element(elem, idx, 'measDesc', value)) + + return updated diff --git a/gis_metadata/fgdc_metadata_parser.py b/gis_metadata/fgdc_metadata_parser.py index 3ebae0f..94c43cf 100644 --- a/gis_metadata/fgdc_metadata_parser.py +++ b/gis_metadata/fgdc_metadata_parser.py @@ -144,6 +144,10 @@ def _init_data_map(self): DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'), DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate') } + fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [ + fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN], + fgdc_data_structures[DATES][DATE_TYPE_RANGE_END] + ] df_format = _fgdc_tag_formats[DIGITAL_FORMS] fgdc_data_structures[DIGITAL_FORMS] = format_xpaths( diff --git a/gis_metadata/iso_metadata_parser.py b/gis_metadata/iso_metadata_parser.py index 72873b0..a637bc5 100644 --- a/gis_metadata/iso_metadata_parser.py +++ b/gis_metadata/iso_metadata_parser.py @@ -237,6 +237,10 @@ def _init_data_map(self): DATE_TYPE_RANGE_END: dt_format.format(type_path='TimePeriod/end/TimeInstant/timePosition'), DATE_TYPE_SINGLE: dt_format.format(type_path='TimeInstant/timePosition') # Same as multiple } + iso_data_structures[DATES][DATE_TYPE_RANGE] = [ + iso_data_structures[DATES][DATE_TYPE_RANGE_BEGIN], + iso_data_structures[DATES][DATE_TYPE_RANGE_END] + ] df_format = iso_data_map[DIGITAL_FORMS] iso_data_structures[DIGITAL_FORMS] = format_xpaths( diff --git a/gis_metadata/metadata_parser.py b/gis_metadata/metadata_parser.py index a316a39..aa8cd56 100644 --- 
a/gis_metadata/metadata_parser.py +++ b/gis_metadata/metadata_parser.py @@ -3,21 +3,21 @@ from copy import deepcopy from six import iteritems -from parserutils.elements import create_element_tree, element_to_dict, element_to_string +from parserutils.elements import create_element_tree, element_exists, element_to_string from parserutils.elements import remove_element, write_element, strip_namespaces -from parserutils.elements import get_element_name, get_element_tree, get_elements +from parserutils.elements import get_element_name, get_element_tree from parserutils.strings import DEFAULT_ENCODING from gis_metadata.utils import DATES, DATE_TYPE, DATE_VALUES from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END -from gis_metadata.utils import has_element, parse_complex, parse_complex_list, parse_dates, parse_property +from gis_metadata.utils import parse_complex, parse_complex_list, parse_dates, parse_property from gis_metadata.utils import update_complex, update_complex_list, update_property, validate_any, validate_properties from gis_metadata.utils import _supported_props, ParserError # Place holders for lazy, one-time FGDC & ISO imports -ArcGISParser, ARCGIS_ROOTS = None, None +ArcGISParser, ARCGIS_ROOTS, ARCGIS_NODES = None, None, None FgdcParser, FGDC_ROOT = None, None IsoParser, ISO_ROOTS = None, None VALID_ROOTS = None @@ -60,10 +60,17 @@ def get_metadata_parser(metadata_container, **metadata_defaults): # The get_parsed_content method ensures only these roots will be returned - if xml_root == FGDC_ROOT: - return FgdcParser(xml_tree, **metadata_defaults) - elif xml_root in ISO_ROOTS: + if xml_root in ISO_ROOTS: return IsoParser(xml_tree, **metadata_defaults) + else: + has_arcgis_data = any(element_exists(xml_tree, e) for e in ARCGIS_NODES) + + if xml_root == FGDC_ROOT and not has_arcgis_data: + return FgdcParser(xml_tree, **metadata_defaults) + elif xml_root in ARCGIS_ROOTS: + return ArcGISParser(xml_tree, 
**metadata_defaults) + + return None def get_parsed_content(metadata_content): @@ -114,6 +121,7 @@ def get_parsed_content(metadata_content): def _import_parsers(): """ Lazy imports to prevent circular dependencies between this module and utils """ + global ARCGIS_NODES global ARCGIS_ROOTS global ArcGISParser @@ -125,10 +133,10 @@ def _import_parsers(): global VALID_ROOTS - if ARCGIS_ROOTS is None or ArcGISParser is None: - ARCGIS_ROOTS = tuple() - # from gis_metadata.arcgis_metadata_parser import ARCGIS_ROOTS - # from gis_metadata.arcgis_metadata_parser import ArcGISParser + if ARCGIS_NODES is None or ARCGIS_ROOTS is None or ArcGISParser is None: + from gis_metadata.arcgis_metadata_parser import ARCGIS_NODES + from gis_metadata.arcgis_metadata_parser import ARCGIS_ROOTS + from gis_metadata.arcgis_metadata_parser import ArcGISParser if FGDC_ROOT is None or FgdcParser is None: from gis_metadata.fgdc_metadata_parser import FGDC_ROOT @@ -262,11 +270,6 @@ def _get_xroot_for(self, prop): return self._get_xpath_for('_{prop}_root'.format(prop=prop)) - def _has_element(self, prop): - """ :return: True if the data map property exists in the XML tree, False otherwise """ - - return has_element(self._xml_tree, self._get_xpath_for(prop)) - def _parse_complex(self, prop): """ Default parsing operation for a complex struct """ @@ -288,14 +291,6 @@ def _parse_dates(self, prop=DATES): return parse_dates(self._xml_tree, self._data_structures[prop]) - def _parse_elements(self, prop, attr=None): - """ :return: the element for the XPATH corresponding to prop as a dict """ - - xpath = self._get_xpath_for(prop) - parsed = [element_to_dict(e, recurse=False) for e in get_elements(self._xml_tree, xpath)] - - return parsed if attr is None else parsed.get(attr) - def _update_complex(self, **update_props): """ Default update operation for a complex struct """ diff --git a/gis_metadata/tests/data/arcgis_metadata.xml b/gis_metadata/tests/data/arcgis_metadata.xml new file mode 100644 index 
0000000..51adbe7 --- /dev/null +++ b/gis_metadata/tests/data/arcgis_metadata.xml @@ -0,0 +1,310 @@ + + + + + + Attributes Label 1 + Attributes Alias 1 + Attributes Definition 1 + Attributes Definition Source 1 + + + + Attributes Label 2 + Attributes Alias 2 + Attributes Definition 2 + Attributes Definition Source 2 + + + + Attributes Label 3 + Attributes Alias 3 + Attributes Definition 3 + Attributes Definition Source 3 + + + + + + + + + + + FGDC CSDGM Metadata + 1.0 + + + + + + + + + + ArcGIS Metadata + 1.0 + + + + Test Distribution Org + Test Distribution Person + + + Test Distribution Phone + + + Test Distribution Address + Test Distribution City + OR + 12345 + US + Test Distribution Email + + 8:00 AM - 5:00 PM PST + Backup Distribution Phone + + + + + Test Processing Fees + + Test Processing Instructions + + + + + Digital Form Name 1 + Digital Form Version 1 + Digital Form Content 1 + Digital Form Specification 1 + Digital Form Decompression 1 + + + + Digital Form Resource 1 + Digital Form Access Instructions 1 + Digital Form Access Description 1 + + + + Digital Form Name 2 + Digital Form Version 2 + Digital Form Content 2 + Digital Form Specification 2 + Digital Form Decompression 2 + + + + Digital Form Resource 2 + Digital Form Access Instructions 2 + Digital Form Access Description 2 + + + + + + + Larger Works Title + Larger Works Edition + Larger Works Other Citation + + Larger Works Originator + Larger Works Info + + + Larger Works Place + + + http://test.largerworks.online.linkage.com + + + + + Larger Works Date + + + + + Test Title + + Test Originators + + + Test Other Citation Info + + + + http://test.onlinelinkages.org + + + + + + http://backup.onlinelinkages.org + + + Test Resource Description + + + Test Publish Date + + + vector digital data + + + Test Abstract + Test Purpose + Test Data Credits + + Test Resource Description + + + + Contact Name 1 + Contact Organization 1 + Contact Position 1 + + + Contact Email 1 + + + + + + Contact Name 2 + Contact 
Organization 2 + Contact Position 2 + + + Contact Email 2 + + + + + + + + + Oregon + Washington + + + Ecoregion + + + Risk + Threat + Habitat + + + Not Used + + + + Test Distribution Liability + + + + + Test Use Constraints + + + + + + + + + + + + Multiple Date 1 + + + + + + + + + Multiple Date 2 + + + + + + + + + Multiple Date 3 + + + + + + + + + + + Test Supplementary Info + Test Technical Prerequisites + + + + + 1 + -179.99999999998656 + 179.99999999998656 + 87.81211601444309 + -86.78249642712764 + + + + + + + + Process Step Description 1 + Process Step Date 1 + + Process Step Sources 1.1 + + + Process Step Sources 1.2 + + + + Process Step Description 2 + + + +Process Step Date 3 + + + + Process Step Description 4 + + Process Step Sources 4.1 + + + Process Step Sources 4.2 + + + + + Test Attribute Accuracy + + + Test Dataset Completeness + + + dataset + + + + + + + diff --git a/gis_metadata/tests/data/fgdc_metadata.xml b/gis_metadata/tests/data/fgdc_metadata.xml index 0c428f2..ac32dfb 100644 --- a/gis_metadata/tests/data/fgdc_metadata.xml +++ b/gis_metadata/tests/data/fgdc_metadata.xml @@ -50,9 +50,6 @@ Multiple Date 3 - - Single Date - diff --git a/gis_metadata/tests/data/test_arcgis.xml b/gis_metadata/tests/data/test_arcgis.xml new file mode 100644 index 0000000..881cd0d --- /dev/null +++ b/gis_metadata/tests/data/test_arcgis.xml @@ -0,0 +1,2 @@ + +testtesttesttestattributesattributesattributesattributestesttesttestprocess_stepsprocess_stepsprocess_stepstest dataset_completenesstest attribute_accuracytestplace_keywordstestthematic_keywordstest use_constraintstest abstracttest supplementary_infotest tech_prerequisitestest dist_liabilitytesttesttesttestcontactscontactscontactscontactstest resource_descdatestesttest bounding_boxtest bounding_boxtest bounding_boxtest bounding_boxtest larger_workstest larger_workstest larger_workstest larger_workstest larger_workstest larger_workstest larger_workstest larger_workstest data_creditstest purposetest titletest 
publish_datetest originatorstest online_linkagesTest Other Citation InfoDigital Form Access Instructions 1Digital Form Access Description 1Digital Form Resource 1Digital Form Access Instructions 2Digital Form Access Description 2Digital Form Resource 2Digital Form Content 1Digital Form Decompression 1Digital Form Name 1Digital Form Version 1Digital Form Specification 1Digital Form Content 2Digital Form Decompression 2Digital Form Name 2Digital Form Version 2Digital Form Specification 2test processing_instrstest processing_feestest dist_contact_orgtest dist_contact_persontest dist_phonetest dist_addresstest dist_citytest dist_emailtest dist_postaltest dist_statetest dist_country \ No newline at end of file diff --git a/gis_metadata/tests/tests.py b/gis_metadata/tests/tests.py index e0a6dff..cf65788 100644 --- a/gis_metadata/tests/tests.py +++ b/gis_metadata/tests/tests.py @@ -8,6 +8,7 @@ from parserutils.elements import get_element_text, get_elements, get_remote_element from parserutils.elements import clear_element, remove_element, remove_element_attributes, set_element_attributes +from gis_metadata.arcgis_metadata_parser import ArcGISParser, ARCGIS_NODES, ARCGIS_ROOTS from gis_metadata.fgdc_metadata_parser import FgdcParser, FGDC_ROOT from gis_metadata.iso_metadata_parser import IsoParser, ISO_ROOTS, _iso_tag_formats from gis_metadata.metadata_parser import MetadataParser, get_metadata_parser, get_parsed_content @@ -151,11 +152,13 @@ def setUp(self): dir_name = os.path.dirname(os.path.abspath(__file__)) self.data_dir = sep.join((dir_name, 'data')) + self.arcgis_file = sep.join((self.data_dir, 'arcgis_metadata.xml')) self.fgdc_file = sep.join((self.data_dir, 'fgdc_metadata.xml')) self.iso_file = sep.join((self.data_dir, 'iso_metadata.xml')) # Initialize metadata files + self.arcgis_metadata = open(self.arcgis_file) self.fgdc_metadata = open(self.fgdc_file) self.iso_metadata = open(self.iso_file) @@ -163,6 +166,7 @@ def setUp(self): # Define test file paths + 
self.test_arcgis_file_path = '/'.join((self.data_dir, 'test_arcgis.xml')) self.test_fgdc_file_path = '/'.join((self.data_dir, 'test_fgdc.xml')) self.test_iso_file_path = '/'.join((self.data_dir, 'test_iso.xml')) @@ -353,6 +357,12 @@ def assert_valid_template(self, parser, root): '{0} property {1}, "{2}", does not equal "{3}"'.format(parser_type, prop, parsed_val, val) )) + def test_arcgis_template_values(self): + arcgis_template = ArcGISParser(**TEST_TEMPLATE_VALUES) + + self.assert_valid_template(arcgis_template, root='metadata') + self.assert_reparsed_simple_for(arcgis_template, TEST_TEMPLATE_VALUES) + def test_fgdc_template_values(self): fgdc_template = FgdcParser(**TEST_TEMPLATE_VALUES) @@ -366,11 +376,18 @@ def test_iso_template_values(self): self.assert_reparsed_simple_for(iso_template, TEST_TEMPLATE_VALUES) def test_template_conversion(self): + arcgis_template = ArcGISParser() fgdc_template = FgdcParser() iso_template = IsoParser() + self.assert_parser_conversion(arcgis_template, fgdc_template, 'template') + self.assert_parser_conversion(arcgis_template, iso_template, 'template') + self.assert_parser_conversion(fgdc_template, iso_template, 'template') + self.assert_parser_conversion(fgdc_template, arcgis_template, 'template') + self.assert_parser_conversion(iso_template, fgdc_template, 'template') + self.assert_parser_conversion(iso_template, arcgis_template, 'template') def test_template_conversion_bad_roots(self): @@ -385,57 +402,111 @@ def test_template_conversion_bad_roots(self): IsoParser(bad_root) with self.assertRaises(ParserError): FgdcParser(bad_root) + with self.assertRaises(ParserError): + ArcGISParser(bad_root) with self.assertRaises(ParserError): IsoParser(FGDC_ROOT.join(('<', '>'))) for iso_root in ISO_ROOTS: + with self.assertRaises(ParserError): + ArcGISParser(iso_root.join(('<', '>'))) with self.assertRaises(ParserError): FgdcParser(iso_root.join(('<', '>'))) + for arcgis_root in ARCGIS_ROOTS: + + with self.assertRaises(ParserError): + 
IsoParser(arcgis_root.join(('<', '>'))) + + if arcgis_root != FGDC_ROOT: + with self.assertRaises(ParserError): + FgdcParser(arcgis_root.join(('<', '>'))) + def test_template_conversion_from_dict(self): + for arcgis_root in ARCGIS_ROOTS: + for arcgis_node in ARCGIS_NODES: + + data = {'name': arcgis_root, 'children': [{'name': arcgis_node}]} + self.assert_parser_conversion( + FgdcParser(), get_metadata_parser(data), 'dict-based template' + ) + self.assert_parser_conversion( + IsoParser(), get_metadata_parser(data), 'dict-based template' + ) + + self.assert_parser_conversion( + ArcGISParser(), get_metadata_parser({'name': FGDC_ROOT}), 'dict-based template' + ) self.assert_parser_conversion( IsoParser(), get_metadata_parser({'name': FGDC_ROOT}), 'dict-based template' ) - fgdc_template = FgdcParser() - fgdc_template.dist_address_type = u'' # Address type not supported for ISO - for iso_root in ISO_ROOTS: self.assert_parser_conversion( - fgdc_template, get_metadata_parser({'name': iso_root}), 'dict-based template' + ArcGISParser(), get_metadata_parser({'name': iso_root}), 'dict-based template' + ) + self.assert_parser_conversion( + FgdcParser(), get_metadata_parser({'name': iso_root}), 'dict-based template' ) def test_template_conversion_from_str(self): + for arcgis_root in ARCGIS_ROOTS: + for arcgis_node in ARCGIS_NODES: + + data = arcgis_node.join(('<', '>')) + data = arcgis_root.join(('<', '>{0}')).format(data) + + self.assert_parser_conversion( + FgdcParser(), get_metadata_parser(data), 'dict-based template' + ) + self.assert_parser_conversion( + IsoParser(), get_metadata_parser(data), 'dict-based template' + ) + + self.assert_parser_conversion( + ArcGISParser(), get_metadata_parser(FGDC_ROOT.join(('<', '>'))), 'str-based template' + ) self.assert_parser_conversion( IsoParser(), get_metadata_parser(FGDC_ROOT.join(('<', '>'))), 'str-based template' ) - fgdc_template = FgdcParser() - fgdc_template.dist_address_type = u'' # Address type not supported for ISO - for 
iso_root in ISO_ROOTS: self.assert_parser_conversion( - fgdc_template, get_metadata_parser(iso_root.join(('<', '>'))), 'str-based template' + ArcGISParser(), get_metadata_parser(iso_root.join(('<', '>'))), 'str-based template' + ) + self.assert_parser_conversion( + FgdcParser(), get_metadata_parser(iso_root.join(('<', '>'))), 'str-based template' ) def test_template_conversion_from_type(self): self.assert_parser_conversion( - IsoParser(), get_metadata_parser(FgdcParser), 'type-based template' + ArcGISParser(), get_metadata_parser(FgdcParser), 'type-based template' + ) + self.assert_parser_conversion( + ArcGISParser(), get_metadata_parser(IsoParser), 'type-based template' ) - fgdc_template = FgdcParser() - fgdc_template.dist_address_type = u'' # Address type not supported for ISO + self.assert_parser_conversion( + IsoParser(), get_metadata_parser(ArcGISParser), 'type-based template' + ) + self.assert_parser_conversion( + IsoParser(), get_metadata_parser(FgdcParser), 'type-based template' + ) self.assert_parser_conversion( - fgdc_template, get_metadata_parser(IsoParser), 'type-based template' + FgdcParser(), get_metadata_parser(ArcGISParser), 'type-based template' + ) + self.assert_parser_conversion( + FgdcParser(), get_metadata_parser(IsoParser), 'type-based template' ) def test_write_template(self): + self.assert_template_after_write(ArcGISParser, self.test_arcgis_file_path) self.assert_template_after_write(FgdcParser, self.test_fgdc_file_path) self.assert_template_after_write(IsoParser, self.test_iso_file_path) @@ -465,7 +536,7 @@ def test_generic_parser(self): def test_specific_parsers(self): """ Ensures code enforces certain behaviors for existing parsers """ - for parser_type in (FgdcParser, IsoParser): + for parser_type in (ArcGISParser, FgdcParser, IsoParser): parser = parser_type() data_map_1 = parser._data_map @@ -481,9 +552,34 @@ def test_specific_parsers(self): parser._data_map.clear() parser.validate() + def test_arcgis_parser(self): + """ Tests behavior 
unique to the ArcGIS parser """ + + # Test dates structure defaults + + # Remove multiple dates to ensure range is queried + arcgis_element = get_remote_element(self.arcgis_file) + remove_element(arcgis_element, 'dataIdInfo/dataExt/tempEle/TempExtent/exTemp/TM_Instant', True) + + # Assert that the backup dates are read in successfully + arcgis_parser = ArcGISParser(element_to_string(arcgis_element)) + self.assertEqual(arcgis_parser.dates, {'type': 'range', 'values': ['Date Range Start', 'Date Range End']}) + def test_fgdc_parser(self): """ Tests behavior unique to the FGDC parser """ + # Test dates structure defaults + + # Remove multiple dates to ensure range is queried + fgdc_element = get_remote_element(self.fgdc_file) + remove_element(fgdc_element, 'idinfo/timeperd/timeinfo/mdattim', True) + + # Assert that the backup dates are read in successfully + fgdc_parser = FgdcParser(element_to_string(fgdc_element)) + self.assertEqual(fgdc_parser.dates, {'type': 'range', 'values': ['Date Range Start', 'Date Range End']}) + + # Test contact data structure defaults + contacts_def = get_complex_definitions()[CONTACTS] # Remove the contact organization completely @@ -555,6 +651,13 @@ def test_iso_parser(self): def test_parser_values(self): """ Tests that parsers are populated with the expected values """ + arcgis_element = get_remote_element(self.arcgis_file) + arcgis_parser = ArcGISParser(element_to_string(arcgis_element)) + arcgis_new = ArcGISParser(**TEST_METADATA_VALUES) + + # Test that the two ArcGIS parsers have the same data given the same input file + self.assert_parsers_are_equal(arcgis_parser, arcgis_new) + fgdc_element = get_remote_element(self.fgdc_file) fgdc_parser = FgdcParser(element_to_string(fgdc_element)) fgdc_new = FgdcParser(**TEST_METADATA_VALUES) @@ -570,56 +673,94 @@ def test_parser_values(self): # Test that the two ISO parsers have the same data given the same input file self.assert_parsers_are_equal(iso_parser, iso_new) - # Test that two distinct 
parsers have the same data given equivalent input files + # Test that all distinct parsers have the same data given equivalent input files + + self.assert_parsers_are_equal(arcgis_parser, fgdc_parser) self.assert_parsers_are_equal(fgdc_parser, iso_parser) + self.assert_parsers_are_equal(iso_parser, arcgis_parser) # Test that each parser's values correspond to the target values - for parser in (fgdc_parser, iso_parser): + for parser in (arcgis_parser, fgdc_parser, iso_parser): parser_type = type(parser) for prop, target in TEST_METADATA_VALUES.items(): self.assert_equal_for(parser_type, prop, getattr(parser, prop), target) def test_parser_conversion(self): + arcgis_parser = ArcGISParser(self.arcgis_metadata) fgdc_parser = FgdcParser(self.fgdc_metadata) iso_parser = IsoParser(self.iso_metadata) + self.assert_parser_conversion(arcgis_parser, fgdc_parser, 'file') + self.assert_parser_conversion(arcgis_parser, iso_parser, 'file') + + self.assert_parser_conversion(fgdc_parser, arcgis_parser, 'file') self.assert_parser_conversion(fgdc_parser, iso_parser, 'file') + + self.assert_parser_conversion(iso_parser, arcgis_parser, 'file') self.assert_parser_conversion(iso_parser, fgdc_parser, 'file') def test_conversion_from_dict(self): + arcgis_parser = ArcGISParser(self.arcgis_metadata) fgdc_parser = FgdcParser(self.fgdc_metadata) iso_parser = IsoParser(self.iso_metadata) - fgdc_parser.dist_address_type = u'' # Address type not supported for ISO - self.assert_parser_conversion( - iso_parser, get_metadata_parser(element_to_dict(fgdc_parser._xml_tree, recurse=True)), 'dict-based' + arcgis_parser, get_metadata_parser(element_to_dict(fgdc_parser._xml_tree, recurse=True)), 'dict-based' + ) + self.assert_parser_conversion( + arcgis_parser, get_metadata_parser(element_to_dict(iso_parser._xml_tree, recurse=True)), 'dict-based' ) + self.assert_parser_conversion( + fgdc_parser, get_metadata_parser(element_to_dict(arcgis_parser._xml_tree, recurse=True)), 'dict-based' + ) 
self.assert_parser_conversion( fgdc_parser, get_metadata_parser(element_to_dict(iso_parser._xml_tree, recurse=True)), 'dict-based' ) + self.assert_parser_conversion( + iso_parser, get_metadata_parser(element_to_dict(arcgis_parser._xml_tree, recurse=True)), 'dict-based' + ) + self.assert_parser_conversion( + iso_parser, get_metadata_parser(element_to_dict(fgdc_parser._xml_tree, recurse=True)), 'dict-based' + ) + def test_conversion_from_str(self): + arcgis_parser = ArcGISParser(self.arcgis_metadata) fgdc_parser = FgdcParser(self.fgdc_metadata) iso_parser = IsoParser(self.iso_metadata) - fgdc_parser.dist_address_type = u'' # Address type not supported for ISO - self.assert_parser_conversion( - iso_parser, get_metadata_parser(fgdc_parser.serialize()), 'str-based' + arcgis_parser, get_metadata_parser(fgdc_parser.serialize()), 'str-based' + ) + self.assert_parser_conversion( + arcgis_parser, get_metadata_parser(iso_parser.serialize()), 'str-based' ) + self.assert_parser_conversion( + fgdc_parser, get_metadata_parser(arcgis_parser.serialize()), 'str-based' + ) self.assert_parser_conversion( fgdc_parser, get_metadata_parser(iso_parser.serialize()), 'str-based' ) + self.assert_parser_conversion( + iso_parser, get_metadata_parser(arcgis_parser.serialize()), 'str-based' + ) + self.assert_parser_conversion( + iso_parser, get_metadata_parser(fgdc_parser.serialize()), 'str-based' + ) + def test_reparse_complex_lists(self): complex_defs = get_complex_definitions() complex_lists = (ATTRIBUTES, CONTACTS, DIGITAL_FORMS) - for parser in (FgdcParser(self.fgdc_metadata), IsoParser(self.iso_metadata)): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): # Test reparsed empty complex lists for prop in complex_lists: @@ -644,7 +785,11 @@ def test_reparse_complex_structs(self): complex_defs = get_complex_definitions() complex_structs = 
(BOUNDING_BOX, LARGER_WORKS) - for parser in (FgdcParser(self.fgdc_metadata), IsoParser(self.iso_metadata)): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): # Test reparsed empty complex structures for prop in complex_structs: @@ -664,7 +809,11 @@ def test_reparse_dates(self): (DATE_TYPE_MULTIPLE, ['first', 'next', 'last']) ) - for parser in (FgdcParser(self.fgdc_metadata), IsoParser(self.iso_metadata)): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): # Test reparsed empty dates for empty in (None, {}, {DATE_TYPE: u'', DATE_VALUES: []}): @@ -679,7 +828,11 @@ def test_reparse_dates(self): def test_reparse_keywords(self): - for parser in (FgdcParser(self.fgdc_metadata), IsoParser(self.iso_metadata)): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): # Test reparsed empty keywords for keywords in ('', u'', []): @@ -694,7 +847,11 @@ def test_reparse_keywords(self): def test_reparse_process_steps(self): proc_step_def = get_complex_definitions()[PROCESS_STEPS] - for parser in (FgdcParser(self.fgdc_metadata), IsoParser(self.iso_metadata)): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): # Test reparsed empty process steps for empty in (None, [], [{}], [{}.fromkeys(proc_step_def, u'')]): @@ -723,13 +880,14 @@ def test_reparse_simple_values(self): simple_props = required_props.difference(complex_props) simple_props = simple_props.difference({KEYWORDS_PLACE, 
KEYWORDS_THEME}) - fgdc_parser = FgdcParser(self.fgdc_metadata) - iso_parser = IsoParser(self.iso_metadata) - simple_empty_vals = ('', u'', []) simple_valid_vals = (u'value', [u'item', u'list']) - for parser in (fgdc_parser, iso_parser): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): # Test reparsed empty values for val in simple_empty_vals: @@ -744,7 +902,7 @@ def test_validate_complex_lists(self): invalid_values = ('', u'', {'x': 'xxx'}, [{'x': 'xxx'}], set(), tuple()) - for parser in (FgdcParser().validate(), IsoParser().validate()): + for parser in (ArcGISParser().validate(), FgdcParser().validate(), IsoParser().validate()): for prop in complex_props: for invalid in invalid_values: self.assert_validates_for(parser, prop, invalid) @@ -754,7 +912,7 @@ def test_validate_complex_structs(self): invalid_values = ('', u'', {'x': 'xxx'}, list(), set(), tuple()) - for parser in (FgdcParser().validate(), IsoParser().validate()): + for parser in (ArcGISParser().validate(), FgdcParser().validate(), IsoParser().validate()): for prop in complex_props: for invalid in invalid_values: self.assert_validates_for(parser, prop, invalid) @@ -771,7 +929,11 @@ def test_validate_dates(self): ('unknown', ['unknown']) ) - for parser in (FgdcParser(self.fgdc_metadata), IsoParser(self.iso_metadata)): + arcgis_parser = ArcGISParser(self.arcgis_metadata) + fgdc_parser = FgdcParser(self.fgdc_metadata) + iso_parser = IsoParser(self.iso_metadata) + + for parser in (arcgis_parser, fgdc_parser, iso_parser): for val in invalid_values: self.assert_validates_for(parser, DATES, {DATE_TYPE: val[0], DATE_VALUES: val[1]}) @@ -781,17 +943,19 @@ def test_validate_simple_values(self): invalid_values = (None, [None], dict(), [dict()], set(), [set()], tuple(), [tuple()]) - for parser in (FgdcParser().validate(), IsoParser().validate()): + for parser in 
(ArcGISParser().validate(), FgdcParser().validate(), IsoParser().validate()): for prop in simple_props: for invalid in invalid_values: self.assert_validates_for(parser, prop, invalid) def test_write_values(self): + self.assert_parser_after_write(ArcGISParser, self.arcgis_metadata, self.test_arcgis_file_path) self.assert_parser_after_write(FgdcParser, self.fgdc_metadata, self.test_fgdc_file_path) self.assert_parser_after_write(IsoParser, self.iso_metadata, self.test_iso_file_path) def test_write_values_to_template(self): + self.assert_parser_after_write(ArcGISParser, self.arcgis_metadata, self.test_arcgis_file_path, True) self.assert_parser_after_write(FgdcParser, self.fgdc_metadata, self.test_fgdc_file_path, True) self.assert_parser_after_write(IsoParser, self.iso_metadata, self.test_iso_file_path, True) diff --git a/gis_metadata/utils.py b/gis_metadata/utils.py index c343707..a5fdbbe 100644 --- a/gis_metadata/utils.py +++ b/gis_metadata/utils.py @@ -5,7 +5,7 @@ from parserutils.collections import filter_empty, flatten_items, reduce_value, wrap_value from parserutils.elements import get_element, get_elements, get_element_attributes, get_elements_text -from parserutils.elements import element_exists, insert_element, remove_element_attributes, remove_element +from parserutils.elements import insert_element, remove_element_attributes, remove_element from parserutils.elements import XPATH_DELIM from gis_metadata.exceptions import ParserError @@ -216,7 +216,7 @@ def get_default_for_complex(prop, subprop, value, xpath=''): return _join_complex_attr(value) if '@' in xpath else _join_complex_prop(value) -def has_element(elem_to_parse, xpath): +def has_property(elem_to_parse, xpath): """ Parse xpath for any attribute reference "path/@attr" and check for root and presence of attribute. 
:return: True if xpath is present in the element along with any attribute referenced, otherwise False @@ -227,9 +227,9 @@ def has_element(elem_to_parse, xpath): if not xroot: return False elif not attr: - return element_exists(elem_to_parse, xroot) + return bool(get_elements_text(elem_to_parse, xroot)) else: - return attr in get_element_attributes(elem_to_parse, xroot) + return bool(get_element_attributes(elem_to_parse, xroot).get(attr)) def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key): @@ -271,57 +271,40 @@ def parse_complex_list(tree_to_parse, xpath_root, xpath_map, complex_key): return complex_list -def parse_dates(tree_to_parse, date_xpath_map, date_type=None, date_xpaths=None, date_values=None): +def parse_dates(tree_to_parse, xpath_map): """ Creates and returns a Dates Dictionary data structure given the parameters provided :param tree_to_parse: the XML tree from which to construct the Dates data structure - :param date_xpath_map: a map containing the following type-specific XPATHs: - multiple, range_begin, range_end, and single - :param date_type: if type is known, use it to determine which XPATHs to parse values - :param date_xpaths: if an array of XPATHs is provided, use them to parse values from tree_to_parse - :param date_values: if values are already parsed, use them to construct a Dates data structure + :param xpath_map: a map containing the following type-specific XPATHs: + multiple, range, range_begin, range_end, and single """ - if date_type is None or date_xpaths is None: - # Pull the intended XPATHs out of the map + # Determine dates to query based on metadata elements - dt_multiple_xpath = date_xpath_map[DATE_TYPE_MULTIPLE] - dt_range_beg_xpath = date_xpath_map[DATE_TYPE_RANGE_BEGIN] - dt_range_end_xpath = date_xpath_map[DATE_TYPE_RANGE_END] - dt_single_xpath = date_xpath_map[DATE_TYPE_SINGLE] + values = wrap_value(parse_property(tree_to_parse, None, xpath_map, DATE_TYPE_SINGLE)) + if len(values) == 1: + return {DATE_TYPE: 
DATE_TYPE_SINGLE, DATE_VALUES: values} + elif len(values) > 1: + return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values} - if date_type is None: - # Determine dates type based on metadata elements + values = wrap_value(parse_property(tree_to_parse, None, xpath_map, DATE_TYPE_MULTIPLE)) + if len(values) == 1: + return {DATE_TYPE: DATE_TYPE_SINGLE, DATE_VALUES: values} + elif len(values) > 1: + return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values} - if element_exists(tree_to_parse, dt_multiple_xpath): - date_type = DATE_TYPE_MULTIPLE - elif (element_exists(tree_to_parse, dt_range_beg_xpath) and - element_exists(tree_to_parse, dt_range_end_xpath)): - date_type = DATE_TYPE_RANGE - elif element_exists(tree_to_parse, dt_single_xpath): - date_type = DATE_TYPE_SINGLE - else: - return {} - - if date_xpaths is None: - # Determine XPATHs from dates type - - if date_type == DATE_TYPE_MULTIPLE: - date_xpaths = [dt_multiple_xpath] - elif date_type == DATE_TYPE_RANGE: - date_xpaths = [dt_range_beg_xpath, dt_range_end_xpath] - elif date_type == DATE_TYPE_SINGLE: - date_xpaths = [dt_single_xpath] - - date_xpaths = filter_empty(date_xpaths, []) - - if date_values is None: - date_values = [text for xpath in date_xpaths for text in get_elements_text(tree_to_parse, xpath)] - - if len(date_values) == 1: - date_type = DATE_TYPE_SINGLE + values = flatten_items( + d for x in (DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END) + for d in wrap_value(parse_property(tree_to_parse, None, xpath_map, x)) + ) + if len(values) == 1: + return {DATE_TYPE: DATE_TYPE_SINGLE, DATE_VALUES: values} + elif len(values) == 2: + return {DATE_TYPE: DATE_TYPE_RANGE, DATE_VALUES: values} + elif len(values) > 2: + return {DATE_TYPE: DATE_TYPE_MULTIPLE, DATE_VALUES: values} - return {DATE_TYPE: date_type, DATE_VALUES: date_values} + return {} def parse_property(tree_to_parse, xpath_root, xpath_map, prop): @@ -330,7 +313,7 @@ def parse_property(tree_to_parse, xpath_root, xpath_map, prop): :param tree_to_parse: 
the XML tree compatible with element_utils to be parsed :param xpath_root: used to determine the relative XPATH location within the parent element :param xpath_map: a dict of XPATHs that may contain alternate locations for a property - :param complex_key: indicates which complex definition describes the structure + :param prop: the property to parse: corresponds to a key in xpath_map """ xpath = xpath_map[prop] @@ -346,8 +329,8 @@ def parse_property(tree_to_parse, xpath_root, xpath_map, prop): parsed = None - if not has_element(tree_to_parse, xpath): - # Element is not present in tree: try next alternate location + if not has_property(tree_to_parse, xpath): + # Element has no text: try next alternate location alternate = '_' + prop if alternate in xpath_map: @@ -408,12 +391,12 @@ def _update_property(tree_to_update, xpath_root, xpaths, prop, values): def update_element(elem, idx, root, path, vals): """ Internal helper function to encapsulate single item update """ - has_root = (root and len(path) > len(root) and path.startswith(root)) + has_root = bool(root and len(path) > len(root) and path.startswith(root)) path, attr = get_xpath_tuple(path) # 'path/@attr' to ('path', 'attr') if attr: removed = [get_element(elem, path)] - remove_element_attributes(removed, attr) + remove_element_attributes(removed[0], attr) elif not has_root: removed = wrap_value(remove_element(elem, path)) else: