Skip to content

Commit

Permalink
Added ArcGISParser, test data, and full test coverage.
Browse files Browse the repository at this point in the history
  • Loading branch information
dharvey-consbio committed Oct 20, 2016
1 parent f230c8f commit 8db53bb
Show file tree
Hide file tree
Showing 10 changed files with 955 additions and 117 deletions.
10 changes: 5 additions & 5 deletions docs/arcgis-metadata.dtd
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@
<!ELEMENT dataSetFn (OnFunctCd)>

<!-- Metadata.dataIdInfo: Resource identification information (data identification elements) -->
<!ELEMENT dataIdInfo (DataIdent | (aggrInfo?, dataChar?, dataExt+, dataLang+, dataScale*, spatRpType, (deFormat | dsFormat), envirDesc?, idAbs, idCitation, idCredit*, idPoC, idPurp, idSpecUse?, idStatus, graphOver?, resConst*, resMaint*, suppInfo?, descKeys*, discKeys*, otherKeys*, placeKeys*, productKeys*, searchKeys*, stratKeys*, subTopicCatKeys*, tempKeys*, themeKeys*, tpCat, svAccProps?, svCouplRes?, svCoupleType?, svOper?, svOperOn?, svType?, svTypeVer?))>
<!ELEMENT DataIdent (aggrInfo?, dataChar?, dataExt+, dataLang+, dataScale*, spatRpType, (deFormat | dsFormat), envirDesc?, idAbs, idCitation, idCredit*, idPoC, idPurp, idSpecUse?, idStatus, graphOver?, resConst*, resMaint*, suppInfo?, descKeys*, discKeys*, otherKeys*, placeKeys*, productKeys*, searchKeys*, stratKeys*, subTopicCatKeys*, tempKeys*, themeKeys*, tpCat, svAccProps?, svCouplRes?, svCoupleType?, svOper?, svOperOn?, svType?, svTypeVer?)>
<!ELEMENT dataIdInfo (DataIdent | (aggrInfo?, dataChar?, dataExt+, dataLang+, dataScale*, spatRpType, (deFormat | dsFormat), envirDesc?, idAbs, idCitation, idCredit*, idPoC*, idPurp, idSpecUse?, idStatus, graphOver?, resConst*, resMaint*, suppInfo?, descKeys*, discKeys*, otherKeys*, placeKeys*, productKeys*, searchKeys*, stratKeys*, subTopicCatKeys*, tempKeys*, themeKeys*, tpCat, svAccProps?, svCouplRes?, svCoupleType?, svOper?, svOperOn?, svType?, svTypeVer?))>
<!ELEMENT DataIdent (aggrInfo?, dataChar?, dataExt+, dataLang+, dataScale*, spatRpType, (deFormat | dsFormat), envirDesc?, idAbs, idCitation, idCredit*, idPoC*, idPurp, idSpecUse?, idStatus, graphOver?, resConst*, resMaint*, suppInfo?, descKeys*, discKeys*, otherKeys*, placeKeys*, productKeys*, searchKeys*, stratKeys*, subTopicCatKeys*, tempKeys*, themeKeys*, tpCat, svAccProps?, svCouplRes?, svCoupleType?, svOper?, svOperOn?, svType?, svTypeVer?)>

<!-- Metadata.contInfo: Remotely sensed image content -->
<!ELEMENT contInfo (FetCatDesc | ImgDesc)>
Expand Down Expand Up @@ -435,7 +435,7 @@
<!-- DataIdentification.svOper: Service method description (service operation elements) -->
<!ELEMENT svOper (svOpName, svDCP?, svDesc, svInvocName, svParams*, svConPt, svOper)>
<!-- DataIdentification.svOperOn: Citation for a resource on which the service operates (data identification elements) -->
<!ELEMENT svOperOn (aggrInfo?, dataChar?, dataExt+, dataLang+, dataScale*, spatRpType, dsFormat, envirDesc?, idAbs, idCitation, idCredit+, idPoC, idPurp, idSpecUse, idStatus, graphOver?, resConst*, resMaint+, suppInfo?, discKeys*, otherKeys*, placeKeys+, productKeys*, searchKeys*, stratKeys*, subTopicCatKeys*, tempKeys*, themeKeys+, tpCat, svAccProps?, svCouplRes, svCoupleType, svOper, svOperOn, svType, svTypeVer?)>
<!ELEMENT svOperOn (aggrInfo?, dataChar?, dataExt+, dataLang+, dataScale*, spatRpType, dsFormat, envirDesc?, idAbs, idCitation, idCredit+, idPoC*, idPurp, idSpecUse, idStatus, graphOver?, resConst*, resMaint+, suppInfo?, discKeys*, otherKeys*, placeKeys+, productKeys*, searchKeys*, stratKeys*, subTopicCatKeys*, tempKeys*, themeKeys+, tpCat, svAccProps?, svCouplRes, svCoupleType, svOper, svOperOn, svType, svTypeVer?)>
<!ELEMENT svType (#PCDATA)>
<!ATTLIST svType codeSpace CDATA>
<!ELEMENT svTypeVer (#PCDATA)>
Expand Down Expand Up @@ -690,7 +690,7 @@

<!-- Data Source Elements -->

<!ELEMENT stepDesc (#PCDATA)>
<!ELEMENT srcDesc (#PCDATA)>
<!ELEMENT srcMedName (MedNameCd)>
<!ELEMENT MedNameCd (#PCDATA)>
<!ATTLIST MedNameCd value CDATA>
Expand Down Expand Up @@ -768,7 +768,7 @@

<!ELEMENT unitsODist (#PCDATA)>
<!ELEMENT transSize (#PCDATA)>
<!ELEMENT onLineSrc (linkage | #PCDATA)>
<!ELEMENT onLineSrc (linkage, protocol?, appProfile?, orName?, orDesc?, orFunct?)>
<!ELEMENT offLineMed (medName, medDensity*, medDenUnits?, medVol?, medFormat*, medNote?)>
<!ELEMENT medName (medNameCd)>
<!ELEMENT medNameCd (#PCDATA)>
Expand Down
379 changes: 379 additions & 0 deletions gis_metadata/arcgis_metadata_parser.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions gis_metadata/fgdc_metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ def _init_data_map(self):
DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
}
fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
]

df_format = _fgdc_tag_formats[DIGITAL_FORMS]
fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
Expand Down
4 changes: 4 additions & 0 deletions gis_metadata/iso_metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ def _init_data_map(self):
DATE_TYPE_RANGE_END: dt_format.format(type_path='TimePeriod/end/TimeInstant/timePosition'),
DATE_TYPE_SINGLE: dt_format.format(type_path='TimeInstant/timePosition') # Same as multiple
}
iso_data_structures[DATES][DATE_TYPE_RANGE] = [
iso_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
iso_data_structures[DATES][DATE_TYPE_RANGE_END]
]

df_format = iso_data_map[DIGITAL_FORMS]
iso_data_structures[DIGITAL_FORMS] = format_xpaths(
Expand Down
43 changes: 19 additions & 24 deletions gis_metadata/metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@
from copy import deepcopy
from six import iteritems

from parserutils.elements import create_element_tree, element_to_dict, element_to_string
from parserutils.elements import create_element_tree, element_exists, element_to_string
from parserutils.elements import remove_element, write_element, strip_namespaces
from parserutils.elements import get_element_name, get_element_tree, get_elements
from parserutils.elements import get_element_name, get_element_tree
from parserutils.strings import DEFAULT_ENCODING

from gis_metadata.utils import DATES, DATE_TYPE, DATE_VALUES
from gis_metadata.utils import DATE_TYPE_RANGE, DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END
from gis_metadata.utils import has_element, parse_complex, parse_complex_list, parse_dates, parse_property
from gis_metadata.utils import parse_complex, parse_complex_list, parse_dates, parse_property
from gis_metadata.utils import update_complex, update_complex_list, update_property, validate_any, validate_properties
from gis_metadata.utils import _supported_props, ParserError


# Placeholders for lazy, one-time ArcGIS, FGDC & ISO imports

ArcGISParser, ARCGIS_ROOTS = None, None
ArcGISParser, ARCGIS_ROOTS, ARCGIS_NODES = None, None, None
FgdcParser, FGDC_ROOT = None, None
IsoParser, ISO_ROOTS = None, None
VALID_ROOTS = None
Expand Down Expand Up @@ -60,10 +60,17 @@ def get_metadata_parser(metadata_container, **metadata_defaults):

# The get_parsed_content method ensures only these roots will be returned

if xml_root == FGDC_ROOT:
return FgdcParser(xml_tree, **metadata_defaults)
elif xml_root in ISO_ROOTS:
if xml_root in ISO_ROOTS:
return IsoParser(xml_tree, **metadata_defaults)
else:
has_arcgis_data = any(element_exists(xml_tree, e) for e in ARCGIS_NODES)

if xml_root == FGDC_ROOT and not has_arcgis_data:
return FgdcParser(xml_tree, **metadata_defaults)
elif xml_root in ARCGIS_ROOTS:
return ArcGISParser(xml_tree, **metadata_defaults)

return None


def get_parsed_content(metadata_content):
Expand Down Expand Up @@ -114,6 +121,7 @@ def get_parsed_content(metadata_content):
def _import_parsers():
""" Lazy imports to prevent circular dependencies between this module and utils """

global ARCGIS_NODES
global ARCGIS_ROOTS
global ArcGISParser

Expand All @@ -125,10 +133,10 @@ def _import_parsers():

global VALID_ROOTS

if ARCGIS_ROOTS is None or ArcGISParser is None:
ARCGIS_ROOTS = tuple()
# from gis_metadata.arcgis_metadata_parser import ARCGIS_ROOTS
# from gis_metadata.arcgis_metadata_parser import ArcGISParser
if ARCGIS_NODES is None or ARCGIS_ROOTS is None or ArcGISParser is None:
from gis_metadata.arcgis_metadata_parser import ARCGIS_NODES
from gis_metadata.arcgis_metadata_parser import ARCGIS_ROOTS
from gis_metadata.arcgis_metadata_parser import ArcGISParser

if FGDC_ROOT is None or FgdcParser is None:
from gis_metadata.fgdc_metadata_parser import FGDC_ROOT
Expand Down Expand Up @@ -262,11 +270,6 @@ def _get_xroot_for(self, prop):

return self._get_xpath_for('_{prop}_root'.format(prop=prop))

def _has_element(self, prop):
""" :return: True if the data map property exists in the XML tree, False otherwise """

return has_element(self._xml_tree, self._get_xpath_for(prop))

def _parse_complex(self, prop):
""" Default parsing operation for a complex struct """

Expand All @@ -288,14 +291,6 @@ def _parse_dates(self, prop=DATES):

return parse_dates(self._xml_tree, self._data_structures[prop])

def _parse_elements(self, prop, attr=None):
""" :return: the element for the XPATH corresponding to prop as a dict """

xpath = self._get_xpath_for(prop)
parsed = [element_to_dict(e, recurse=False) for e in get_elements(self._xml_tree, xpath)]

return parsed if attr is None else parsed.get(attr)

def _update_complex(self, **update_props):
""" Default update operation for a complex struct """

Expand Down

0 comments on commit 8db53bb

Please sign in to comment.