Skip to content

Commit

Permalink
Streamlined, supported fields, attribute references, template tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dharvey-consbio committed Oct 12, 2016
1 parent 49fc900 commit d1cd958
Show file tree
Hide file tree
Showing 8 changed files with 439 additions and 361 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ fgdc_from_file.digital_forms
fgdc_from_file.larger_works
fgdc_from_file.process_steps

# :see: gis_metadata.utils._required_keys for list of all properties
# :see: gis_metadata.utils.get_supported_props for list of all supported properties

# Update properties
fgdc_from_file.title = 'New Title'
Expand Down
70 changes: 20 additions & 50 deletions gis_metadata/fgdc_metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@
from gis_metadata.utils import PROCESS_STEPS
from gis_metadata.utils import ParserProperty

from gis_metadata.utils import format_xpaths, get_complex_definitions, parse_complex
from gis_metadata.utils import format_xpaths, get_complex_definitions


FGDC_ROOT = 'metadata'

_fgdc_definitions = get_complex_definitions()
_fgdc_definitions[CONTACTS].update({
'_name': '{_name}',
'_organization': '{_organization}'
})

_fgdc_tag_formats = {
'_attributes_root': 'eainfo/detailed/attr',
Expand All @@ -44,9 +48,11 @@
'originators': 'idinfo/citation/citeinfo/origin',
'publish_date': 'idinfo/citation/citeinfo/pubdate',
'data_credits': 'idinfo/datacred',
CONTACTS: 'idinfo/ptcontac/{ct_path}',
'dist_contact_org': 'distinfo/distrib/cntinfo/{org}/cntorg',
'dist_contact_person': 'distinfo/distrib/cntinfo/{person}/cntper',
CONTACTS: 'idinfo/ptcontac/cntinfo/{ct_path}',
'dist_contact_org': 'distinfo/distrib/cntinfo/cntperp/cntorg',
'_dist_contact_org': 'distinfo/distrib/cntinfo/cntorgp/cntorg',
'dist_contact_person': 'distinfo/distrib/cntinfo/cntperp/cntper',
'_dist_contact_person': 'distinfo/distrib/cntinfo/cntorgp/cntper',
'dist_address_type': 'distinfo/distrib/cntinfo/cntaddr/addrtype',
'dist_address': 'distinfo/distrib/cntinfo/cntaddr/address',
'dist_city': 'distinfo/distrib/cntinfo/cntaddr/city',
Expand Down Expand Up @@ -78,8 +84,6 @@
class FgdcParser(MetadataParser):
""" A class to parse metadata files conforming to the FGDC standard """

DEFAULT_CONTACT_TAG = 'cntperp'

def _init_data_map(self):
""" OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """

Expand All @@ -96,20 +100,9 @@ def _init_data_map(self):
if fgdc_root != FGDC_ROOT:
raise ParserError('Invalid XML root for ISO-19115 standard: {root}', root=fgdc_root)

fgdc_data_map = {'root': FGDC_ROOT}
fgdc_data_map = {'_root': FGDC_ROOT}
fgdc_data_structures = {}

# Capture contact information that may differ per document

cntinfo = 'idinfo/ptcontac/cntinfo/{contact}'

if self._has_element(cntinfo.format(contact='cntorgp')):
contact = 'cntorgp'
elif self._has_element(cntinfo.format(contact='cntperp')):
contact = 'cntperp'
else:
contact = FgdcParser.DEFAULT_CONTACT_TAG

# Capture and format other complex XPATHs

ad_format = _fgdc_tag_formats[ATTRIBUTES]
Expand All @@ -133,10 +126,12 @@ def _init_data_map(self):
ct_format = _fgdc_tag_formats[CONTACTS]
fgdc_data_structures[CONTACTS] = format_xpaths(
_fgdc_definitions[CONTACTS],
name=ct_format.format(ct_path='cntinfo/{ct}/cntper'.format(ct=contact)),
organization=ct_format.format(ct_path='cntinfo/{ct}/cntorg'.format(ct=contact)),
position=ct_format.format(ct_path='cntinfo/cntpos'.format(ct=contact)),
email=ct_format.format(ct_path='cntinfo/cntemail')
name=ct_format.format(ct_path='cntperp/cntper'),
_name=ct_format.format(ct_path='cntorgp/cntper'),
organization=ct_format.format(ct_path='cntperp/cntorg'),
_organization=ct_format.format(ct_path='cntorgp/cntorg'),
position=ct_format.format(ct_path='cntpos'),
email=ct_format.format(ct_path='cntemail')
)

dt_format = _fgdc_tag_formats[DATES]
Expand Down Expand Up @@ -186,46 +181,21 @@ def _init_data_map(self):
fgdc_data_formats = dict(_fgdc_tag_formats)

for prop, xpath in iteritems(fgdc_data_formats):
if prop in [ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS]:
if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

elif prop == BOUNDING_BOX:
fgdc_data_map[prop] = ParserProperty(self._parse_bounding_box, self._update_complex)

elif prop == 'dist_contact_org':
fgdc_data_map[prop] = xpath.format(org=contact)

elif prop == 'dist_contact_person':
fgdc_data_map[prop] = xpath.format(person=contact)
elif prop in (BOUNDING_BOX, LARGER_WORKS):
fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

elif prop == DATES:
fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

elif prop == LARGER_WORKS:
fgdc_data_map[prop] = ParserProperty(self._parse_larger_works, self._update_complex)

else:
fgdc_data_map[prop] = xpath

self._data_map = fgdc_data_map
self._data_structures = fgdc_data_structures

def _parse_bounding_box(self, prop=BOUNDING_BOX):
""" Overridden to set xpath root to None when parsing bounding box """

xpath_root = None
xpath_map = self._data_structures[prop]

return parse_complex(self._xml_tree, xpath_root, xpath_map, prop)

def _parse_larger_works(self, prop=LARGER_WORKS):
""" Overridden to set xpath root to None when parsing larger works """

xpath_root = None
xpath_map = self._data_structures[prop]

return parse_complex(self._xml_tree, xpath_root, xpath_map, prop)

def _update_dates(self, **update_props):
"""
Update operation for FGDC Dates metadata
Expand Down
41 changes: 17 additions & 24 deletions gis_metadata/iso_metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@
from gis_metadata.utils import ParserProperty

from gis_metadata.utils import format_xpaths, get_complex_definitions, get_xpath_branch
from gis_metadata.utils import parse_complex, parse_complex_list
from gis_metadata.utils import update_complex_list, update_property
from gis_metadata.utils import parse_complex_list, update_complex_list, update_property


xrange = getattr(six.moves, 'xrange')
Expand Down Expand Up @@ -82,7 +81,7 @@
# Property-specific xpath roots: the base from which each element repeats
'_bounding_box_root': '{_idinfo_extent}/geographicElement',
'_contacts_root': '{_idinfo_resp}',
'_dates_root': '{_idinfo_extent}/temporalElement/EX_TemporalExtent/extent',
'_dates_root': '{_idinfo_extent}/temporalElement',
'_digital_form_content_root': '{_contentinfo_coverage}',
'_distribution_format_root': '{_distinfo}/distributionFormat',
'_transfer_options_root': '{_distinfo}/transferOptions',
Expand All @@ -103,14 +102,15 @@
CONTACTS: '{_idinfo_resp}/{{ct_path}}',
'dist_contact_org': '{_distinfo_resp}/organisationName/CharacterString',
'dist_contact_person': '{_distinfo_resp}/individualName/CharacterString',
'dist_address_type': '', # Not available in ISO-19115
'dist_address_type': '{_distinfo_resp_contact}/address/@type',
'dist_address': '{_distinfo_resp_contact}/address/CI_Address/deliveryPoint/CharacterString',
'dist_city': '{_distinfo_resp_contact}/address/CI_Address/city/CharacterString',
'dist_state': '{_distinfo_resp_contact}/address/CI_Address/administrativeArea/CharacterString',
'dist_postal': '{_distinfo_resp_contact}/address/CI_Address/postalCode/CharacterString',
'dist_country': '{_distinfo_resp_contact}/address/CI_Address/country/CharacterString',
'_dist_country': '{_distinfo_resp_contact}/address/CI_Address/country/Country', # If not in CharacterString
'dist_phone': '{_distinfo_resp_contact}/phone/CI_Telephone/voice/CharacterString',
'dist_email': '{_distinfo_resp_contact}/address/CI_Address/electronicMailAddressy/CharacterString',
'dist_email': '{_distinfo_resp_contact}/address/CI_Address/electronicMailAddress/CharacterString',
'dist_liability': '{_idinfo}/resourceConstraints/MD_LegalConstraints/otherConstraints/CharacterString',
'processing_fees': '{_distinfo_proc}/fees/CharacterString',
'processing_instrs': '{_distinfo_proc}/orderingInstructions/CharacterString',
Expand Down Expand Up @@ -172,7 +172,7 @@ def _init_data_map(self):
if iso_root not in ISO_ROOTS:
raise ParserError('Invalid XML root for ISO-19115 standard: {root}', root=iso_root)

iso_data_map = {'root': iso_root}
iso_data_map = {'_root': iso_root}
iso_data_map.update(_iso_tag_roots)
iso_data_map.update(_iso_tag_formats)

Expand Down Expand Up @@ -258,7 +258,9 @@ def _init_data_map(self):
_iso_definitions[PROCESS_STEPS],
description=ps_format.format(ps_path='description/CharacterString'),
date=ps_format.format(ps_path='dateTime/DateTime'),
sources=ps_format.format(ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle')
sources=ps_format.format(
ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
)
)

# Assign XPATHS and gis_metadata.utils.ParserProperties to iso_data_map
Expand All @@ -267,13 +269,13 @@ def _init_data_map(self):
if prop == ATTRIBUTES:
iso_data_map[prop] = ParserProperty(self._parse_attribute_details, self._update_attribute_details)

elif prop in ['attribute_accuracy', 'dataset_completeness']:
iso_data_map[prop] = ParserProperty(self._parse_property, self._update_report_item, xpath)
elif prop in ('attribute_accuracy', 'dataset_completeness'):
iso_data_map[prop] = ParserProperty(self._parse_property, self._update_report_item, xpath=xpath)

elif prop in [CONTACTS, PROCESS_STEPS]:
elif prop in (CONTACTS, PROCESS_STEPS):
iso_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

elif prop == BOUNDING_BOX:
elif prop in (BOUNDING_BOX, LARGER_WORKS):
iso_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

elif prop == DATES:
Expand All @@ -282,9 +284,6 @@ def _init_data_map(self):
elif prop == DIGITAL_FORMS:
iso_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)

elif prop == LARGER_WORKS:
iso_data_map[prop] = ParserProperty(self._parse_larger_works, self._update_complex)

elif prop in [KEYWORDS_PLACE, KEYWORDS_THEME]:
iso_data_map[prop] = ParserProperty(self._parse_keywords, self._update_keywords)

Expand Down Expand Up @@ -450,14 +449,6 @@ def _parse_keywords(self, prop):

return keywords

def _parse_larger_works(self, prop=LARGER_WORKS):
""" Overridden to set xpath root to None when parsing larger works """

xpath_root = None
xpath_map = self._data_structures[prop]

return parse_complex(self._xml_tree, xpath_root, xpath_map, prop)

def _update_attribute_details(self, **update_props):
""" Update operation for ISO Attribute Details metadata (standard 19110) """

Expand Down Expand Up @@ -621,6 +612,7 @@ def update(self, use_template=False, **metadata_defaults):
self.validate()

tree_to_update = self._xml_tree if not use_template else self._get_template(**metadata_defaults)
supported_props = self._metadata_props

# Iterate over keys, and extract non-primitive root for all XPATHs
# xroot = identificationInfo/MD_DataIdentification/abstract/
Expand All @@ -629,10 +621,11 @@ def update(self, use_template=False, **metadata_defaults):
# This prevents multiple primitive tags from being inserted under an element

for prop, xpath in iteritems(self._data_map):
if not prop.startswith('_'):
if not prop.startswith('_') or prop.strip('_') in supported_props:
# Send only public or alternate properties
xroot = self._trim_xpath(xpath)
values = getattr(self, prop, u'')
update_property(tree_to_update, xroot, xpath, prop, values)
update_property(tree_to_update, xroot, xpath, prop, values, supported_props)

return tree_to_update

Expand Down
Loading

0 comments on commit d1cd958

Please sign in to comment.