### run the new processors for routing and dict building

In [145]:
%reload_ext autoreload
%autoreload 2

import os
import json
from semproc.process_router import Router
from semproc.rawresponse import RawResponse


In [133]:
def _prep_response(filename):
    # open it and get rid of all the newline junk.
    with open(filename, 'r') as f:
        response = f.read()
    
    response = response.replace('\\\n', '').replace('\r\n', '').replace('\\r', '').replace('\\n', '').replace('\n', '')
    return response.decode('utf-8', errors='replace').encode('unicode_escape')


In [134]:
### opensearch osdd
identity = {
    "protocol": "OpenSearch",
    "service": {
        "name": "OpenSearchDescription",
        "version": "1.1"
    }
}
url = 'http://www.example.com/opensearch.xml'
response = _prep_response('../response_examples/opensearch_blended_parameters.xml')

router = Router(identity, response, url)

print type(router.reader)


<class 'semproc.preprocessors.opensearch_preprocessors.OpenSearchReader'>


In [135]:
router.reader.parse()
router.reader.description

{'service': {'endpoints': [{'actionable': 'NOPE',
    'mimetype': 'application/opensearchdescription+xml',
    'parameters': [{'name': 'format', 'type': 'opensearch'},
     {'name': 'option', 'type': 'com_search'},
     {'name': 'view', 'type': 'remind'}],
    'template': 'http://www.ceos.org/index.php?option=com_search&view=remind&format=opensearch',
    'url': 'http://www.ceos.org/index.php?'},
   {'actionable': 'NOPE',
    'mimetype': 'text/html',
    'parameters': [{'name': 'option', 'type': 'com_search'},
     {'name': 'searchword', 'type': '{searchTerms}'}],
    'template': 'http://www.ceos.org/index.php?option=com_search&searchword={searchTerms}',
    'url': 'http://www.ceos.org/index.php?option=com_search&searchword='}],
  'title': ['CEOS']}}

In [37]:
### opensearch atom feed
identity = {
    "protocol": "OpenSearch",
    "resultset": {
        "dialect": "ATOM",
        "version": "1.1"
    }
}
url = 'http://www.example.com/opensearch.atom'
response = _prep_response('../response_examples/opensearch_usgs_search_atom.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.opensearch_preprocessors.OpenSearchReader'>


In [38]:
router.reader.parse()
router.reader.description

{'children': {'items': [{'contents': [{'content': '<div> This geologic map is a product of a cooperative project between theU.S. Geological Survey and the U.S. National Park Service to providegeologic information about this part of Canyonlands National Park, Utah.This digital map database contains bedrock data from previously publisheddata that has been modified by the author. New mapping of the surficialdeposits represents the general distribution of surficial deposits of theDruid Arch and The Loop 7.5-minute quadrangles.</div>',
      'type': 'html'}],
    'id': 'https://www.sciencebase.gov/catalog/item/5287d495e4b03b89f6f1a088',
    'links': [{'href': 'https://www.sciencebase.gov/catalog/item/5287d495e4b03b89f6f1a088.atom',
      'rel': 'self'},
     {'href': 'https://www.sciencebase.gov/catalog/item/5287d495e4b03b89f6f1a088',
      'rel': ''},
     {'href': 'https://www.sciencebase.gov/catalog/item/feedMap/5287d495e4b03b89f6f1a088',
      'rel': 'related'}],
    'published': '2013-

In [39]:
### oai-pmh identify
identity = {
    "protocol": "OAI-PMH",
    "service": {
        "name": "OAI-PMH",
        "request": "Identify"
    }
}
url = 'http://www.example.com/oai-pmh?verb=Identify'
response = _prep_response('../response_examples/oaipmh_identify.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.oaipmh_preprocessors.OaiPmhReader'>


In [40]:
router.reader.parse()
router.reader.description

{'service': {'endpoints': [{'url': 'http://aura.abdn.ac.uk/dspace-oai/request'}],
  'title': ['Aberdeen University Research Archive'],
  'version': ['2.0']}}

In [52]:
### oai-pmh listrecords
identity = {
    "protocol": "OAI-PMH",
    "resultset": {
        "name": "OAI-PMH",
        "request": "ListRecords",
        "dialect": "oai_dc"
    }
}
url = 'http://www.example.com/oai-pmh?verb=ListRecords'
response = _prep_response('../response_examples/oaipmh_listrecords.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.oaipmh_preprocessors.OaiPmhReader'>


In [53]:
router.reader.parse()
router.reader.description

{'children': [{'abstract': 'Mar-94',
   'creator': 'Gita Sen',
   'identifier': 'oai:ewubd:admin-mprhgdco:HASHfd2d9458117841512f40e3',
   'publisher': 'Harvard Center for Population and Development Studies',
   'sources': ['http://www.jstor.org/stable/pdfplus/2137608.pdf'],
   'timestamp': '2014-03-04',
   'title': 'Population Policies Reconsidered: Health, Employment and Rights',
   'types': ['Full text']},
  {'abstract': '1984',
   'creator': 'John Bongaarts, Odile Frank, Ron Lesthaeghe',
   'identifier': 'oai:ewubd:admin-mprhgdco:HASH163d9511160619a8a6bcc9',
   'publisher': 'Population Council',
   'sources': ['http://www.jstor.org/stable/pdfplus/1973518.pdf'],
   'timestamp': '2014-03-04',
   'title': 'The Proximate Determinants of Fertility in sub- Saharan Africa',
   'types': ['Full text']},
  {'abstract': '2003',
   'identifier': 'oai:ewubd:admin-mprhgdco:HASH01ef7f01771045b6085cec77',
   'publisher': 'Population and Development Strategies, UNFPA',
   'sources': ['http://www.unf

In [71]:
### ogc wms
identity = {
    "protocol": "OGC",
    "service": {
        "name": "WMS",
        "request": "GetCapabilities", 
        "version": "1.3.0"
    }
}
url = 'http://www.example.com/wms?service=wms&request=getcapabilities&version=1.3.0'
response = _prep_response('../response_examples/wms_v1_3_0.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [72]:
router.reader.parse()
router.reader.description

{'service': {'abstract': ['WMS'],
  'contact': [''],
  'endpoints': [{'actionable': 1,
    'mimeType': ['application/vnd.ogc.wms_xml', 'text/xml'],
    'name': 'GetCapabilities',
    'protocol': 'HTTP GET',
    'url': 'http://encdirect.noaa.gov/arcgis/services/HSD_Web_Services/HSD_Surveys_EstimatedtoMCD/MapServer/WmsServer?SERVICE=WMS&VERSION=1.3.0&REQUEST=GetCapabilities'},
   {'actionable': 2,
    'mimeType': ['image/bmp',
     'image/jpeg',
     'image/tiff',
     'image/png',
     'image/png8',
     'image/png24',
     'image/png32',
     'image/gif',
     'image/svg+xml'],
    'name': 'GetMap',
    'protocol': 'HTTP GET',
    'url': 'http://encdirect.noaa.gov/arcgis/services/HSD_Web_Services/HSD_Surveys_EstimatedtoMCD/MapServer/WmsServer?SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap'},
   {'actionable': 2,
    'mimeType': ['application/vnd.esri.wms_raw_xml',
     'application/vnd.esri.wms_featureinfo_xml',
     'application/vnd.ogc.wms_xml',
     'text/xml',
     'text/html',
     'tex

In [73]:
### ogc wfs
identity = {
    "protocol": "OGC",
    "service": {
        "name": "WFS",
        "request": "GetCapabilities",
        "version": "1.1.0"
    }
}
url = ''
response = _prep_response('../response_examples/wfs_v1_1_0.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [74]:
router.reader.parse()
router.reader.description

{'service': {'abstract': ['Estimates of the number and size of undiscovered mineral deposits containing gold, silver, copper, lead, and zinc in the US, by type of deposit.'],
  'contact': ['Peter N. Schweitzer'],
  'endpoints': [{'actionable': 1,
    'mimeType': ['text/xml'],
    'name': 'GetCapabilities',
    'parameters': [{'name': 'service', 'type': 'string', 'values': ['WFS']},
     {'name': 'request', 'type': 'string'},
     {'name': 'version', 'type': 'string'}],
    'protocol': 'HTTP GET',
    'url': 'http://mrdata.usgs.gov/services/nmra?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetCapabilities'},
   {'actionable': 1,
    'mimeType': ['text/xml'],
    'name': 'GetCapabilities',
    'parameters': [{'name': 'service', 'type': 'string', 'values': ['WFS']},
     {'name': 'request', 'type': 'string'},
     {'name': 'version', 'type': 'string'}],
    'protocol': 'HTTP POST',
    'url': 'http://mrdata.usgs.gov/services/nmra?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetCapabilities'},
   {'actionable':

In [88]:
### ogc wfs features
identity = {
    "protocol": "OGC",
    "dataset": {
        "name": "WFS",
        "request": "GetCapabilities",
        "version": "1.1.0"
    }
}
url = ''
response = _prep_response('../response_examples/wfs_v1_1_0.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [89]:
router.reader.parse()
router.reader.description

{'datasets': [{'abstract': ['Estimates of the number and size of undiscovered mineral deposits of this type in Alaska'],
   'bbox': ['POLYGON ((-165 24,-165 73,-66 73,-66 24,-165 24))'],
   'metadata_urls': [{'format': None,
     'type': 'FGDC',
     'url': 'http://mrdata.usgs.gov/metadata/nmra.xml'}],
   'name': 'AK_Creede_epithermal_veins',
   'output_formats': ['text/xml; subtype=gml/3.1.1'],
   'spatial_refs': ['urn:ogc:def:crs:EPSG::4326',
    'urn:ogc:def:crs:EPSG::4269',
    'urn:ogc:def:crs:EPSG::4267',
    'urn:ogc:def:crs:EPSG::3857',
    'urn:ogc:def:crs:EPSG::900913',
    'urn:ogc:def:crs:EPSG::102113'],
   'temporal_extent': {},
   'title': ['Permissive tracts in Alaska for Creede epithermal veins']},
  {'abstract': ['Estimates of the number and size of undiscovered mineral deposits of this type in Alaska'],
   'bbox': ['POLYGON ((-165 24,-165 73,-66 73,-66 24,-165 24))'],
   'metadata_urls': [{'format': None,
     'type': 'FGDC',
     'url': 'http://mrdata.usgs.gov/metada

In [90]:
### ogc wcs
identity = {
    "protocol": "OGC",
    "service": {
        "name": "WCS",
        "request": "GetCapabilities",
        "version": "1.1.2"
    }
}
url = ''
response = _prep_response('../response_examples/wcs_v1_1_2.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [91]:
router.reader.parse()
router.reader.description

{'service': {'abstract': [None],
  'contact': [None],
  'endpoints': [{'actionable': 1,
    'name': 'GetCapabilities',
    'parameters': [{'name': 'service', 'type': 'string', 'values': ['WCS']},
     {'name': 'request', 'type': 'string'},
     {'name': 'version', 'type': 'string'}],
    'protocol': 'HTTP GET',
    'url': 'http://www.ncddc.noaa.gov/arcgis/services/DataAtlas/NCDC_SeaWinds_Fall/MapServer/WCSServer?SERVICE=WCS&VERSION=1.1.2&REQUEST=GetCapabilities'},
   {'actionable': 1,
    'name': 'GetCapabilities',
    'parameters': [{'name': 'service', 'type': 'string', 'values': ['WCS']},
     {'name': 'request', 'type': 'string'},
     {'name': 'version', 'type': 'string'}],
    'protocol': 'HTTP POST',
    'url': 'http://www.ncddc.noaa.gov/arcgis/services/DataAtlas/NCDC_SeaWinds_Fall/MapServer/WCSServer?SERVICE=WCS&VERSION=1.1.2&REQUEST=GetCapabilities'},
   {'actionable': 2,
    'name': 'DescribeCoverage',
    'parameters': [{'name': 'service', 'type': 'string', 'values': ['WCS']}

In [44]:
### ogc wcs features
identity = {
    "protocol": "OGC",
    "dataset": {
        "name": "WCS",
        "request": "DescribeCoverage",
        "version": "1.0.0"
    }
}
url = ''
response = _prep_response('../response_examples/wcs_v1_0_0_describe_coverage.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [45]:
router.reader.parse()
router.reader.description

{'datasets': [{'abstract': 'Earth View 1KM Emissive Bands Scaled Integers',
   'bbox': 'POLYGON ((-180 -90,-180 90,180 90,180 -90,-180 -90))',
   'formats': ['geotiff', 'raw binary', 'jpeg', 'hdf4'],
   'name': 'EV_1KM_Emissive:Day',
   'spatial_refs': {'nativeCRSs': 'EPSG:4326',
    'requestResponseCRSs': 'EPSG:4326'},
   'temporal_extent': {'begin': '2004-10-01T00:00:00',
    'end': '2015-06-23T00:00:00'}}]}

In [31]:
from owscapable.coverage.wcsBase import DescribeCoverageReader
reader = DescribeCoverageReader('1.0.0', '', None, xml=response)

from semproc.geo_utils import *
from osgeo import osr

ll = reader.coverages[0].min_pos
ur = reader.coverages[0].max_pos
srs = reader.coverages[0].srs_urn

srs_epsg = identify_epsg(srs)
print srs_epsg
epsg = define_spref(srs_epsg)
print epsg

osr_srs = osr.SpatialReference()
osr_srs.ImportFromEPSG(int(srs_epsg.split(':')[-1]))


print int(srs_epsg.split(':')[-1]), ' == ', osr_srs.ExportToPrettyWkt()


# ll = map(float, ll.split())
# ur = map(float, ur.split())

# bbox = ll + ur
# geom = bbox_to_geom(bbox)
# reproject(geom, srs, 'EPSG:4326')

EPSG:4326

4326  ==  


In [42]:
import os
from osgeo import gdal

os.environ['GDAL_DATA'] = r'/Library/Frameworks/GDAL.framework/Versions/1.11/Resources/gdal'
gdal.SetConfigOption( "GDAL_DATA", '/Library/Frameworks/GDAL.framework/Versions/1.11/Resources/gdal' )
print 'GDAL_DATA' in os.environ, os.environ['GDAL_DATA'], gdal.GetConfigOption('GDAL_DATA')


o = osr.SpatialReference()
res = o.ImportFromEPSG(4326)
print repr(res)
o.ExportToPrettyWkt()





True /Library/Frameworks/GDAL.framework/Versions/1.11/Resources/gdal /Library/Frameworks/GDAL.framework/Versions/1.11/Resources/gdal
6


''

In [46]:
### ogc csw
identity = {
    "protocol": "OGC",
    "service": {
        "name": "CSW",
        "request": "GetCapabilities",
        "version": "2.0.2"
    }
}
url = ''
response = _prep_response('../response_examples/datagov_csw_202_getcapabilities.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [47]:
router.reader.parse()
router.reader.description

{'service': {'abstract': ['This catalog contains metadata for all first-order data, services, and applications harvested from registered metadata collections with data.gov. Data may be referenced from federal, state, local, tribal, academic, commercial, or non-profit organizations.'],
  'contact': ['Data.gov Administrator'],
  'endpoints': [{'actionable': 1,
    'mimeType': ['text/xml'],
    'name': 'GetCapabilities',
    'parameters': [{'name': 'service', 'type': 'string'},
     {'name': 'request', 'type': 'string'},
     {'name': 'version', 'type': 'string'}],
    'protocol': 'HTTP GET',
    'url': 'http://catalog.data.gov/csw?SERVICE=CSW&VERSION=2.0.2&REQUEST=GetCapabilities'},
   {'actionable': 1,
    'mimeType': ['text/xml'],
    'name': 'GetCapabilities',
    'parameters': [{'name': 'service', 'type': 'string'},
     {'name': 'request', 'type': 'string'},
     {'name': 'version', 'type': 'string'}],
    'protocol': 'HTTP POST',
    'url': 'http://catalog.data.gov/csw?SERVICE=CSW&

In [51]:
### ogc csw results
identity = {
    "protocol": "OGC",
    "resultset": {
        "name": "CSW",
        "request": "GetRecords",
        "dialect": "http://www.isotc211.org/2005/gmd",
        "version": "2.0.2"
    }
}
url = ''
response = _prep_response('../response_examples/datagov_csw_202_getrecords_iso.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.ogc_preprocessors.OgcReader'>


In [52]:
router.reader.parse()
router.reader.description

{'children': {'children': [{'contact': {'contact': {'addresses': ['1315 East-West Highway, SSMC3, 4th floor'],
      'city': 'Silver Spring',
      'country': 'USA',
      'email': 'NODC.Services@noaa.gov',
      'phone': '301-713-3277',
      'postal': '20910-3282',
      'state': 'MD'},
     'organization': 'US National Oceanographic Data Center',
     'position': 'NODC User Services'},
    'endpoints': [{'description': 'Navigate directly to the URL for a descriptive web page with download links.',
      'format': {'name': 'Originator data format'},
      'name': 'Details',
      'url': 'http://accession.nodc.noaa.gov/6200313'},
     {'description': 'Navigate directly to the URL for a descriptive web page with download links.',
      'format': {'name': 'Originator data format'},
      'name': 'Metadata',
      'url': 'http://accession.nodc.noaa.gov/oas/6200313'},
     {'description': 'Navigate directly to the URL for data access and direct download.',
      'format': {'name': 'Origin

In [148]:
### thredds
identity = {
    "protocol": "UNIDATA",
    "service": {
        "name": "THREDDS-Catalog",
        "version": "1.1"
    },
    "dataset": {} # haha, this can be empty for thredds
}
url = 'http://www.example.com/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/catalog.xml'
response = _prep_response('../response_examples/thredds_catalog.xml')

router = Router(identity, response, url)

print type(router.reader)

<class 'semproc.preprocessors.thredds_preprocessors.ThreddsReader'>


In [149]:
router.reader.parse()
router.reader.description

# AHAHAHAHA! this is correct for the service description - there's no name/version attribute in this example

{'datasets': {'endpoints': [{'ID': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/',
    'children': [{'ID': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
      'access_serviceName': 'file',
      'access_url': '/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
      'actionable': 2,
      'dataSize': '740599',
      'dataSize_units': 'bytes',
      'date': '2012-05-23T18:37:42',
      'date_type': 'modified',
      'name': '3B42.19980101.00.7.HDF.Z',
      'url': []}],
    'name': '/TRMM_3Hourly_3B42/1997/365',
    'parentOf': [],
    'source': 'dataset'},
   {'ID': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
    'access_serviceName': 'file',
    'access_url': '/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
    'actionable': 2,
    'childOf': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/',
    'dataSize': '740599',
    'dataSize_units': 'bytes',
    'date': '2012-05-23T18:37:42',
    'date_type': 'modified',
    'name': '3B42.19

In [126]:
%reload_ext autoreload
%autoreload 2
from semproc.preprocessors.thredds_preprocessors import ThreddsReader
from semproc.xml_utils import *

reader = ThreddsReader(identity, response, url)
reader.parse()
reader.description

#extract_elems(reader.parser.xml, ['dataset'])

#reader.parser.xml



{'datasets': {'endpoints': [{'ID': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/',
    'children': [{'ID': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
      'access_serviceName': 'file',
      'access_url': '/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
      'actionable': 2,
      'dataSize': '740599',
      'dataSize_units': 'bytes',
      'date': '2012-05-23T18:37:42',
      'date_type': 'modified',
      'name': '3B42.19980101.00.7.HDF.Z',
      'url': ''}],
    'name': '/TRMM_3Hourly_3B42/1997/365',
    'parentOf': [],
    'source': 'dataset'},
   {'ID': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
    'access_serviceName': 'file',
    'access_url': '/TRMM_3Hourly_3B42/1997/365/3B42.19980101.00.7.HDF.Z',
    'actionable': 2,
    'childOf': '/opendap/hyrax/TRMM_3Hourly_3B42/1997/365/',
    'dataSize': '740599',
    'dataSize_units': 'bytes',
    'date': '2012-05-23T18:37:42',
    'date_type': 'modified',
    'name': '3B42.19

In [109]:
### iso mi/md
identity = {
    "protocol": "ISO",
    "metadata": {
        "name": "19115"
    }
}
url = ''
response = _prep_response('../response_examples/iso-19115_mi.xml')

router = Router(identity, response, url)

print type(router.reader.reader)

<class 'semproc.preprocessors.iso_preprocessors.MxParser'>


In [110]:
router.reader.parse()
router.reader.description

{'abstract': 'The National Oceanic and Atmospheric Administration                            (NOAA) has the statutory mandate to collect hydrographic data in support                            of nautical chart compilation for safe navigation and to provide                            background data for engineers, scientific, and other commercial and                            industrial activities. Hydrographic survey data primarily consist of                            water depths, but may also include features (e.g. rocks, wrecks),                            navigation aids, shoreline identification, and bottom type information.                            NOAA is responsible for archiving and distributing the source data as                            described in this metadata record.',
 'contact': {'contact': {'addresses': ['NOAA/NOS/OCS/HSD',
    '1315 East West Highway,                                                  SSMC3'],
   'city': 'Silver                                  

In [113]:
### iso data series
identity = {
    "protocol": "ISO",
    "metadata": {
        "name": "Data Series"
    }
}
url = ''
response = _prep_response('../response_examples/iso-19115_ds.xml')

router = Router(identity, response, url)

print type(router.reader)

<type 'instance'>


In [114]:
router.reader.parse()
router.reader.description

{'abstract': "NOAA's Electronic Navigational Charts (NOAA ENCs) have been developed to support the marine transportation infrastructure and coastal management. The NOAA ENCs are in S-57, a data standard developed by the International Hydrographic Organization (IHO) to be used for the exchange of digital hydrographic data. Nautical chart features contained within an NOAA ENC provide a detailed representation of the U.S. coastal and marine environment. This data includes coastal topography, bathymetry, landmarks, geographic place names and marine boundaries.For each ENC there is information that includes the tags <name>, <lname> (title), <src_chart> (equivalent raster chart number), <cscale> (equivalent raster chart scale), <coast_guard_districts>  and  <states>. Each ENC dataset is available for download as a .ZIP file. The <zipfile_location> tag includes the URL of download location, and the <zipfile_datetime> tag includes the date and time that the .ZIP file was created. ENCs have edi