OGC triples for WxS GetCapabilities as Catalog Records

In [1]:
%reload_ext autoreload
%autoreload 2

import os
import json
import glob
from semproc.rawresponse import RawResponse
from semproc.parser import Parser
from semproc.preprocessors.ogc_preprocessors import OgcReader
from semproc.serializers.rdfgraphs import RdfGrapher

In [77]:
# Read the saved WMS 1.3.0 GetCapabilities fixture from disk.
with open('../inputs/wms_v1.3.0.xml', 'r') as fixture:
    raw_xml = fixture.read()

# Wrap the text as a text/xml RawResponse and keep whatever
# clean_raw_content() returns; the next cell hands it to OgcReader.
response = RawResponse(raw_xml, 'text/xml').clean_raw_content()

# URL passed to OgcReader together with the fixture content.
url = 'http://ferret.pmel.noaa.gov/thredds/wms/las/woa05_monthly/data_ferret.pmel.noaa.gov_thredds_dodsC_data_PMEL_WOA05nc_monthly_o0112mn1.nc.jnl'

# Identity passed to OgcReader: OGC protocol, WMS GetCapabilities, version 1.3.0.
identity = {
    "service": {
        "name": "WMS",
        "request": "GetCapabilities",
        "version": ["1.3.0"],
    },
    "protocol": "OGC",
}

In [78]:
# Build the OGC reader from the identity, the response content, the source URL
# and an options dict carrying the harvest date.
harvest_details = {'harvest_date': '2015-09-15T12:45:00Z'}
reader = OgcReader(identity, response, url, harvest_details)

In [79]:
# Parse the GetCapabilities response held by the reader; the following cells
# read the result from reader.description.
reader.parse()

In [80]:
# Display the parsed description; the output below is a dict with
# 'keywords' and 'services' entries.
reader.description

{'keywords': [{'bcube:hasValue': ['meteorology',
    'atmosphere',
    'climate',
    'ocean',
    'earth science'],
   'object_id': 'urn:uuid:ce64e235-897f-4187-8355-67a9cd43249b'}],
 'services': [{'bcube:dateCreated': '2015-09-15T12:45:00Z',
   'bcube:lastUpdated': '2015-09-15T12:45:00Z',
   'dc:conformsTo': ['http://www.opengis.net/wms',
    'http://schemas.opengis.net/wms/1.3.0/capabilities_1_3_0.xsd'],
   'dc:description': 'Scientific Data',
   'layers': [{'bcube:dateCreated': '2015-09-15T12:45:00Z',
     'bcube:lastUpdated': '2015-09-15T12:45:00Z',
     'dc:description': 'Dissolved Oxygen',
     'dc:spatial': 'POLYGON ((-180.0 -89.5,-180.0 89.5,180.0 89.5,180.0 -89.5,-180.0 -89.5))',
     'dc:title': 'dissolved_oxygen',
     'esip:eastBound': 180.0,
     'esip:northBound': 89.5,
     'esip:southBound': -89.5,
     'esip:westBound': -180.0,
     'object_id': 'urn:uuid:80be5ded-075c-418f-ac4e-fd1581e43fe6',
     'relationships': [{'object_id': 'urn:uuid:e1a5b76a-0e42-4b40-8e41-fef5

In [81]:
# Build an RDF grapher from the parsed description, serialize it, and print
# the emitted text (the @prefix / triple output shown below).
grapher = RdfGrapher(reader.description)
grapher.serialize()
# print(...) with a single argument behaves the same on Python 2 and is also
# valid on Python 3, unlike the bare print statement.
print(grapher.emit_format())

@prefix bcube: <http://purl.org/BCube/#> .
@prefix bibo: <http://purl.org/ontology/bibo/#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dcat: <http://www.w3.org/TR/vocab-dcat/#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix esip: <http://purl.org/esip/#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix vcard: <http://www.w3.org/TR/vcard-rdf/#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<urn:uuid:c5bfcba9-1ca2-48f4-a1ba-da7bd29d77ea> bcube:HTTPStatusCodeValue 200 ;
    bcube:HTTPStatusFamilyCode 200 ;
    bcube:HTTPStatusFamilyType "Success message" ;
    bcube:atTime "2015-09-15T12:45:00Z" ;
    bcube:hasConfidence "Good" ;
    bcube:hasUrlSource "Harvested" ;
    bcube:reasonPhrase "OK" ;
    bcube:validatedOn "2015-09-15T12:45:

## The WFS and WCS examples below haven't been updated yet.
Moving on to the other WxS services, starting with WFS.

In [2]:
# Read the saved WFS 1.1.0 GetCapabilities fixture from disk.
with open('../inputs/wfs_v1_1_0.xml', 'r') as f:
    response = f.read()

# Wrap the text as a text/xml RawResponse and keep what clean_raw_content() returns.
rr = RawResponse(response, 'text/xml')
response = rr.clean_raw_content()

# NOTE(review): this is the same THREDDS *WMS* URL used in the WMS cell, while the
# fixture here is a WFS document (the dataset output further down references
# mrdata.usgs.gov). Looks like a copy-paste placeholder -- confirm the intended
# source URL; it is left unchanged here so the generated output is not altered.
url = 'http://ferret.pmel.noaa.gov/thredds/wms/las/woa05_monthly/data_ferret.pmel.noaa.gov_thredds_dodsC_data_PMEL_WOA05nc_monthly_o0112mn1.nc.jnl'

# Identity passed to OgcReader: OGC protocol, WFS GetCapabilities, version 1.1.0.
identity = {
    "service": {
        "name": "WFS",
        "request": "GetCapabilities",
        "version": [
            "1.1.0"
        ]
    },
    "protocol": "OGC"
}

# Parse the capabilities document, then serialize the parsed description as RDF.
# `reader` is reused by the next cell.
reader = OgcReader(identity, response, url, {'harvest_date': '2015-09-15T12:45:00Z'})
reader.parse()
grapher = RdfGrapher(reader.description)
grapher.serialize()
# print(...) with a single argument behaves the same on Python 2 and is also
# valid on Python 3, unlike the bare print statement.
print(grapher.emit_format())

@prefix bcube: <http://purl.org/BCube/#> .
@prefix bibo: <http://purl.org/ontology/bibo/#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dcat: <http://www.w3.org/TR/vocab-dcat/#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix esip: <http://purl.org/esip/#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix vcard: <http://www.w3.org/TR/vcard-rdf/#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<urn:sha:d2db93911fbfeae23ee061ae4651497702563ea40b6664af45653905> dc:describes <urn:uuid:06c722d9-6065-439a-b930-2bbc0b54d086>,
        <urn:uuid:074d2593-6cf4-47ce-a76e-f51d2a427570>,
        <urn:uuid:0c5c395b-689d-4028-b834-5b84072ab693>,
        <urn:uuid:1170b195-18fa-470e-9d61-9eca3b3e5c05>,
        <urn:uuid:11d26678-26dd-4677-8839-44a35e79

In [3]:
# Inspect the intermediate dataset parse for the WFS 1.1.0 document. Both calls
# are underscore-prefixed (private) methods on the reader, used here only to
# look at the raw per-dataset dicts shown below.
wfs_service_reader = reader._get_service_reader('WFS', '1.1.0')
reader._parse_getcap_datasets(wfs_service_reader)

[{'abstract': 'Estimates of the number and size of undiscovered mineral deposits of this type in Alaska',
  'bbox': {'dc:spatial': 'POLYGON ((-165 24,-165 73,-66 73,-66 24,-165 24))',
   'esip:eastBound': -66.0,
   'esip:northBound': 73.0,
   'esip:southBound': 24.0,
   'esip:westBound': -165.0},
  'metadata_urls': [{'format': None,
    'type': 'FGDC',
    'url': 'http://mrdata.usgs.gov/metadata/nmra.xml'}],
  'name': 'AK_Creede_epithermal_veins',
  'output_formats': ['text/xml; subtype=gml/3.1.1'],
  'spatial_refs': ['urn:ogc:def:crs:EPSG::4326',
   'urn:ogc:def:crs:EPSG::4269',
   'urn:ogc:def:crs:EPSG::4267',
   'urn:ogc:def:crs:EPSG::3857',
   'urn:ogc:def:crs:EPSG::900913',
   'urn:ogc:def:crs:EPSG::102113'],
  'temporal_extent': {},
  'title': 'Permissive tracts in Alaska for Creede epithermal veins'},
 {'abstract': 'Estimates of the number and size of undiscovered mineral deposits of this type in Alaska',
  'bbox': {'dc:spatial': 'POLYGON ((-165 24,-165 73,-66 73,-66 24,-165 24)

Once more for WCS, although its GetCapabilities response is generally more limited.

In [88]:
# Read the saved WCS 1.1.2 GetCapabilities fixture from disk.
with open('../inputs/wcs_v1_1_2.xml', 'r') as f:
    response = f.read()

# NOTE(review): unlike the WMS and WFS cells, the text is passed through
# .encode('string_escape') before cleaning. That codec exists only on Python 2;
# the reason this fixture needs it is not visible here -- confirm.
rr = RawResponse(response.encode('string_escape'), 'text/xml')
response = rr.clean_raw_content()

# NOTE(review): same THREDDS *WMS* URL as the WMS cell, although this fixture is
# a WCS document -- looks like a copy-paste placeholder; confirm the intended
# source URL. Left unchanged so the generated output is not altered.
url = 'http://ferret.pmel.noaa.gov/thredds/wms/las/woa05_monthly/data_ferret.pmel.noaa.gov_thredds_dodsC_data_PMEL_WOA05nc_monthly_o0112mn1.nc.jnl'

# Identity passed to OgcReader: OGC protocol, WCS GetCapabilities, version 1.1.2.
identity = {
    "service": {
        "name": "WCS",
        "request": "GetCapabilities",
        "version": [
            "1.1.2"
        ]
    },
    "protocol": "OGC"
}

# Parse the capabilities document, then serialize the parsed description as RDF.
reader = OgcReader(identity, response, url, {'harvest_date': '2015-09-15T12:45:00Z'})
reader.parse()
grapher = RdfGrapher(reader.description)
grapher.serialize()
# print(...) with a single argument behaves the same on Python 2 and is also
# valid on Python 3, unlike the bare print statement.
print(grapher.emit_format())

@prefix bcube: <http://purl.org/BCube/#> .
@prefix bibo: <http://purl.org/ontology/bibo/#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dcat: <http://www.w3.org/TR/vocab-dcat/#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix esip: <http://purl.org/esip/#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix vcard: <http://www.w3.org/TR/vcard-rdf/#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<urn:uuid:731be8cc-0f4d-484a-9da4-8fa49a0c3391> bcube:HTTPStatusCodeValue 200 ;
    bcube:HTTPStatusFamilyCode 200 ;
    bcube:HTTPStatusFamilyType "Success message" ;
    bcube:atTime "2015-09-15T12:45:00Z" ;
    bcube:hasConfidence "Good" ;
    bcube:hasUrlSource "Harvested" ;
    bcube:reasonPhrase "OK" ;
    bcube:validatedOn "2015-09-15T12:45: