quick checks of the new fgdc triples structure
and maybe the triples (yes the triples, whatevs)

In [18]:
%reload_ext autoreload
%autoreload 2

import os
import json
import glob
from semproc.parser import Parser
from semproc.preprocessors.metadata_preprocessors import FgdcItemReader

In [43]:
# load the proto-triples example
with open('../response_examples/fgdc_proto_example_1.xml', 'r') as f:
    response = f.read()

response = response.replace('\\\n', '').replace('\r\n', '').replace('\\r', '').replace('\\n', '').replace('\n', '')
response = response.decode('utf-8', errors='replace').encode('unicode_escape') 
    
url = 'https://bluehub.jrc.ec.europa.eu/erddap/metadata/fgdc/xml/noaa_pfeg_d543_8870_bc7f_fgdc.xml'
identities = [
    {"protocol": "FGDC", 
     "metadata": {
            "version": ["FGDC Content Standards for Digital Geospatial Metadata, FGDC-STD-001-1998"], 
            "name": "FGDC"}
    }
]

parser = Parser(response)

In [44]:
# execute the parse
reader = FgdcItemReader(parser.xml)
description = reader.parse_item()

# for the faking of things
description['catalog_record']['url'] = url
description['catalog_record']['harvestDate'] = '2015-06-20T20:22:00.643Z'

In [45]:
description

{'catalog_record': {'conformsTo': 'http://www.ngdc.noaa.gov/metadata/published/xsd/ngdcSchema/schema.xsd',
  'harvestDate': '2015-06-20T20:22:00.643Z',
  'url': 'https://bluehub.jrc.ec.europa.eu/erddap/metadata/fgdc/xml/noaa_pfeg_d543_8870_bc7f_fgdc.xml'},
 'dataset': {'abstract': 'Navy Global Environmental Model (NAVGEM) is a global numerical weather prediction computer model. It replaced NOGAPS as the prime model in the middle of February 2013 at the Navy Fleet Numerical Meteorology and Oceanography Center (FNMOC) Weather model synoptic site. [Wikipedia]',
  'identifier': '',
  'spatial_extent': {'east': '180.0',
   'north': '90.0',
   'south': '-90.0',
   'west': '-180.0',
   'wkt': 'POLYGON ((-180 -90,-180 90,180 90,180 -90,-180 -90))'},
  'title': 'Navy Global Environmental Model (NAVGEM), 0.5 degree, Pressure MSL'},
 'keywords': [{'terms': ['conversion',
    'data',
    'fnmoc',
    'level',
    'mean',
    'navg',
    'observed',
    'pressure',
    'sea',
    'theortically',
  

In [48]:
# let's play with triples

import rdflib
import hashlib
from uuid import uuid4
from rdflib import Graph, Literal, RDF, RDFS, Namespace, URIRef
from rdflib.namespace import DC, DCTERMS, FOAF, XSD, OWL


class Grapher():
    def __init__(self):
        self.graph = Graph()
        self._bind_namespaces()
    
    # some faked namespaces
    _ontology_uris = {
        'bcube': 'http://purl.org/nsidc/bcube/bcube#',
        'vcard': 'http://purl.org/nsidc/bcube/vcard#',
        'esip': 'http://purl.org/nsidc/bcube/esip#',
        'vivo': 'http://purl.org/nsidc/bcube/vivo#',
        'GeoSPARQL': 'http://purl.org/nsidc/bcube/geosparql#',
        'dcat': 'http://purl.org/nsidc/bcube/dcat#',
        'dc': str(DC),
        'dct': str(DCTERMS),
        'foaf': str(FOAF),
        'xsd': str(XSD),
        'owl': str(OWL)
    }
    
    def _bind_namespaces(self):
        # bind our lovely fake namespaces
        for prefix, uri in self._ontology_uris.iteritems():
            self.graph.bind(prefix, uri)

    def generate_predicate(self, prefix, name):
        return Namespace(self._ontology_uris[prefix])[name]
            
    def create_resource(self, resource_prefix, resource_type, identifier=''):
        # make a thing with a uuid as a urn
        # and just assign it to type if it's not overridden
        identifier = identifier if identifier else uuid4().urn
        resource = self.graph.resource(identifier)
        ref = Namespace(self._ontology_uris[resource_prefix])[resource_type]
        resource.add(OWL.a, URIRef(ref))
        return resource

    def serialize(self):
        return self.graph.serialize(format='nt')

grapher = Grapher()    

# so from our json.
for root_entity_type, root_entity in description.iteritems():
    #print root_entity
    
    if root_entity_type == 'catalog_record':
        catalog_record = grapher.create_resource('dcat', 'CatalogRecord', 'urn:sha:21a2c32fd74798563eed88a3fcd1acf38092f47e')
        catalog_record.add(grapher.generate_predicate('vcard', 'hasURL'), Literal(root_entity['url']))
        catalog_record.add(grapher.generate_predicate('vivo', 'harvestDate'), Literal(root_entity['harvestDate']))
        catalog_record.add(DC.conformsTo, Literal(root_entity['conformsTo']))
    elif root_entity_type == 'dataset':
        dataset = grapher.create_resource('dcat', 'Dataset')
    else:
        continue
    
    
nt = grapher.serialize()    
print nt


<urn:sha:21a2c32fd74798563eed88a3fcd1acf38092f47e> <http://purl.org/nsidc/bcube/vcard#hasURL> "https://bluehub.jrc.ec.europa.eu/erddap/metadata/fgdc/xml/noaa_pfeg_d543_8870_bc7f_fgdc.xml" .
<urn:sha:21a2c32fd74798563eed88a3fcd1acf38092f47e> <http://www.w3.org/2002/07/owl#a> <http://purl.org/nsidc/bcube/dcat#CatalogRecord> .
<urn:sha:21a2c32fd74798563eed88a3fcd1acf38092f47e> <http://purl.org/dc/elements/1.1/conformsTo> "http://www.ngdc.noaa.gov/metadata/published/xsd/ngdcSchema/schema.xsd" .
<urn:uuid:346e2158-38dd-4e75-beab-7ef0ca004a9a> <http://www.w3.org/2002/07/owl#a> <http://purl.org/nsidc/bcube/dcat#Dataset> .
<urn:sha:21a2c32fd74798563eed88a3fcd1acf38092f47e> <http://purl.org/nsidc/bcube/vivo#harvestDate> "2015-06-20T20:22:00.643Z" .


