## FGDC Triples

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

import os
import json
import glob
from semproc.rawresponse import RawResponse
from semproc.parser import Parser
from semproc.preprocessors.metadata_preprocessors import FgdcItemReader
from semproc.serializers.rdfgraphs import RdfGrapher

In [2]:
# Read the FGDC proto-triples sample from disk and wrap its text as a
# 'text/xml' RawResponse.
with open('../inputs/fgdc_proto_example_1.xml', 'r') as f:
    rr = RawResponse(f.read(), 'text/xml')

# `response` holds whatever clean_raw_content() returns; that value is
# what the Parser below is built from.
response = rr.clean_raw_content()

# URL handed to the reader in the next cell.
url = 'https://bluehub.jrc.ec.europa.eu/erddap/metadata/fgdc/xml/noaa_pfeg_d543_8870_bc7f_fgdc.xml'

# Identity record for an FGDC document; not referenced by the cells shown here.
identities = [{
    "protocol": "FGDC",
    "metadata": {
        "version": ["FGDC Content Standards for Digital Geospatial Metadata, FGDC-STD-001-1998"],
        "name": "FGDC"
    }
}]

parser = Parser(response)

In [3]:
# Build the FGDC item reader from parser.xml (this reader takes xml), the
# record url, and a fixed harvest date, then run the parse.
reader = FgdcItemReader(
    parser.xml,
    url,
    {'harvest_date': '2015-06-20T20:22:00.643Z'}
)
reader.parse_item()

In [4]:
# Display the reader's description after parse_item(); the captured output
# below is a dict whose first key is 'catalog_records'.
reader.description

{'catalog_records': [{'bcube:dateCreated': '2015-06-20T20:22:00.643Z',
   'bcube:lastUpdated': '2015-06-20T20:22:00.643Z',
   'dc:conformsTo': ['http://www.ngdc.noaa.gov/metadata/published/xsd/ngdcSchema/schema.xsd'],
   'object_id': 'urn:sha:f65fb3d1efeee860adbbb53d3a20e80e1f50fe625e4887155f196a92',
   'rdf:type': 'FGDC:CSDGM',
   'relationships': [{'object_id': 'urn:uuid:144ef290-d838-49a1-83e5-385ff9748390',
     'relate': 'bcube:originatedFrom'},
    {'object_id': 'urn:sha:d5a3a66150264cfeca19322b996a1cbf202b15966e0f1e41b99ae991',
     'relate': 'foaf:primaryTopic'}],
   'urls': [{'bcube:HTTPStatusCodeValue': 200,
     'bcube:HTTPStatusFamilyCode': 200,
     'bcube:HTTPStatusFamilyType': 'Success message',
     'bcube:atTime': '2015-06-20T20:22:00.643Z',
     'bcube:hasConfidence': 'Good',
     'bcube:hasUrlSource': 'Harvested',
     'bcube:reasonPhrase': 'OK',
     'bcube:validatedOn': '2015-06-20T20:22:00.643Z',
     'object_id': 'urn:uuid:144ef290-d838-49a1-83e5-385ff9748390',
 

In [5]:
# Feed the parsed description to RdfGrapher, serialize it, and print the
# emitted text (the captured output below begins with Turtle-style
# `@prefix` declarations).
g = RdfGrapher(reader.description)
g.serialize()
ttl = g.emit_format()
# Call form of print: with a single argument it prints the same text on
# Python 2, and unlike the bare `print ttl` statement it is also valid on
# Python 3.
print(ttl)

@prefix bcube: <http://purl.org/BCube/#> .
@prefix bibo: <http://purl.org/ontology/bibo/#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dcat: <http://www.w3.org/TR/vocab-dcat/#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix esip: <http://purl.org/esip/#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix vcard: <http://www.w3.org/TR/vcard-rdf/#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<urn:sha:8df1983f1f9573be5059558fd20d718d9550720c873eb8930cdc6735> bcube:HTTPStatusCodeValue 200 ;
    bcube:HTTPStatusFamilyCode 200 ;
    bcube:HTTPStatusFamilyType "Success message" ;
    bcube:atTime "2015-06-20T20:22:00.643Z" ;
    bcube:hasConfidence "Good" ;
    bcube:hasUrlSource "Harvested" ;
    bcube:reasonPhrase "OK" ;
    bcube:validat