In [1]:
import datetime
import os
import uuid
from string import Template

from bs4 import BeautifulSoup

In [2]:
# Path of the mapping file that is read and parsed below.
mappingFile = "./Mapping.x3ml"
# Directory the generated .trig field definition files are written to.
outputDir = "./fields/"

In [3]:
# Base IRI that is prepended to the field name to build each field's IRI
# and the IRI of its named graph in the template below.
containerPrefix = "http://www.metaphacts.com/instances/fields/"

Read the mapping XML file exported from the 3M mapping tool

In [4]:
# Read the file as bytes so that the parser can honour the encoding declared
# in the XML itself rather than the platform's default text encoding.
with open(mappingFile, "rb") as f:
    xml = BeautifulSoup(f.read(), "lxml-xml")

Parse mappings

In [5]:
# Collect every <mapping> element from the parsed document.
xmlMappings = xml.find_all('mapping')

In [6]:
# Build one plain dict per <mapping>: the text of its domain type plus, for
# every <link>, the source relation and the list of target relationships.
mappings = []
for mapping in xmlMappings:
    domainType = mapping.domain.type.text
    parsedLinks = []
    for linkElement in mapping.find_all("link"):
        sourceRelation = linkElement.path.source_relation.relation.text
        targetRelationships = []
        for relationship in linkElement.path.target_relation.find_all("relationship"):
            targetRelationships.append(relationship.text)
        parsedLinks.append({"from": sourceRelation, "to": targetRelationships})
    mappings.append({"domain": domainType, "links": parsedLinks})

Mappings start from different domains. If we want all fields to start from the same domain, we can specify here the path elements that lead from that common domain to each mapping's own domain.

In [7]:
# Path elements to prepend to a field's select path.
# Outer key: the domain being normalised to; inner key: the mapping's own
# domain; value: the list of path elements that is put in front of the
# link's relationships (an empty list adds nothing).
normaliseDomains = {
    "crm:E38_Image" : {
        "crm:E38_Image": [],
        "crm:E24_Physical_Man-Made_Thing": ["crm:P138_represents"]
    },
}

Field template for a field of datatype xsd:anyURI

In [8]:
# Template for one field definition file. The placeholders filled in by
# substitute() are: containerPrefix, fieldName, dateGenerated, domain,
# uuidInsertPattern, uuidSelectPattern, uuidContainer and selectPath.
# NOTE(review): the rdfs prefix is declared twice in the header below; the
# second declaration repeats the first.
fieldDefinitionAnyURI = Template('''
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix sp: <http://spinrdf.org/sp#> .
@prefix rs: <http://www.researchspace.org/ontology/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix User: <http://www.metaphacts.com/resource/user/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix : <http://www.metaphacts.com/resource/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<$containerPrefix$fieldName/context> {
	<$containerPrefix$fieldName> a <http://www.w3.org/ns/ldp#Resource> , <http://www.w3.org/ns/prov#Entity> , <http://www.metaphacts.com/ontology/fields#Field> ;
		rdfs:label "$fieldName" ;
		<http://www.w3.org/ns/prov#generatedAtTime> "$dateGenerated"^^xsd:dateTime ;
		<http://www.w3.org/ns/prov#wasAttributedTo> User:admin ;
		<http://www.metaphacts.com/ontology/fields#domain> $domain ;
		<http://www.metaphacts.com/ontology/fields#insertPattern> _:$uuidInsertPattern ;
		<http://www.metaphacts.com/ontology/fields#selectPattern> _:$uuidSelectPattern ;
		<http://www.metaphacts.com/ontology/fields#xsdDatatype> xsd:anyURI .
	
	_:$uuidInsertPattern a sp:Query ;
		sp:text "INSERT { ?subject ?predicate ?value} WHERE {}" .
	
	_:$uuidSelectPattern a sp:Query ;
		sp:text """SELECT ?value ?label WHERE {
  ?subject $selectPath ?value .
  ?value rdfs:label ?label
}""" .
	
	_:$uuidContainer <http://www.w3.org/ns/ldp#contains> <$containerPrefix$fieldName> .
}

{
	_:$uuidContainer a <http://www.w3.org/ns/ldp#Container> , <http://www.w3.org/ns/ldp#Resource> , <http://www.w3.org/ns/prov#Entity> .
}
''')

Normalise to E38 Image

In [9]:
# Domain written into every generated field; it is also used as the outer
# key into normaliseDomains, so it must be present there.
normaliseTo = "crm:E38_Image"

Generate field definitions from template and write to files

In [10]:
# Generate one .trig field definition per link of every mapping.

# Create the target directory up front so the notebook also runs on a fresh
# checkout where it does not exist yet.
if outputDir:
    os.makedirs(outputDir, exist_ok=True)

for mapping in mappings:
    for link in mapping['links']:
        # The field is named after the first "/"-separated segment of the
        # source relation; the same name is used for the output file.
        label = link['from'].split("/")[0]
        d = {
            'containerPrefix': containerPrefix,
            'fieldName': label,
            # The format string ends in a literal "Z" (UTC designator), so the
            # timestamp has to be taken in UTC rather than local time.
            'dateGenerated': datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            'domain': normaliseTo,
            'uuidInsertPattern': uuid.uuid4(),
            'uuidSelectPattern': uuid.uuid4(),
            'uuidContainer': uuid.uuid4(),
            # Path elements leading from the normalised domain to this
            # mapping's domain, followed by the link's own relationships.
            # Raises KeyError if the mapping's domain has no entry in
            # normaliseDomains[normaliseTo].
            'selectPath':  "/".join(normaliseDomains[normaliseTo][mapping['domain']] + link['to'])
        }
        output = fieldDefinitionAnyURI.substitute(d)
        # The context manager closes the file even if the write fails; the
        # encoding is fixed to UTF-8 instead of the platform default.
        with open(os.path.join(outputDir, label + ".trig"), "w", encoding="utf-8") as f:
            f.write(output)