# TOGAF 9.2 Content Metamodel Taxonomy

## Transform CSV into RDF

In [None]:
import re
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal, BNode
from rdflib.namespace import RDF, OWL, RDFS, SKOS, DCTERMS, NamespaceManager

In [None]:
# Load the taxonomy CSV: rows are indexed by their '@id' column, and
# keep_default_na=False keeps blank cells as empty strings instead of NaN.
df = pd.read_csv(
    '/notebook/taxonomy_togaf_content_metamodel.csv',
    index_col='@id',
    keep_default_na=False,
)
df

In [None]:
# Graph that will hold the generated triples
graph = Graph()

# Project-specific namespaces (OWL ontology and SKOS vocabulary)
TOGAF = Namespace('http://www.semanticweb.org/ontologies/2020/4/OntologyTOGAFContentMetamodel.owl#')
TOGAFVOC = Namespace('http://www.semanticweb.org/ontologies/2020/4/VocabularyTOGAFContentMetamodel.skos#')

# Register the prefixes used in serialized output; override=False asks rdflib
# not to replace bindings that already exist.
namespace_manager = NamespaceManager(graph)
for _prefix, _namespace in (
    ('rdfs', RDFS),
    ('skos', SKOS),
    ('owl', OWL),
    ('dcterms', DCTERMS),
    ('togaf', TOGAF),
    ('togafvoc', TOGAFVOC),
):
    namespace_manager.bind(_prefix, _namespace, override=False)

In [None]:
# RdfLib gives back prefixes as a generator of (prefix, namespace) pairs, which is
# inconvenient to work with when resolving the prefixed names in the dataset, so
# materialize them into a dict once.
# dict() is used instead of a `for prefix, uri in ...` loop on purpose: such a loop
# leaves a module-level `uri` behind, which would overwrite the `uri` regex whenever
# this cell is re-run after the regex cell and break getNode().
prefixes = dict(namespace_manager.namespaces())
prefixes

In [None]:
# Simple definitions of the curie and uri since we know what is in the dataset.
# Raw strings are required so that regex escapes such as \w are passed to the regex
# engine untouched instead of being parsed as (invalid) string escape sequences.
# curie: "<word chars>:<word chars>" for the whole value, e.g. "skos:Concept"
curie = re.compile(r'^\w*:\w*$')
# uri: anything starting with "http://" or "https://"
uri = re.compile(r'^http[s]?://.*$')

In [None]:
# Simple function to transform a Pandas value into an RdfLib Node (either Literal or
# URIRef), taking declared prefixes into account.
def getNode(value):
    """Convert a single CSV value into an rdflib node.

    The value is classified in this order:

    1. Non-string values (pandas may infer numeric dtypes for some columns)
       are wrapped in a ``Literal`` as-is; the regexes below only accept
       strings and would otherwise raise ``TypeError``.
    2. A value matching the module-level ``curie`` pattern
       (``prefix:reference``) is expanded with the module-level ``prefixes``
       mapping and returned as a ``URIRef``.
    3. A value matching the module-level ``uri`` pattern (``http://`` or
       ``https://``) is returned as a ``URIRef`` unchanged.
    4. Anything else becomes a ``Literal`` built from the value alone (no
       language tag or datatype is passed).

    Args:
        value: The cell, index or column value to convert.

    Returns:
        A ``URIRef`` or ``Literal``.

    Raises:
        KeyError: If the value looks like a CURIE but its prefix is not a key
            of ``prefixes``.
    """
    if not isinstance(value, str):
        return Literal(value)
    if curie.match(value):
        # The curie pattern guarantees exactly one ':' in the value.
        prefix, _, reference = value.partition(':')
        return URIRef(''.join((prefixes[prefix], reference)))
    if uri.match(value):
        return URIRef(value)
    return Literal(value)

In [None]:
# Turn every non-empty cell into one triple:
#   subject   = the row index (the '@id' column of the CSV)
#   predicate = the column header, without its optional "[n]" suffix
#   object    = the cell value
# Only group 1 of the header pattern is used; the "[n]" suffix is presumably
# there to allow several columns for the same predicate (TODO confirm).
# The pattern is a raw string (so \w, \[ and \d are not parsed as string
# escapes) and is compiled once, outside the loops.
column_pattern = re.compile(r'([\w:]*)(\[(\d*)\])?')

for index, series in df.iterrows():
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
    for column, value in series.items():
        # Empty cells carry no statement and are skipped.
        if not value:
            continue
        predicate = column_pattern.search(column).group(1)
        graph.add((getNode(index), getNode(predicate), getNode(value)))

In [None]:
# Serialize the graph as Turtle; the bare name on the last line displays the
# result in the notebook.
ttl = graph.serialize(format='turtle', indent=2)
ttl

In [None]:
# Write out ttl into a file
ttl_file_name = '/metamodel/VocabularyTOGAFContentMetamodelV2.ttl'
# Depending on the rdflib version, serialize() hands back bytes (older releases)
# or str (rdflib >= 6). The file is opened in binary mode, so normalize to UTF-8
# bytes first; writing a str to a 'wb' handle raises TypeError.
ttl_bytes = ttl if isinstance(ttl, bytes) else ttl.encode('utf-8')
with open(ttl_file_name, 'wb') as ttl_file:
    ttl_file.write(ttl_bytes)

In [None]:
# JSON-LD context passed to the serializer to simplify development work: short
# term names for the dcterms/SKOS/RDFS properties listed below, with SKOS as the
# default vocabulary. Terms declared with '@type': '@id' have IRI values.
context = {
    'source': {'@id': 'http://purl.org/dc/terms/source', '@type': '@id'},
    'broader': {'@id': 'http://www.w3.org/2004/02/skos/core#broader', '@type': '@id'},
    'inScheme': {'@id': 'http://www.w3.org/2004/02/skos/core#inScheme', '@type': '@id'},
    'topConceptOf': {'@id': 'http://www.w3.org/2004/02/skos/core#topConceptOf', '@type': '@id'},
    'hasTopConcept': {'@id': 'http://www.w3.org/2004/02/skos/core#hasTopConcept', '@type': '@id'},
    'label': {'@id': 'http://www.w3.org/2000/01/rdf-schema#label'},
    'comment': {'@id': 'http://www.w3.org/2000/01/rdf-schema#comment'},
    'definedBy': {'@id': 'http://www.w3.org/2000/01/rdf-schema#definedBy', '@type': '@id'},
    '@vocab': 'http://www.w3.org/2004/02/skos/core#',
}

# Serialize the graph as JSON-LD using that context; the bare name displays the
# result in the notebook.
json = graph.serialize(format='json-ld', context=context, indent=2)
json

In [None]:
# Write out json into a file
json_file_name = '/metamodel/VocabularyTOGAFContentMetamodelV2.json'
# Depending on the rdflib version, serialize() hands back bytes (older releases)
# or str (rdflib >= 6). The file is opened in binary mode, so normalize to UTF-8
# bytes first; writing a str to a 'wb' handle raises TypeError.
json_bytes = json if isinstance(json, bytes) else json.encode('utf-8')
with open(json_file_name, 'wb') as json_file:
    json_file.write(json_bytes)