# TOGAF 9.2 Content Metamodel Ontology

## Transform CSV into RDF

In [None]:
import re
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal, BNode
from rdflib.namespace import RDF, OWL, RDFS, SKOS, DCTERMS, NamespaceManager

In [None]:
# Load the ontology description; for now the CSV holds only properties,
# which keeps construction with Protege simple.
# Rows are indexed by the '@id' column, and keep_default_na=False is passed
# so pandas does not apply its default NaN conversion to the cells.
df = pd.read_csv(
    '/notebook/ontology_togaf_content_metamodel.csv',
    index_col='@id',
    keep_default_na=False,
)
df

In [None]:
# Single graph that collects every triple built in this notebook
graph = Graph()

# Namespace of the TOGAF content metamodel ontology
TOGAF = Namespace('http://www.semanticweb.org/ontologies/2020/4/OntologyTOGAFContentMetamodel.owl#')

# Bind the prefixes (in this order, each with override=False) so that the
# serialized output is clean
namespace_manager = NamespaceManager(graph)
for _prefix, _namespace in (
    ('rdfs', RDFS),
    ('skos', SKOS),
    ('owl', OWL),
    ('dcterms', DCTERMS),
    ('togaf', TOGAF),
):
    namespace_manager.bind(_prefix, _namespace, override=False)

In [None]:
# rdflib yields the bound namespaces as a generator of (prefix, uri) pairs,
# which is inconvenient to work with when parsing the CSV values; materialize
# it into a dict keyed by prefix instead.
prefixes = dict(namespace_manager.namespaces())
prefixes

In [None]:
# Simple definitions of the curie and uri since we know what is in the dataset.
# The patterns are raw strings so that regex escapes such as \w reach the re
# module untouched instead of being treated as (invalid) Python string escapes.
# curie: exactly one colon with only word characters (possibly none) either side.
curie = re.compile(r'^\w*:\w*$')
# uri: anything that starts with http:// or https://
uri = re.compile(r'^http[s]?://.*$')

In [None]:
# Transform a Pandas value into an RdfLib node (either Literal or URIRef),
# taking the declared prefixes into account.
# We assume that all literals are strings for now, in the default language.
def getNode(value):
    """Convert a string cell value into a URIRef or a Literal.

    A value matching the ``curie`` pattern is expanded using the namespace
    registered in ``prefixes`` for its prefix (a missing prefix raises
    ``KeyError``). A value matching the ``uri`` pattern is used verbatim as
    a URIRef. Anything else becomes a Literal.
    """
    if curie.match(value):
        # The curie pattern allows exactly one colon, so this splits cleanly
        prefix, _, reference = value.partition(':')
        expanded = str(prefixes[prefix]) + reference
        return URIRef(expanded)

    if uri.match(value):
        return URIRef(value)

    return Literal(value)

In [None]:
# Add one triple per non-empty cell: the row index is the subject, the column
# header the predicate, and the cell value the object.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for index, series in df.iterrows():
    for column, value in series.items():
        # Falsy cells (e.g. empty strings) carry no statement and are skipped
        if value:
            graph.add((getNode(index),
                       getNode(column),
                       getNode(value)))

In [None]:
# Render the whole graph as Turtle and show the result in the notebook
ttl = graph.serialize(
    format='turtle',
    indent=2,
)
ttl

In [None]:
# Write out ttl into a file
ttl_file_name = '/metamodel/OntologyTOGAFContentMetamodelV2.ttl'
# Graph.serialize() returns bytes on older rdflib releases and str on
# rdflib 6+; the file is opened in binary mode, so encode a str result to
# UTF-8 first instead of failing with a TypeError on write.
ttl_bytes = ttl.encode('utf-8') if isinstance(ttl, str) else ttl
with open(ttl_file_name, 'wb') as ttl_file:
    ttl_file.write(ttl_bytes)