In [1]:
# Import and mess with vocabulary
import os
import rdflib
import rdflib.plugins.sparql

PFX = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
"""
proj_dir = os.path.abspath("..")

vsrc = os.path.join(proj_dir, "src/vocabularies/material-type/materialType.ttl")

V = rdflib.ConjunctiveGraph()
V.parse(vsrc)

<Graph identifier=file:///Users/vieglais/Documents/Projects/isamples/source/metadata/src/vocabularies/material-type/materialType.ttl (<class 'rdflib.graph.Graph'>)>

In [2]:
q = PFX + """SELECT ?parent ?child
WHERE {
    ?child rdf:type skos:Concept .
    ?child skos:broader ?parent .
}"""

qres = V.query(q)
for t in qres:
    print(f"{t[0]} <- {t[1]}")

https://w3id.org/isample/vocabulary/material/0.9/anyanthropogenicmaterial <- https://w3id.org/isample/vocabulary/material/0.9/anthropogenicmetal
https://w3id.org/isample/vocabulary/material/0.9/material <- https://w3id.org/isample/vocabulary/material/0.9/anyanthropogenicmaterial
https://w3id.org/isample/vocabulary/material/0.9/material <- https://w3id.org/isample/vocabulary/material/0.9/anyice
https://w3id.org/isample/vocabulary/material/0.9/material <- https://w3id.org/isample/vocabulary/material/0.9/biogenicnonorganicmaterial
https://w3id.org/isample/vocabulary/material/0.9/material <- https://w3id.org/isample/vocabulary/material/0.9/dispersedmedia
https://w3id.org/isample/vocabulary/material/0.9/material <- https://w3id.org/isample/vocabulary/material/0.9/earthmaterial
https://w3id.org/isample/vocabulary/material/0.9/material <- https://w3id.org/isample/vocabulary/material/0.9/fluid
https://w3id.org/isample/vocabulary/material/0.9/fluid <- https://w3id.org/isample/vocabulary/materia

In [12]:
def listVocabularies(g):
    '''List the vocabularies in the provided graph
    '''
    q = PFX + """SELECT ?s
    WHERE {
        ?s rdf:type skos:ConceptScheme.
    }"""
    qres = g.query(q)
    res = []
    for r in qres:
        res.append(r[0])
    return res


def getVocabRoot_no_root_listed(v):
    """
    Finds roots in the vocab as terms that do not have a broader concept
    """
    q = PFX + """SELECT ?s
    WHERE {
        ?s rdf:type skos:Concept .
        OPTIONAL { ?s skos:broader ?c } .
        FILTER ( !bound(?c) )
    }
    """
    qres = v.query(q)
    for t in qres:
        print(t)
        
        
def getVocabRoot(g, v):
    """Get top concept of the specific vocabulary
    """    
    q = rdflib.plugins.sparql.prepareQuery(PFX + """SELECT ?s
    WHERE {
        ?s skos:topConceptOf ?vocabulary .
    }""")
    qres = g.query(q, initBindings={'vocabulary': v})
    res = []
    for row in qres:
        res.append(row[0])
    return res

def getNarrower(g, v, r):
    q = rdflib.plugins.sparql.prepareQuery(PFX + """SELECT ?s
    WHERE {
        ?s skos:inScheme ?vocabulary .
        ?s skos:broader ?parent .
    }""")
    qres = g.query(q, initBindings={'vocabulary': v, 'parent':r})
    res = []
    for row in qres:
        res.append(row[0])
    return res
    
def getTerm(g, t):
    '''return a node with immediate predicates and objects
    '''
    q = rdflib.plugins.sparql.prepareQuery(PFX + """SELECT ?p ?o
    WHERE {
        ?subject ?p ?o .
    }""")
    qres = g.query(q, initBindings={'subject': t})
    return qres


def printTerm(t, indent=""):
    for row in t:
        print(f"{indent}{row[0]} : {row[1]}")

def getObjects(g, s, p):
    q = rdflib.plugins.sparql.prepareQuery(PFX + """SELECT ?o
    WHERE {
        ?subject ?predicate ?o .
    }""")
    qres = g.query(q, initBindings={'subject':s, 'predicate':p})
    res = []
    for row in qres:
        print(row)
        res.append(row[0])
    return res        

def skosT(term):
    return rdflib.URIRef(f"http://www.w3.org/2004/02/skos/core#{term}")        
        
def describeTerm(g, t, depth=0):
    INDENT = "  "
    res = []
    labels = getObjects(g, t, skosT('prefLabel'))
    print(labels)
    if len(labels) < 1:
        res.append(f"`{depth*INDENT}{t}`")
    else:
        res.append(f"{depth*INDENT}{labels[0]}")
        for label in labels[1:]:
            res.append(f"{depth*INDENT}* `{label}`")
    return res        
        
vocabs = listVocabularies(V)
print("Vocabularies:")
for v in vocabs:
    print(v) 
    print("  Root terms:")
    root = getVocabRoot(V, v)
    res = describeTerm(V, root[0])
    print("\n".join(res))
    '''
    for r in root:
        print(f"    {r}")
        print("      Children:")
        cs = getNarrower(V, v, r)
        for c in cs:
            print(f"      {c}")
            t = getTerm(V, c)
            printTerm(t, indent=" "*8)
    '''

Vocabularies:
https://w3id.org/isample/vocabulary/material/0.9/materialsvocabulary
  Root terms:
(rdflib.term.Literal('Material ', lang='en'),)
[rdflib.term.Literal('Material ', lang='en')]
Material 
