In [1]:
# Imports
from rdflib import Graph
from rdflib.namespace import RDFS
import time

In [2]:
# Takes a while to run (the recorded run below took over two minutes)
print('parsing')

# perf_counter is a monotonic clock, so the elapsed figure cannot be skewed
# by system clock adjustments made while the parse is running.
start = time.perf_counter()

g = Graph()
g.parse("mondo.owl")

elapsed_seconds = int(time.perf_counter() - start)
print('parsed in ' + str(elapsed_seconds) + ' seconds')

parsing
parsed in 138 seconds


In [3]:
# Survey the vocabulary: gather each distinct predicate used by any triple
predicates = {pred for _subj, pred, _obj in g}

# Emit one predicate URI per line
for uri in predicates:
    print(uri)

http://www.w3.org/2000/01/rdf-schema#subClassOf
http://www.geneontology.org/formats/oboInOwl#hasSynonymType
http://www.geneontology.org/formats/oboInOwl#is_metadata_tag
http://purl.org/dc/elements/1.1/description
http://purl.obolibrary.org/obo/IAO_0000233
http://www.w3.org/2002/07/owl#annotatedProperty
http://www.w3.org/2003/11/swrl#classPredicate
http://www.w3.org/2002/07/owl#unionOf
http://www.w3.org/1999/02/22-rdf-syntax-ns#rest
http://purl.org/dc/terms/creator
http://www.w3.org/2004/02/skos/core#broadMatch
http://www.w3.org/2002/07/owl#members
http://purl.obolibrary.org/obo/RO_0002161
http://www.geneontology.org/formats/oboInOwl#creation_date
http://purl.obolibrary.org/obo/RO_0002175
http://www.w3.org/2002/07/owl#hasSelf
http://www.w3.org/2002/07/owl#annotatedTarget
http://www.w3.org/2003/11/swrl#argument2
http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym
http://www.w3.org/2002/07/owl#complementOf
http://purl.obolibrary.org/obo/mondo#should_conform_to
http://www.geneon

In [4]:
# Narrow the predicates down to those whose URI mentions 'rdf-schema'
rdfs_only = (uri for uri in predicates if 'rdf-schema' in uri)
for uri in rdfs_only:
    print(uri)

http://www.w3.org/2000/01/rdf-schema#subClassOf
http://www.w3.org/2000/01/rdf-schema#comment
http://www.w3.org/2000/01/rdf-schema#subPropertyOf
http://www.w3.org/2000/01/rdf-schema#label
http://www.w3.org/2000/01/rdf-schema#seeAlso
http://www.w3.org/2000/01/rdf-schema#range
http://www.w3.org/2000/01/rdf-schema#domain


In [5]:
# Grabbing code for Parkinson disease

name = 'Parkinson disease'

def code_from_name(name, graph=None, predicate=None):
    """Return the first subject whose label text is exactly ``name``.

    Parameters
    ----------
    name : str
        Label to look up. The match is exact and case-sensitive.
    graph : optional
        Any object exposing ``subject_objects(predicate=...)`` that yields
        ``(subject, object)`` pairs. Defaults to the notebook-level graph ``g``.
    predicate : optional
        Predicate whose objects are compared with ``name``. Defaults to
        ``RDFS.label``.

    Returns
    -------
    The subject of the first matching pair, or ``None`` when no label matches.
    """
    if graph is None:
        graph = g
    if predicate is None:
        predicate = RDFS.label

    wanted = str(name)
    for subj, obj in graph.subject_objects(predicate=predicate):
        # Compare the plain text of both sides: the label objects are RDF
        # terms, and comparing a term directly with a Python string is not
        # guaranteed to be a text comparison.
        if str(obj) == wanted:
            return subj
    return None

code = code_from_name(name)
# Stop here with a clear message if the label was not found, rather than
# letting a None code flow into the cells that depend on it.
assert code is not None, 'no class labelled {!r} found in the graph'.format(name)
print(code)

http://purl.obolibrary.org/obo/MONDO_0005180


In [7]:
# Storing full codes of the direct children of this node

# A None object acts as a wildcard in a triple pattern and would match every
# subclass in the graph, so insist on a real parent code first.
assert code is not None, 'code is None: look up the parent class first'

# Ask the graph for the subjects of (?, rdfs:subClassOf, code) directly: this
# is an exact match on the parent and avoids scanning every subclass triple.
child_codes = list(g.subjects(predicate=RDFS.subClassOf, object=code))

In [11]:
# Grabbing the names (labels) of these children

# Set membership keeps the pass over every label in the graph linear,
# however many children there are.
wanted_codes = set(child_codes)

# Each entry is a (code, label) pair, in the order the graph yields labels.
child_names_codes = [
    (subj, obj)
    for subj, obj in g.subject_objects(predicate=RDFS.label)
    if subj in wanted_codes
]

In [12]:
# Cleaning the strings for printing.
# child_names_codes holds (code, label) pairs; each becomes
# (plain-text label, last path segment of the code URI).
child_pretty = [
    (str(label), str(uri).split('/')[-1])
    for uri, label in child_names_codes
]

In [14]:
# Display the (label, short id) pairs built for the direct children
child_pretty

[('late-onset Parkinson disease', 'MONDO_0008199'),
 ('parkinsonian-pyramidal syndrome', 'MONDO_0009830'),
 ('Parkinson disease, mitochondrial', 'MONDO_0010796'),
 ('parkinson disease 16', 'MONDO_0013167'),
 ('young-onset Parkinson disease', 'MONDO_0017279'),
 ('parkinson disease 25, autosomal recessive early-onset, with impaired intellectual development',
  'MONDO_0957576')]