# Analyse SRAO
Get the number of concepts and the depth of the ontology

This notebook requires rdflib, SPARQLWrapper, pandas, and numpy to be installed:
* pip install rdflib
* pip install SPARQLWrapper
* pip install pandas
* pip install numpy

Download the latest version of SRAO from https://github.com/FAIRsharing/subject-ontology and place `SRAO.owl` next to this notebook.

In [1]:
from rdflib import Graph
from rdflib.namespace import RDFS
from rdflib import URIRef
import rdflib
import json
from collections import deque
import numpy as np
import pandas as pd

In [2]:
# Local copy of the SRAO ontology; the path is relative to the notebook.
input_file = "SRAO.owl"

# Read the ontology into an in-memory RDF graph that later cells query.
g = Graph()
g.parse(source=input_file)

<Graph identifier=N1db71c75890f4cdc959dd6b25d8c7062 (<class 'rdflib.graph.Graph'>)>

In [3]:
# Ask the graph for every distinct resource that is typed as owl:Class.
class_query = """SELECT DISTINCT ?a
       WHERE {
          ?a a owl:Class .
       }"""
qres = g.query(class_query)

# Map each class found to the placeholder value True (one entry per class).
topics = {row[0]: True for row in qres}

concept_count = len(topics)
print("Number of concepts: {}".format(concept_count))

Number of concepts: 425


In [4]:
# Fetch every distinct (subclass, superclass) pair asserted in the graph.
subclass_query = """SELECT DISTINCT ?a ?b
       WHERE {
          ?a rdfs:subClassOf ?b .
       }"""
qres = g.query(subclass_query)

# broaders:  subclass   -> list of its direct superclasses
# narrowers: superclass -> list of its direct subclasses
broaders = {}
narrowers = {}
for row in qres:
    child, parent = row[0], row[1]
    broaders.setdefault(child, []).append(parent)
    narrowers.setdefault(parent, []).append(child)

In [5]:
# Maximum depth of every concept: the number of nodes on the longest
# rdfs:subClassOf chain that starts at the concept and climbs through its
# broader concepts. A concept with no broader concept has depth 1.
unhier = broaders

# NOTE: `concepts` is the same dict object as `topics`, so the depths are
# written into `topics` as well (this aliasing is kept for compatibility).
concepts = topics

# node -> already computed depth. Rebuilt on every run of this cell, so the
# cell is idempotent: depths never depend on values left over in `concepts`.
depth_cache = dict()


def longest_chain_depth(node, on_path):
    """Return the number of nodes on the longest broader-chain starting at `node`.

    Parameters
    ----------
    node : hashable
        The concept to measure; looked up in `unhier` to find its broaders.
    on_path : set
        Nodes currently being expanded further down the call stack. An edge
        that leads back to one of them closes a cycle and is ignored, which
        guarantees termination on cyclic input.

    Returns
    -------
    int
        1 when `node` has no (non-cyclic) broader concept, otherwise
        1 + the largest depth among its broader concepts.

    Notes
    -----
    Each node is expanded once thanks to `depth_cache`, instead of once per
    path reaching it. For graphs that do contain cycles, the cached value of a
    node inside the cycle depends on where the traversal entered the cycle.
    Uses recursion, so hierarchies deeper than Python's recursion limit would
    need an iterative rewrite.
    """
    if node in depth_cache:
        return depth_cache[node]

    on_path.add(node)
    best = 1
    for broader in unhier.get(node, ()):
        if broader in on_path:
            # Back-edge: following it would loop forever.
            continue
        best = max(best, longest_chain_depth(broader, on_path) + 1)
    on_path.discard(node)

    depth_cache[node] = best
    return best


# Iterate over a snapshot of the keys because the values are rewritten in place.
for concept in list(concepts):
    concepts[concept] = longest_chain_depth(concept, set())

In [6]:
# One row per concept (index) with its maximum depth, deepest concepts first.
depth_frame = pd.DataFrame.from_dict(concepts, orient='index', columns=['depth'])
list_of_depths = depth_frame.sort_values('depth', ascending=False)

In [7]:
# Show the 20 concepts with the greatest maximum depth.
print("Concepts are ranked by maximum depth")
list_of_depths.iloc[:20]

Concepts are ranked by maximum depth


Unnamed: 0,depth
http://purl.obolibrary.org/obo/OMIT_0001094,8
http://www.fairsharing.org/ontology/subject/SRAO_0000315,8
http://purl.obolibrary.org/obo/NCIT_C20179,8
http://www.fairsharing.org/ontology/subject/SRAO_0000032,8
http://edamontology.org/topic_3415,8
http://edamontology.org/topic_0209,8
http://www.fairsharing.org/ontology/subject/SRAO_0000176,8
http://www.fairsharing.org/ontology/subject/SRAO_0000303,8
http://www.fairsharing.org/ontology/subject/SRAO_0000140,8
http://edamontology.org/topic_3293,8
