# Uberon, CL, and PCL growth over time

In [1]:
%pip install requests pyoxigraph

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Ontologies whose releases are tracked. Every entry carries the CURIE prefix,
# the GitHub owner/repository, and the name of the OWL file to fetch per release.
ONTOLOGIES = [
    {"curie": curie, "owner": "obophenotype", "repo": repo, "owl-file": owl_file}
    for curie, repo, owl_file in (
        ("UBERON", "uberon", "uberon-base.owl"),
        ("CL", "cell-ontology", "cl-base.owl"),
        ("PCL", "provisional_cell_ontology", "pcl-base.owl"),
    )
]

## Download all versions of select ontologies

In [3]:
import requests
import os.path

# For every ontology: list its GitHub releases, remember them on the ontology
# entry, and download the chosen OWL file of each release to
# output/onto/<repo>/<tag>/<owl-file>. Files already on disk are not fetched again.
for onto in ONTOLOGIES:
    owner, repo, owl_file = onto["owner"], onto["repo"], onto["owl-file"]

    # The GitHub API pages the release listing (30 entries per page by default),
    # so request the maximum page size and follow the "next" links to the end.
    releases = []
    page_url = f"https://api.github.com/repos/{owner}/{repo}/releases?per_page=100"
    while page_url:
        response = requests.get(page_url, timeout=60)
        # Stop on rate limiting / API errors instead of iterating an error payload.
        response.raise_for_status()
        releases.extend(response.json())
        page_url = response.links.get("next", {}).get("url")
    onto["releases"] = releases

    for release in releases:
        tag = release["tag_name"]
        # Use the asset attached to the release when there is one; otherwise
        # fall back to the file as committed at the tag.
        if any(asset["name"] == owl_file for asset in release["assets"]):
            download_url = f"https://github.com/{owner}/{repo}/releases/download/{tag}/{owl_file}"
        else:
            download_url = f"https://raw.githubusercontent.com/{owner}/{repo}/refs/tags/{tag}/{owl_file}"

        output_file = f"output/onto/{repo}/{tag}/{owl_file}"
        release["output_file"] = output_file
        if os.path.exists(output_file):
            continue

        print(download_url)
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        # Download to a temporary name and rename on success, so an interrupted
        # transfer never leaves a truncated file that later cells would try to parse.
        partial_file = output_file + ".part"
        try:
            with requests.get(
                download_url,
                headers={"Accept": "application/octet-stream"},
                stream=True,
                timeout=60,
            ) as download:
                download.raise_for_status()
                with open(partial_file, "wb") as f:
                    for chunk in download.iter_content(chunk_size=1 << 20):
                        f.write(chunk)
            os.replace(partial_file, output_file)
        except (requests.RequestException, OSError) as error:
            # Best effort: a release whose file cannot be fetched is skipped;
            # later cells only process files that exist on disk.
            print(f"  download failed, skipping: {error}")
            if os.path.exists(partial_file):
                os.remove(partial_file)

## Query all versions of select ontologies

In [4]:
from pyoxigraph import Store, RdfFormat

# Read in SPARQL query to compute statistics for each version of each ontology.
# The context manager closes the file handle instead of leaving it open.
with open("ontology-stats.rq", "rt") as query_file:
    query = query_file.read()

def doQuery(onto, release, owl_file):
    """Load one ontology release into a new Store and run the statistics query.

    Args:
        onto: Ontology entry; its "curie" replaces every "UBERON" in the query
            text, and its "repo" (underscores turned into hyphens) labels the row.
        release: Release entry; its "tag_name" with every "v" removed is the date.
        owl_file: Path of the RDF/XML file to load.

    Returns:
        A dict holding "ontology", "date" and one integer per query variable,
        built from the first solution only, or None if the query has no solutions.
    """
    graph = Store()
    graph.bulk_load(path=owl_file, format=RdfFormat.RDF_XML)

    solutions = graph.query(query.replace("UBERON", onto["curie"]))
    # Only the first solution is used; the rest, if any, are ignored.
    first = next(iter(solutions), None)
    if first is None:
        return None

    tag_date = release["tag_name"].replace("v", "")
    stats = {var.value: int(first[var].value) for var in solutions.variables}
    return {"ontology": onto["repo"].replace("_", "-"), "date": tag_date} | stats

# Collect one statistics row per release whose OWL file is present on disk.
results = []
for onto in ONTOLOGIES:
    for release in onto["releases"]:
        output_file = release["output_file"]
        if not os.path.exists(output_file):
            # The download did not produce a file for this release; skip it.
            continue
        stats = doQuery(onto, release, output_file)
        # doQuery returns None when the query yields no solutions. A None entry
        # cannot be written as a CSV row, so report it and leave it out.
        if stats is None:
            print(f"no statistics for {output_file}, skipping")
            continue
        results.append(stats)

In [5]:
# Write ontology growth statistics to csv

from csv import DictWriter

# Check for rows before opening the file: mode "w" truncates it immediately, so
# failing afterwards on results[0] would wipe a previously written CSV.
if not results:
    raise ValueError("No ontology statistics were collected; nothing to write.")

# The column order follows the keys of the first row.
with open("output/other-ontologies-growth.csv", "w", newline="") as f:
    writer = DictWriter(f, fieldnames=results[0].keys())
    writer.writeheader()
    writer.writerows(results)