# Uberon, CL, and PCL growth over time

In [1]:
%pip install requests


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Ontologies whose releases are processed by this notebook. Each entry holds
# the CURIE prefix, the GitHub owner and repository used to build the release
# URLs, and the name of the OWL file fetched for every release.
ONTOLOGIES = [
    {"curie": curie, "owner": "obophenotype", "repo": repo, "owl-file": owl_file}
    for curie, repo, owl_file in (
        ("UBERON", "uberon", "uberon-base.owl"),
        ("CL", "cell-ontology", "cl-base.owl"),
        ("PCL", "provisional_cell_ontology", "pcl-base.owl"),
    )
]

## Download all versions of select ontologies

In [3]:
import requests
import os.path
import os

def download_file(download_url, output_file, timeout=60):
    """Stream the resource at ``download_url`` into ``output_file``.

    The body is first written to ``<output_file>.part`` and renamed into place
    only once the whole response has been received, so an interrupted download
    never leaves a truncated file at ``output_file``.

    Args:
        download_url: URL to fetch; redirects are followed.
        output_file: Destination path. Its parent directory must already exist.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, or
            with any other status that is not 200.
    """
    headers = {"Accept": "application/octet-stream"}
    partial_file = f"{output_file}.part"

    # The context manager releases the streamed connection even on error.
    with requests.get(
        download_url,
        headers=headers,
        stream=True,
        allow_redirects=True,
        timeout=timeout,
    ) as response:
        response.raise_for_status()
        if response.status_code != 200:
            # raise_for_status() ignores non-error codes such as 204; report
            # them instead of silently writing nothing.
            raise requests.HTTPError(
                f"Unexpected status {response.status_code} for {download_url}",
                response=response,
            )

        try:
            with open(partial_file, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        except BaseException:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(partial_file):
                os.remove(partial_file)
            raise

    # Atomic on the same filesystem: the final path only ever holds a
    # complete download.
    os.replace(partial_file, output_file)

def _fetch_all_releases(owner, repo):
    """Return every release of a GitHub repository, following pagination.

    The releases endpoint is paginated; this requests 100 entries per page and
    follows the ``next`` link until no further page is advertised.

    Raises:
        requests.HTTPError: If the API answers with an error status (for
            example when the rate limit is exceeded).
    """
    collected = []
    page_url = f"https://api.github.com/repos/{owner}/{repo}/releases?per_page=100"
    while page_url:
        response = requests.get(page_url, timeout=60)
        # Without this check an error payload (a dict) would be iterated as if
        # it were the list of releases.
        response.raise_for_status()
        collected.extend(response.json())
        page_url = response.links.get("next", {}).get("url")
    return collected


# Download the OWL file of every release of every configured ontology.
# Files already present on disk are not downloaded again.
for onto in ONTOLOGIES:
    owner, repo, owl_file = onto["owner"], onto["repo"], onto["owl-file"]
    releases = _fetch_all_releases(owner, repo)
    onto["releases"] = releases
    for release in releases:
        tag = release["tag_name"]

        # Prefer the release asset; fall back to the file in the tagged source
        # tree when the release has no asset with that name.
        if any(asset["name"] == owl_file for asset in release["assets"]):
            download_url = f"https://github.com/{owner}/{repo}/releases/download/{tag}/{owl_file}"
        else:
            download_url = f"https://raw.githubusercontent.com/{owner}/{repo}/refs/tags/{tag}/{owl_file}"

        output_dir = f"output/onto/{repo}/{tag}"
        output_file = f"{output_dir}/{owl_file}"
        release["output_file"] = output_file
        if not os.path.exists(output_file):
            print(download_url)
            # exist_ok: the directory may be left over from an earlier attempt
            # that failed before the file was written.
            os.makedirs(output_dir, exist_ok=True)
            try:
                download_file(download_url, output_file)
            except requests.HTTPError as error:
                # A release that does not contain the file (HTTP 404) is
                # skipped; the query step already ignores releases whose file
                # is missing on disk. Any other HTTP error is re-raised.
                if error.response is None or error.response.status_code != 404:
                    raise
                print(f"Skipping {repo} {tag}: {owl_file} not found")

https://github.com/obophenotype/cell-ontology/releases/download/v2025-02-13/cl-base.owl


## Query all versions of select ontologies

In [4]:
import subprocess
import json

# Run a shell command and parse the output as JSON
def run_and_parse_json(command):
    """Run a shell command and parse its standard output as JSON.

    Args:
        command: Command line handed to the shell (``shell=True``). Callers
            are responsible for quoting any values interpolated into it.

    Returns:
        The decoded JSON value, or ``None`` when the command exits with a
        non-zero status or its output is not valid JSON. A diagnostic is
        printed in both failure cases.
    """
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        result = subprocess.run(command, shell=True, check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f"Command failed with error: {e}")
        # stderr is captured, so it must be surfaced explicitly; the exception
        # text itself only contains the command and its exit status.
        if e.stderr:
            print(e.stderr.strip())
        return None

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON: {e}")
        return None


In [5]:
def rdf_stats(onto, release, owl_file):
    """Build one statistics row for a single release of an ontology.

    The OWL file is piped into ``../src/rdf-stats.js`` together with the
    content type ``application/rdf+xml`` and the ontology's OBO PURL prefix;
    the JSON object the script prints is merged into the returned row.

    Args:
        onto: Ontology entry providing the ``curie`` and ``repo`` keys.
        release: Release record providing the ``tag_name`` key.
        owl_file: Path of the downloaded OWL file for this release.

    Returns:
        A dict with ``ontology`` (repo name with ``_`` replaced by ``-``),
        ``date`` (the tag without its leading ``v``) and the script's fields.

    Raises:
        RuntimeError: If the script fails or prints something that is not
            valid JSON.
    """
    import shlex

    # Drop only a leading "v"; str.replace would also remove any "v" that
    # appears later in the tag.
    date = release["tag_name"].removeprefix("v")
    prefix = f"http://purl.obolibrary.org/obo/{onto['curie']}_"

    # The path contains the release tag, which comes from the GitHub API, so
    # quote it properly instead of wrapping it in bare double quotes.
    command = (
        f"cat {shlex.quote(owl_file)} | "
        f"node ../src/rdf-stats.js application/rdf+xml {shlex.quote(prefix)}"
    )
    result = run_and_parse_json(command)
    if result is None:
        # run_and_parse_json has already printed the underlying cause.
        raise RuntimeError(f"rdf-stats produced no usable output for {owl_file}")

    return {"ontology": onto["repo"].replace("_", "-"), "date": date} | result

# One statistics row per release whose OWL file is present on disk; releases
# without a downloaded file are left out.
results = [
    rdf_stats(onto, release, release["output_file"])
    for onto in ONTOLOGIES
    for release in onto["releases"]
    if os.path.exists(release["output_file"])
]

In [6]:
# Write ontology growth statistics to csv

from csv import DictWriter

# Fail before opening the file: opening it with "w" would truncate a CSV
# written by an earlier run even though there is nothing to write.
if not results:
    raise ValueError("No ontology statistics were collected; nothing to write.")

# Union of all row keys in first-seen order, so a row with an extra or missing
# field does not abort the export (DictWriter leaves missing values blank).
fieldnames = list(dict.fromkeys(key for row in results for key in row))

with open("output/other-ontologies-growth.csv", "w", newline="") as f:
    writer = DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(results)