In [2]:
import requests
from urllib.parse import quote # for double encoding IRIs for the API calls
import time
import json

## Functions

In [3]:
def retrieve_ontology_matches(term, numresults=50, ontology=None):
    """
    Retrieve ontology matches for a given term using OLS API

    params:
    - term (str) - a search term
    - numresults (int) - maximum number of results to request (sent as 'rows')
    - ontology (str or list of str) - ontology name(s) to restrict the search to.
                            A list is sent as a single comma-separated value.
                            If not provided, no ontology filter is sent.

    returns:
    - jsonresults (str) - the full API response, pretty-printed as JSON
    - ontology_matches (list) - one dict per returned doc with the keys
                            'label', 'iri', 'description', 'ontology_name', 'short_form'

    If the request fails, the error is printed and (None, []) is returned,
    so callers can always unpack two values.
    """
    base_url = 'https://www.ebi.ac.uk/ols4/api/search'

    params = {
        'q': term,
        'rows': numresults,
    }
    if ontology is not None:
        # a plain string is passed through untouched; any other iterable
        # of names is collapsed into one comma-separated filter value
        if isinstance(ontology, str):
            params['ontology'] = ontology
        else:
            params['ontology'] = ','.join(ontology)

    try:
        # Make the API request (bounded, so a stalled connection cannot hang the kernel)
        response = requests.get(base_url, params=params, timeout=30)

        # Raise an exception for bad responses
        response.raise_for_status()

        # Parse the JSON results with full details
        results = response.json()

    except requests.RequestException as e:
        print(f"Error connecting to OLS API: {e}")
        # keep the two-value shape so `jsonresults, matches = ...` still works
        return None, []

    jsonresults = json.dumps(results, indent=2)

    # Extract detailed concept information, with a placeholder for any missing field
    ontology_matches = []
    for term_info in results.get('response', {}).get('docs', []):
        concept = {
            'label': term_info.get('label', 'No Label'),
            'iri': term_info.get('iri', 'No IRI'),
            'description': term_info.get('description', 'No Description'),
            'ontology_name': term_info.get('ontology_name', 'No Ontology Name'),
            'short_form': term_info.get('short_form', 'No Short Form')
        }
        ontology_matches.append(concept)

    return jsonresults, ontology_matches

def get_term_ancestors(ontology, iri):
    """
    Get the ancestors of an ontology term using OLS API

    params:
    - ontology (str) - the ontology name
    - iri (str) - the IRI of the term

    returns:
    - ancestors (list) - list of ancestors, each with a dictionary of term properties.
                         An empty list is returned (and the error printed) if any
                         request fails, so callers can always take len() of the result.

    Here is an example of a valid request:
    https://www.ebi.ac.uk/ols4/api/ontologies/duo/terms/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FDUO_0000017/ancestors?lang=en
    """

    # double quote here if we're passing it directly into the URL
    # single quote if we're passing as param
    iri_encoded = quote(quote(iri, safe=''))

    next_url = f"https://www.ebi.ac.uk/ols4/api/ontologies/{ontology}/terms/{iri_encoded}/ancestors"
    next_params = {
        'lang': 'en'
    }

    ancestors = []
    visited = set()  # guards against a response whose "next" link loops back

    try:
        while next_url and next_url not in visited:
            visited.add(next_url)

            # Make the API request (bounded, so a stalled connection cannot hang the kernel)
            response = requests.get(next_url, params=next_params, timeout=30)

            # Raise an exception for bad responses
            response.raise_for_status()

            # Parse the JSON results with full details
            result = response.json()

            # collect the ancestors on this page
            ancestors.extend(result.get('_embedded', {}).get('terms', []))

            # If the response advertises a further page, follow it so the
            # ancestor count is not truncated to the first page.
            # NOTE(review): assumes the "next" href already carries its own
            # query string, so no params are re-sent — confirm against the API.
            next_url = result.get('_links', {}).get('next', {}).get('href')
            next_params = None

        return ancestors

    except requests.RequestException as e:
        print(f"Error connecting to OLS API: {e}")
        # honour the documented list return type instead of handing back the exception
        return []

def rank_ontology_matches(ontology_matches):
    """
    Rank ontology matches

    Criteria for ranking:
    - abstraction: matches with a smaller 'distance_from_root' come first
      (most "general" first). Matches without that key (or with None) are
      placed after all matches that have one.
    - relevance: the sort is stable, so matches with equal distance keep the
      order they were passed in
      (presumably the search API's relevance order; verify against caller).

    params:
    - ontology_matches (list) - a list of ontology match dicts; each may carry
                                a numeric 'distance_from_root' entry

    returns:
    - ranked_matches (list) - a new list of the same match objects in ranked
                              order; the input list is not modified.
                              None or an empty list yields [].
    """
    def _distance(match):
        # missing depth information sorts last rather than raising
        distance = match.get('distance_from_root')
        return float('inf') if distance is None else distance

    ranked_matches = sorted(ontology_matches or [], key=_distance)
    return ranked_matches

def get_ontology_term_properties(ontology, iri):
    """
    Get ontology term properties using OLS API

    params:
    - ontology (str) - the ontology name
    - iri (str) - the IRI of the term

    returns:
    - term_properties (dict) - the parsed JSON response for the term,
                               or None (with the error printed) if the request fails

    The term is looked up under the ontology's /terms/ path, the same path
    family used for the ancestors lookup. Requesting a term IRI under
    /properties/ failed with a 500 error and the message
    "Expected at least 1 result for solr getFirst".
    """

    # double quote here if we're passing it directly into the URL
    # single quote if we're passing as param
    iri_encoded = quote(quote(iri, safe=''))

    url = f"https://www.ebi.ac.uk/ols4/api/ontologies/{ontology}/terms/{iri_encoded}"
    params = {
        'lang': 'en'
    }

    try:
        # Make the API request (bounded, so a stalled connection cannot hang the kernel)
        response = requests.get(url, params=params, timeout=30)

        # Raise an exception for bad responses
        response.raise_for_status()

        # Parse the JSON results with full details
        term_properties = response.json()

        return term_properties

    except requests.RequestException as e:
        print(f"Error connecting to OLS API: {e}")
        return None



## Minimal working script

Start with a term and get back a list of matching ontology terms, sorted in ascending order of distance from the root of the ontology (to a rough approximation, most "general" first, though this is not strictly speaking comparable across ontologies).

In [5]:
# get initial set of ontology matches
term = "malaria"
search_result = retrieve_ontology_matches(term, numresults=10)
# a failed search may come back as None instead of a (json, matches) pair;
# fall back to an empty result so the rest of the cell still runs
jsonresults, matches = search_result if search_result is not None else (None, [])

# then get the distance from root for each one
# which is just the length of the ancestors list
matches_with_metadata = []
for match in matches:

    # get ancestors
    iri = match['iri']
    ontology = match['ontology_name']
    ancestors = get_term_ancestors(ontology, iri)

    if isinstance(ancestors, list):
        print(f"Retrieved {len(ancestors)} ancestors for {iri}")

        # update match with ancestor info
        match['ancestors'] = ancestors
        match['distance_from_root'] = len(ancestors)

        matches_with_metadata.append(match)
    else:
        # anything other than a list means the lookup failed; leave the match
        # out rather than crash on len() or record a bogus distance
        print(f"Skipping {iri}: could not retrieve ancestors")

    # pause for a bit
    # so we don't get rate limited
    time.sleep(1)

Retrieved 8 ancestors for http://purl.obolibrary.org/obo/NCIT_C34797
Retrieved 15 ancestors for http://www.ebi.ac.uk/efo/EFO_0001068
Retrieved 4 ancestors for http://purl.obolibrary.org/obo/DOID_12365
Retrieved 13 ancestors for http://purl.obolibrary.org/obo/MONDO_0005136
Retrieved 8 ancestors for http://purl.obolibrary.org/obo/OMIT_0009363
Retrieved 17 ancestors for http://purl.obolibrary.org/obo/GSSO_006352
Retrieved 3 ancestors for http://www.orpha.net/ORDO/Orphanet_673
Retrieved 7 ancestors for http://snomed.info/id/61462000
Retrieved 17 ancestors for http://www.ebi.ac.uk/efo/EFO_0001068
Retrieved 9 ancestors for http://purl.obolibrary.org/obo/DOID_12365


In [6]:
# order the matches in place, shallowest (closest to the ontology root) first
matches_with_metadata.sort(key=lambda entry: entry['distance_from_root'])

# then report every match as one block of text, numbered from 1
for idx, match in enumerate(matches_with_metadata, start=1):
    report_lines = [
        f"\nResult {idx}:",
        f"Label: {match['label']}",
        f"IRI: {match['iri']}",
        f"Ontology Name: {match['ontology_name']}",
        f"Description: {match['description']}",
        f"Short Form: {match['short_form']}",
        f"Distance from Root: {match['distance_from_root']}",
    ]
    # a single print of the joined lines emits the same bytes as one print per line
    print("\n".join(report_lines))


Result 1:
Label: Malaria
IRI: http://www.orpha.net/ORDO/Orphanet_673
Ontology Name: ordo
Description: ['A life-threatening parasitic disease caused by <i>Plasmodium</i> (<i>P. </i>) parasites that are transmitted by <i>Anophles</i> mosquito bites to humans and is typically clinically characterized by attacks of fever, headache, chills and vomiting.']
Short Form: ORDO_673
Distance from Root: 3

Result 2:
Label: malaria
IRI: http://purl.obolibrary.org/obo/DOID_12365
Ontology Name: doid
Description: ['A parasitic protozoa infectious disease characterized as a vector-borne infectious disease caused by the presence of protozoan parasites of the genus Plasmodium in the red blood cells, transmitted from an infected to an uninfected individual by the bite of anopheline mosquitoes, and characterized by periodic attacks of chills and fever that coincide with mass destruction of blood cells and the release of toxic substances by the parasite at the end of each reproductive cycle.', 'Xref MGI.']
