In [9]:
import os
import pprint
from time import sleep

import pandas as pd
import requests
import requests_cache

In [2]:
# BioPortal API key, taken from the BIOPORTAL_API_KEY environment variable so
# the secret is never stored in the notebook. Export it before starting the
# kernel; when the variable is unset the key is an empty string.
API_KEY = os.environ.get("BIOPORTAL_API_KEY", "")
ONTOLOGY = "METPO"  # acronym of the ontology whose mappings are fetched
BASE_URL = "https://data.bioontology.org/ontologies"
# NOTE(review): BioPortal's documentation appears to use the header form
# "apikey token=<key>" - confirm the expected format before sending HEADERS.
HEADERS = {"Authorization": f"apikey {API_KEY}"}

In [3]:
# Cache responses in "bioportal_cache"; entries expire after 24 hours (in seconds)
requests_cache.install_cache("bioportal_cache", expire_after=24 * 60 * 60)

In [4]:
def fetch_all_mappings(ontology, delay=1, timeout=30):
    """
    Fetches every page of mappings for an ontology from BioPortal.

    Starts at the ontology's ``mappings`` endpoint and keeps following the
    ``links.nextPage`` URL of each response until a page has none.

    :param ontology: The ontology acronym (e.g., "METPO").
    :param delay: Seconds to pause after a live (non-cached) request when
        another page still has to be fetched.
    :param timeout: Seconds ``requests`` waits for the server before giving up.
    :return: Dict mapping each response's ``page`` value to that page's decoded
        JSON, or None if any request returns a status other than 200.
    :raises requests.exceptions.RequestException: If the connection fails or
        the timeout expires.
    """
    url = f"{BASE_URL}/{ontology}/mappings?apikey={API_KEY}"  # Append API key as a query parameter
    mappings_pages = {}

    while url:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(url, timeout=timeout)

        if response.status_code == 401:
            print("Authentication failed! Check your API key.")
            return None

        if response.status_code != 200:
            print(f"Error fetching mappings: {response.status_code} - {response.text}")
            return None

        data = response.json()
        mappings_pages[data["page"]] = data

        # Tolerate a missing or null "links" entry; no next page ends the loop.
        url = (data.get("links") or {}).get("nextPage")

        # Throttle only when another live request follows. "from_cache" is set by
        # requests_cache; treat a plain response (no attribute) as live.
        if url and not getattr(response, "from_cache", False):
            sleep(delay)

    return mappings_pages

In [5]:
mappings = fetch_all_mappings(ONTOLOGY)

# fetch_all_mappings prints the HTTP error and returns None on failure; stop
# here so later cells do not fail with an unrelated-looking AttributeError.
if mappings is None:
    raise RuntimeError(f"Could not fetch mappings for {ONTOLOGY}; see the message printed above.")


In [30]:
# URL prefixes to abbreviate, each paired with the CURIE prefix that replaces it
URL_TO_CURIE = {
    "https://w3id.org/metpo/": "METPO:"
}

def convert_to_curie(ontology_id):
    """
    Converts a full ontology URL to CURIE format if a mapping exists.

    Only the leading URL prefix is swapped; the remainder of the ID is kept
    verbatim even if it happens to contain the prefix again.

    :param ontology_id: Full ID/URL of an ontology term; non-string values
        (e.g. None for a missing ID) are passed through untouched.
    :return: The CURIE form when a prefix in URL_TO_CURIE matches, otherwise
        the original value.
    """
    if not isinstance(ontology_id, str):
        return ontology_id  # Nothing to convert (e.g. the ID was missing)

    for url_prefix, curie_prefix in URL_TO_CURIE.items():
        if ontology_id.startswith(url_prefix):
            # Slice instead of str.replace, which would rewrite every occurrence
            return curie_prefix + ontology_id[len(url_prefix):]
    return ontology_id  # Return as-is if no mapping applies

def extract_loom_mappings(mappings_pages):
    """
    Collects the pairwise LOOM mappings from pages of BioPortal mapping data.

    A mapping is kept when its "source" is "LOOM" and it lists exactly two
    classes. The first class is reported as the source and the second as the
    target; both IDs go through convert_to_curie.

    :param mappings_pages: Dictionary of pages containing mapping data.
    :return: List of dictionaries with the keys "source_id", "target_id" and
        "target_ontology" ("target_ontology" is None when the target class has
        no "links" entry or no "ontology" link).
    """
    loom_mappings = []

    for page_data in mappings_pages.values():
        # A page without a (non-empty) "collection" contributes nothing
        for mapping in page_data.get("collection") or []:
            if mapping.get("source") != "LOOM":
                continue

            classes = mapping.get("classes") or []
            if len(classes) != 2:  # Ensure it's a pairwise mapping
                continue

            source_class, target_class = classes
            # Read "links" defensively, like the "@id" lookups, so a class
            # without links does not abort the whole extraction
            target_links = target_class.get("links") or {}
            loom_mappings.append({
                "source_id": convert_to_curie(source_class.get("@id")),
                "target_id": convert_to_curie(target_class.get("@id")),
                "target_ontology": target_links.get("ontology"),
            })

    return loom_mappings



In [31]:
# Flatten the fetched pages into one record (dict) per pairwise LOOM mapping
loom_mappings = extract_loom_mappings(mappings)

In [32]:
# Tabulate the records; columns come from the record keys
# (source_id, target_id, target_ontology)
loom_mappings_frame = pd.DataFrame(loom_mappings)

In [33]:
# (rows, columns) before duplicate rows are dropped
loom_mappings_frame.shape

(3479, 3)

In [34]:
# Keep one copy of each identical (source_id, target_id, target_ontology) row
loom_mappings_frame = loom_mappings_frame.drop_duplicates()

In [35]:
# (rows, columns) after duplicate rows were dropped
loom_mappings_frame.shape

(3469, 3)

In [37]:
# Write the mappings as a tab-separated file without the row index
loom_mappings_frame.to_csv("metpo_loom_mappings.tsv", sep="\t", index=False)