In [12]:
!python --version
import requests
import yaml

Python 3.10.9


# BTE: Referenced APIs

In [11]:
# Download the raw BTE API list (YAML) and parse it
url = "https://raw.githubusercontent.com/biothings/bte-server/main/config/api_list.yaml"
# requests has no default timeout; without one a stalled connection would
# block this cell (and the kernel) indefinitely.
resp = requests.get(url, timeout=30)
resp.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
text = resp.text

data = yaml.safe_load(text)

# The API entries are read from the top-level "include" key
api_entries = data.get("include", [])

# Extract ids and names; entries that are not mappings or lack the key are skipped
ids   = [entry["id"]   for entry in api_entries if isinstance(entry, dict) and "id"   in entry]
names = [entry["name"] for entry in api_entries if isinstance(entry, dict) and "name" in entry]


print(f"Found {len(ids)} ids and {len(names)} names.\n")
print("Names:")
for nm in names:
    print("–", nm)



Found 61 ids and 61 names.

Names:
– Monarch API
– CTD API
– Complex Portal Web Service
– EBI Proteins API
– LitVar API
– QuickGO API
– Ontology Lookup Service API
– BioThings AGR API
– BioThings BindingDB API
– BioThings BioPlanet Pathway-Disease API
– BioThings BioPlanet Pathway-Gene API
– BioThings DDInter API
– BioThings DGIdb API
– BioThings DISEASES API
– BioThings EBIgene2phenotype API
– BioThings FooDB API
– BioThings FoodData Central API
– BioThings GO Biological Process API
– BioThings GO Cellular Component API
– BioThings GO Molecular Function API
– BioThings GTRx API
– BioThings HPO API
– BioThings IDISK API
– BioThings InnateDB API
– BioThings mabs API
– BioThings MGIgene2phenotype API
– BioThings PFOCR API
– BioThings RARe-SOURCE API
– BioThings repoDB API
– BioThings Rhea API
– BioThings SEMMEDDB API
– BioThings SuppKG API
– Biothings Therapeutic Target Database API
– BioThings UBERON API
– MyChem.info API
– MyDisease.info API
– MyGene.info API
– MyVariant.info API
– Mul

In [13]:
import requests
import yaml

def fetch_infores_catalog(raw_url=None, timeout=30):
    """
    Fetch and parse the infores_catalog.yaml from the Biolink GitHub repo.

    Parameters
    ----------
    raw_url : str, optional
        URL of the raw YAML file. When None, the catalog on the ``main``
        branch of biolink/information-resource-registry is used.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Defaults to 30 so a stalled
        connection cannot hang the notebook (requests itself never times
        out unless told to).

    Returns
    -------
    The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises
    ------
    requests.HTTPError
        Via ``raise_for_status()`` when the server answers with an error status.
    """
    if raw_url is None:
        raw_url = (
            "https://raw.githubusercontent.com/"
            "biolink/information-resource-registry/main/infores_catalog.yaml"
        )
    resp = requests.get(raw_url, timeout=timeout)
    resp.raise_for_status()
    return yaml.safe_load(resp.text)

def extract_infores_ids(catalog):
    """
    Given the parsed YAML (catalog), find all 'id' fields in the infores entries.

    Accepted shapes:
      * a dict holding the entries under one of the keys
        "information_resources", "infores" or "infores_catalog"
        (checked in that order; the first key present wins);
      * a dict with none of those keys, whose values are treated as the entries;
      * a list of entries.

    Entries that are not dicts, or that have no "id" key, are skipped.

    Returns a tuple ``(ids, unique_ids)``: a list of all IDs in encounter
    order (duplicates kept) and a set of the unique IDs.

    Raises ValueError when ``catalog`` is neither a dict nor a list.
    """
    if isinstance(catalog, dict):
        # common key patterns in LinkML-generated YAML
        for candidate in ("information_resources", "infores", "infores_catalog"):
            if candidate in catalog:
                entries = catalog[candidate]
                break
        else:
            # No known container key: treat every top-level value as an entry
            entries = list(catalog.values())
    elif isinstance(catalog, list):
        entries = catalog
    else:
        raise ValueError("Unexpected YAML structure for infores catalog")

    if entries is None:
        # A key with no value (e.g. "information_resources:") parses to None;
        # treat it as an empty section rather than crashing on iteration.
        entries = []
    elif isinstance(entries, dict):
        # Section given as a mapping: look at its values, mirroring the
        # top-level fallback above (iterating a dict would only yield keys).
        entries = list(entries.values())

    # Pull out all 'id' values
    ids = [entry["id"] for entry in entries if isinstance(entry, dict) and "id" in entry]
    return ids, set(ids)

In [14]:
# Pull the registry, then collect every entry ID (full list plus de-duplicated set).
catalog = fetch_infores_catalog()
all_ids, unique_ids = extract_infores_ids(catalog)

# If the two counts differ, at least one ID occurs more than once.
total_count, unique_count = len(all_ids), len(unique_ids)
print(f"Total IDs: {total_count}")
print(f"Unique IDs: {unique_count}")

Total IDs: 444
Unique IDs: 443


In [15]:
from collections import Counter

# Tally how many times each ID occurs in the full (non-deduplicated) list
id_counts = Counter(all_ids)

# Keep only the IDs that occur more than once, in first-seen order
duplicate_ids = []
for candidate_id, occurrences in id_counts.items():
    if occurrences > 1:
        duplicate_ids.append(candidate_id)

print(f"Found {len(duplicate_ids)} duplicate IDs:\n")
for iid in duplicate_ids:
    times_seen = id_counts[iid]
    print(f"  • {iid} appears {times_seen} times")

Found 1 duplicate IDs:

  • infores:catrax-pharmacogenomics appears 2 times
