# Twin Identifier Analysis

This notebook looks for local unique identifiers that are the same in different biomedical ontologies, databases, and identifier spaces. The large number of overlaps shows why well-defined prefixes are so important.

In [1]:
import json
from itertools import combinations
from operator import itemgetter

import bioregistry
import networkx as nx
import pandas as pd
import pyobo
from tqdm.auto import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

Extract the list of local unique identifiers from all resources that can be retrieved with PyOBO. Note that these local unique identifiers are normalized by the Bioregistry to remove potentially redundant prefixes.

In [3]:
# Prefixes that are explicitly left out of the analysis.
skip = {"umls", "kegg", "icd10", "icd11"}

# Keep every Bioregistry resource that either has a download or has a PyOBO
# nomenclature plugin, is not deprecated, and is not in the skip list.
resources = []
for resource in bioregistry.resources():
    obtainable = resource.has_download() or pyobo.has_nomenclature_plugin(
        resource.prefix
    )
    if not obtainable:
        continue
    if resource.is_deprecated():
        continue
    if resource.prefix in skip:
        continue
    resources.append(resource)

# Sanity check: MeSH must be among the selected resources.
assert any(r.prefix == "mesh" for r in resources)

In [4]:
# Collect the local unique identifiers of every selected resource.
#
# ids                maps prefix -> non-empty collection of identifiers
# succeeded_prefixes prefixes for which pyobo.get_ids() returned (even if empty)
# failed_prefixes    prefixes for which pyobo.get_ids() raised
# failure_reasons    maps each failed prefix -> "<ExceptionType>: <message>", so
#                    that failures can be triaged after the run instead of the
#                    cause being thrown away
ids = {}
failed_prefixes = set()
succeeded_prefixes = set()
failure_reasons = {}
with logging_redirect_tqdm():
    it = tqdm(resources, unit="prefix", unit_scale=True)
    for resource in it:
        it.set_postfix(
            failed=len(failed_prefixes),
            succeeded=len(succeeded_prefixes),
            current=resource.prefix,
        )
        try:
            resource_ids = pyobo.get_ids(resource.prefix, strict=False)
        except Exception as exc:
            # Best effort: one broken resource must not abort the whole harvest,
            # but keep the reason so the failure is diagnosable.
            reason = f"{type(exc).__name__}: {exc}"
            failure_reasons[resource.prefix] = reason
            failed_prefixes.add(resource.prefix)
            # Only the first line goes to the progress output to keep it compact;
            # the full text stays available in failure_reasons.
            tqdm.write(f"failed on {resource.prefix}: {reason.splitlines()[0]}")
            continue

        # Resources that yield no identifiers count as succeeded but are not stored.
        if resource_ids:
            ids[resource.prefix] = resource_ids
        succeeded_prefixes.add(resource.prefix)

  0%|          | 0.00/362 [00:00<?, ?prefix/s]

failed on ado


INFO: [2023-02-10 13:39:21] pystow.utils - downloading with urllib from http://purl.allotrope.org/voc/afo/merged-OLS/REC/2019/05/10 to /Users/cthoyt/.data/pyobo/raw/afo/2019-05-10/10


failed on aeon
failed on afo
failed on apaonto
failed on apollosv


[Fatal Error] :67773:173: The reference to entity "idAhol" must end with the ';' delimiter.


failed on atol
failed on bao
failed on bcio
failed on bco


INFO: [2023-02-10 13:40:05] pyobo.reader - [biolink] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/biolink/biolink.obo
INFO: [2023-02-10 13:40:05] pyobo.reader - [biolink] extracting OBO using obonet
INFO: [2023-02-10 13:40:05] pyobo.reader - [biolink] does not report a date


failed on biolink
failed on biomodels.kisao


INFO: [2023-02-10 13:40:09] pyobo.reader - [biomodels.teddy] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/biomodels.teddy/2014-04-24/teddy-inferred.obo
INFO: [2023-02-10 13:40:09] pyobo.reader - [biomodels.teddy] extracting OBO using obonet
INFO: [2023-02-10 13:40:09] pyobo.reader - [biomodels.teddy] does not report a date
INFO: [2023-02-10 13:40:09] pyobo.reader - [biomodels.teddy] using version 2014-04-24


failed on biomodels.teddy


INFO: [2023-02-10 13:40:11] pyobo.getters - [ccle] caching nomenclature plugin


failed on bko


[Fatal Error] :16:76: Attribute name "crossorigin" associated with an element type "link" must be followed by the ' = ' character.


failed on cdt
failed on cheminf


INFO: [2023-02-10 13:40:26] pyobo.reader - [chemrof] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/chemrof/chemrof.obo
INFO: [2023-02-10 13:40:26] pyobo.reader - [chemrof] extracting OBO using obonet
INFO: [2023-02-10 13:40:26] pyobo.reader - [chemrof] does not report a date


failed on chemrof


INFO: [2023-02-10 13:40:31] pyobo.reader - [classyfire] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/classyfire/ChemOnt_2_1.obo.zip


failed on cido
failed on classyfire
failed on clo


INFO: [2023-02-10 13:40:43] pyobo.reader - [co_320] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_320/2019-10-09/owl.obo
INFO: [2023-02-10 13:40:43] pyobo.reader - [co_320] extracting OBO using obonet
INFO: [2023-02-10 13:40:43] pyobo.reader - [co_320] does not report a date


failed on co_320


INFO: [2023-02-10 13:40:45] pyobo.reader - [co_321] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_321/2019-10-09/owl.obo
INFO: [2023-02-10 13:40:45] pyobo.reader - [co_321] extracting OBO using obonet
INFO: [2023-02-10 13:40:45] pyobo.reader - [co_321] does not report a date


failed on co_321


INFO: [2023-02-10 13:40:46] pyobo.reader - [co_322] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_322/2019-10-09/owl.obo
INFO: [2023-02-10 13:40:46] pyobo.reader - [co_322] extracting OBO using obonet
INFO: [2023-02-10 13:40:46] pyobo.reader - [co_322] does not report a date


failed on co_322


INFO: [2023-02-10 13:40:48] pyobo.reader - [co_323] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_323/2019-11-20/owl.obo
INFO: [2023-02-10 13:40:48] pyobo.reader - [co_323] extracting OBO using obonet
INFO: [2023-02-10 13:40:48] pyobo.reader - [co_323] does not report a date


failed on co_323


INFO: [2023-02-10 13:40:49] pyobo.reader - [co_324] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_324/2019-10-09/owl.obo
INFO: [2023-02-10 13:40:49] pyobo.reader - [co_324] extracting OBO using obonet
INFO: [2023-02-10 13:40:49] pyobo.reader - [co_324] does not report a date


failed on co_324
failed on co_325


INFO: [2023-02-10 13:40:52] pyobo.reader - [co_326] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_326/owl.obo
INFO: [2023-02-10 13:40:52] pyobo.reader - [co_326] extracting OBO using obonet
INFO: [2023-02-10 13:40:52] pyobo.reader - [co_326] does not report a date


failed on co_326


INFO: [2023-02-10 13:40:53] pyobo.reader - [co_327] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_327/owl.obo
INFO: [2023-02-10 13:40:53] pyobo.reader - [co_327] extracting OBO using obonet
INFO: [2023-02-10 13:40:53] pyobo.reader - [co_327] does not report a date


failed on co_327


INFO: [2023-02-10 13:40:55] pyobo.reader - [co_330] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_330/2019-10-09/owl.obo
INFO: [2023-02-10 13:40:55] pyobo.reader - [co_330] extracting OBO using obonet
INFO: [2023-02-10 13:40:55] pyobo.reader - [co_330] does not report a date


failed on co_330


INFO: [2023-02-10 13:40:56] pyobo.reader - [co_331] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_331/owl.obo
INFO: [2023-02-10 13:40:57] pyobo.reader - [co_331] extracting OBO using obonet
INFO: [2023-02-10 13:40:57] pyobo.reader - [co_331] does not report a date


failed on co_331
failed on co_333


INFO: [2023-02-10 13:41:00] pyobo.reader - [co_334] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_334/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:00] pyobo.reader - [co_334] extracting OBO using obonet
INFO: [2023-02-10 13:41:00] pyobo.reader - [co_334] does not report a date


failed on co_334


INFO: [2023-02-10 13:41:01] pyobo.reader - [co_335] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_335/owl.obo
INFO: [2023-02-10 13:41:01] pyobo.reader - [co_335] extracting OBO using obonet
INFO: [2023-02-10 13:41:01] pyobo.reader - [co_335] does not report a date


failed on co_335


INFO: [2023-02-10 13:41:02] pyobo.reader - [co_336] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_336/owl.obo
INFO: [2023-02-10 13:41:02] pyobo.reader - [co_336] extracting OBO using obonet
INFO: [2023-02-10 13:41:02] pyobo.reader - [co_336] does not report a date


failed on co_336
failed on co_337


INFO: [2023-02-10 13:41:05] pyobo.reader - [co_338] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_338/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:05] pyobo.reader - [co_338] extracting OBO using obonet
INFO: [2023-02-10 13:41:05] pyobo.reader - [co_338] does not report a date


failed on co_338


INFO: [2023-02-10 13:41:06] pyobo.reader - [co_339] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_339/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:06] pyobo.reader - [co_339] extracting OBO using obonet
INFO: [2023-02-10 13:41:06] pyobo.reader - [co_339] does not report a date


failed on co_339


INFO: [2023-02-10 13:41:07] pyobo.reader - [co_340] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_340/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:07] pyobo.reader - [co_340] extracting OBO using obonet
INFO: [2023-02-10 13:41:07] pyobo.reader - [co_340] does not report a date


failed on co_340


INFO: [2023-02-10 13:41:08] pyobo.reader - [co_341] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_341/2019-11-18/owl.obo
INFO: [2023-02-10 13:41:08] pyobo.reader - [co_341] extracting OBO using obonet
INFO: [2023-02-10 13:41:08] pyobo.reader - [co_341] does not report a date


failed on co_341


INFO: [2023-02-10 13:41:10] pyobo.reader - [co_343] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_343/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:10] pyobo.reader - [co_343] extracting OBO using obonet
INFO: [2023-02-10 13:41:10] pyobo.reader - [co_343] does not report a date


failed on co_343


INFO: [2023-02-10 13:41:11] pyobo.reader - [co_345] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_345/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:11] pyobo.reader - [co_345] extracting OBO using obonet
INFO: [2023-02-10 13:41:11] pyobo.reader - [co_345] does not report a date


failed on co_345


INFO: [2023-02-10 13:41:12] pyobo.reader - [co_346] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_346/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:12] pyobo.reader - [co_346] extracting OBO using obonet
INFO: [2023-02-10 13:41:12] pyobo.reader - [co_346] does not report a date


failed on co_346


INFO: [2023-02-10 13:41:13] pyobo.reader - [co_347] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_347/owl.obo
INFO: [2023-02-10 13:41:13] pyobo.reader - [co_347] extracting OBO using obonet
INFO: [2023-02-10 13:41:13] pyobo.reader - [co_347] does not report a date


failed on co_347
failed on co_348


INFO: [2023-02-10 13:41:16] pyobo.reader - [co_350] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_350/2019-10-09/owl.obo
INFO: [2023-02-10 13:41:16] pyobo.reader - [co_350] extracting OBO using obonet
INFO: [2023-02-10 13:41:16] pyobo.reader - [co_350] does not report a date


failed on co_350


INFO: [2023-02-10 13:41:17] pyobo.reader - [co_356] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_356/2019-11-29/owl.obo
INFO: [2023-02-10 13:41:18] pyobo.reader - [co_356] extracting OBO using obonet
INFO: [2023-02-10 13:41:18] pyobo.reader - [co_356] does not report a date


failed on co_356


Exception in thread "main" java.lang.NoClassDefFoundError: javax/xml/bind/annotation/adapters/HexBinaryAdapter
	at org.openrdf.rio.helpers.RDFParserBase.createBNode(RDFParserBase.java:485)
	at org.openrdf.rio.rdfxml.RDFXMLParser.createBNode(RDFXMLParser.java:1046)
	at org.openrdf.rio.rdfxml.RDFXMLParser.getNodeResource(RDFXMLParser.java:729)
	at org.openrdf.rio.rdfxml.RDFXMLParser.processNodeElt(RDFXMLParser.java:621)
	at org.openrdf.rio.rdfxml.RDFXMLParser.startElement(RDFXMLParser.java:527)
	at org.openrdf.rio.rdfxml.SAXFilter.reportDeferredStartElement(SAXFilter.java:334)
	at org.openrdf.rio.rdfxml.SAXFilter.startElement(SAXFilter.java:257)
	at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.startElement(AbstractSAXParser.java:510)
	at java.xml/com.sun.org.apache.xerces.internal.parsers.AbstractXMLDocumentParser.emptyElement(AbstractXMLDocumentParser.java:183)
	at java.xml/com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.scanStartElement(XMLNSDo

failed on co_357


INFO: [2023-02-10 13:41:20] pyobo.reader - [co_358] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_358/2020-01-03/owl.obo
INFO: [2023-02-10 13:41:20] pyobo.reader - [co_358] extracting OBO using obonet
INFO: [2023-02-10 13:41:20] pyobo.reader - [co_358] does not report a date


failed on co_358


INFO: [2023-02-10 13:41:22] pyobo.reader - [co_359] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_359/owl.obo
INFO: [2023-02-10 13:41:22] pyobo.reader - [co_359] extracting OBO using obonet
INFO: [2023-02-10 13:41:22] pyobo.reader - [co_359] does not report a date


failed on co_359


INFO: [2023-02-10 13:41:23] pyobo.reader - [co_360] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_360/owl.obo
INFO: [2023-02-10 13:41:23] pyobo.reader - [co_360] extracting OBO using obonet
INFO: [2023-02-10 13:41:23] pyobo.reader - [co_360] does not report a date


failed on co_360


INFO: [2023-02-10 13:41:24] pyobo.reader - [co_365] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_365/2019-10-15/owl.obo
INFO: [2023-02-10 13:41:24] pyobo.reader - [co_365] extracting OBO using obonet
INFO: [2023-02-10 13:41:24] pyobo.reader - [co_365] does not report a date


failed on co_365


INFO: [2023-02-10 13:41:25] pyobo.reader - [co_366] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/co_366/owl.obo
INFO: [2023-02-10 13:41:25] pyobo.reader - [co_366] extracting OBO using obonet
INFO: [2023-02-10 13:41:25] pyobo.reader - [co_366] does not report a date


failed on co_366
failed on cob
failed on cto
failed on datacite




failed on dcat


INFO: [2023-02-10 13:41:38] pyobo.getters - [depmap] caching nomenclature plugin
INFO: [2023-02-10 13:41:38] pyobo.getters - [dictybase] caching nomenclature plugin

  0%|                                                 | 0/14222 [00:00<?, ?it/s][A

  0%|                                                 | 0/14222 [00:00<?, ?it/s][A
  0%|                                        | 23/14222 [00:01<12:36, 18.76it/s][A
  2%|▊                                     | 307/14222 [00:01<00:49, 280.03it/s][A
 48%|█████████████████▎                  | 6836/14222 [00:01<00:01, 6621.41it/s][A
100%|███████████████████████████████████| 14222/14222 [00:01<00:00, 7139.36it/s][A


failed on dideo


INFO: [2023-02-10 13:41:44] pyobo.getters - [drugbank] caching nomenclature plugin


failed on drugbank


INFO: [2023-02-10 13:41:46] pyobo.getters - [drugcentral] caching nomenclature plugin


loading xrefs:   0%|          | 0.00/80.7k [00:00<?, ?it/s]

failed on dto
failed on duo
failed on ecg


INFO: [2023-02-10 13:42:54] pyobo.reader - [emapa] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/emapa/2022-07-14/emapa.obo


failed on edda


INFO: [2023-02-10 13:42:55] pyobo.reader - [emapa] extracting OBO using obonet
INFO: [2023-02-10 13:42:55] pyobo.reader - [emapa] using version 2022-08-31


failed on emapa


[Fatal Error] :188:80: Element type "header" must be followed by either attribute specifications, ">" or "/>".


failed on ensemblglossary
failed on eol
failed on epio
failed on epso
failed on eupath


INFO: [2023-02-10 13:43:03] pyobo.reader - [fbbi] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/fbbi/2020-11-06/fbbi.obo
INFO: [2023-02-10 13:43:03] pyobo.reader - [fbbi] extracting OBO using obonet
INFO: [2023-02-10 13:43:03] pyobo.reader - [fbbi] does not report a date
INFO: [2023-02-10 13:43:03] pyobo.reader - [fbbi] using version 2020-11-06


failed on faldo
failed on fbbi
failed on fideo


INFO: [2023-02-10 13:43:06] pyobo.getters - [flybase] caching nomenclature plugin
  return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)

  0%|                                                 | 0/35723 [00:00<?, ?it/s][A
  1%|▌                                    | 535/35723 [00:00<00:12, 2861.36it/s][A
 20%|███████                            | 7235/35723 [00:00<00:01, 18584.70it/s][A
 40%|█████████████▋                    | 14411/35723 [00:00<00:00, 23143.39it/s][A
 62%|█████████████████████             | 22183/35723 [00:00<00:00, 25045.00it/s][A
100%|██████████████████████████████████| 35723/35723 [00:01<00:00, 26737.59it/s][A
INFO: [2023-02-10 13:43:11] pyobo.reader - [foaf] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/foaf/foaf.obo
INFO: [2023-02-10 13:43:11] pyobo.reader - [foaf] extracting OBO using obonet
INFO: [2023-02-10 13:43:11] pyobo.reader - [foaf] does not report a date


failed on foaf
failed on genepio
failed on geo
failed on geogeo
failed on gfo


INFO: [2023-02-10 13:43:29] pystow.utils - downloading with urllib from ftp://ftp.geneontology.org/pub/go/ontology/extensions/gorel.owl to /Users/cthoyt/.data/pyobo/raw/gorel/gorel.owl


failed on gorel


INFO: [2023-02-10 13:44:46] pyobo.reader - [hancestro] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/hancestro/2.6/hancestro.obo
INFO: [2023-02-10 13:44:46] pyobo.reader - [hancestro] extracting OBO using obonet
INFO: [2023-02-10 13:44:46] pyobo.reader - [hancestro] does not report a date
INFO: [2023-02-10 13:44:46] pyobo.reader - [hancestro] using version 2022-05-12
INFO: [2023-02-10 13:44:46] pystow.utils - downloading with urllib from http://ontology.data.humancellatlas.org/ontologies/hcao/releases/2022-12-16/hcao.owl to /Users/cthoyt/.data/pyobo/raw/hcao/2022-12-16/hcao.owl
INFO: [2023-02-10 13:44:46] pyobo.getters - [hgnc] caching nomenclature plugin
INFO: [2023-02-10 13:44:46] pystow.utils - downloading with urllib from https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/json/hgnc_complete_set_2023-02-01.json to /Users/cthoyt/.data/pyobo/raw/hgnc/2023-02-01/hgnc_complete_set.json
INFO: [2023-02-10 13:44:46] pystow.utils - downloading with urllib from https

failed on hancestro
failed on hcao
failed on hgnc
failed on hoip


INFO: [2023-02-10 13:44:51] pyobo.reader - [hpath] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/hpath/2019-07-05/hpath.obo


failed on hpath
failed on hso
failed on htn
failed on icdo
failed on iceo
failed on ico
failed on ido
failed on idocovid19
failed on idoden
failed on interpro


INFO: [2023-02-10 13:45:27] pyobo.getters - [itis] caching nomenclature plugin
INFO: [2023-02-10 13:45:27] pystow.utils - downloading with urllib from https://www.itis.gov/downloads/itisSqlite.zip to /Users/cthoyt/.data/pyobo/raw/itis/30-Jan-2023/itisSqlite.zip


failed on ito
failed on labo
failed on mdm
failed on mfmo
failed on mfoem
failed on miapa
failed on micro
failed on mosaic
failed on mpio
failed on msigdb


INFO: [2023-02-10 13:49:12] pyobo.api.names - [ncbigene] loading name mappings


failed on msio


INFO: [2023-02-10 13:51:02] pyobo.api.names - [ncbigene] done loading name mappings


failed on nemo
failed on npo
failed on oae
failed on obcs
failed on obib
failed on ogg
failed on ogsf
failed on ohd
failed on ohmi
failed on ohpi




failed on om




failed on ons
failed on ontoneo
failed on oostt
failed on opb
failed on opl


INFO: [2023-02-10 13:53:58] pyobo.reader - [ornaseq] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/ornaseq/2019-07-08/ornaseq.obo
INFO: [2023-02-10 13:53:58] pyobo.reader - [ornaseq] extracting OBO using obonet
INFO: [2023-02-10 13:53:58] pyobo.reader - [ornaseq] does not report a date
INFO: [2023-02-10 13:53:58] pyobo.reader - [ornaseq] using version 2019-07-08


failed on opmi
failed on ornaseq
failed on orphanet.ordo
failed on orth
failed on ovae


INFO: [2023-02-10 13:54:32] pyobo.reader - [owl] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/owl/download.trig.obo


failed on owl


INFO: [2023-02-10 13:54:33] pyobo.reader - [pcl] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/pcl/2022-10-19/pcl.obo
INFO: [2023-02-10 13:54:51] pyobo.reader - [pcl] extracting OBO using obonet
INFO: [2023-02-10 13:54:52] pyobo.reader - [pcl] does not report a date
INFO: [2023-02-10 13:54:52] pyobo.reader - [pcl] using version 2022-10-19


failed on pcl
failed on pdro


INFO: [2023-02-10 13:54:59] pyobo.reader - [pmr] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/pmr/pmr.obo
INFO: [2023-02-10 13:54:59] pyobo.reader - [pmr] extracting OBO using obonet
INFO: [2023-02-10 13:54:59] pyobo.reader - [pmr] does not report a date


failed on pmr


INFO: [2023-02-10 13:55:00] pyobo.getters - [pombase] caching nomenclature plugin
INFO: [2023-02-10 13:55:00] pyobo.getters - [hgnc] caching nomenclature plugin
INFO: [2023-02-10 13:55:00] pystow.utils - downloading with urllib from https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/json/hgnc_complete_set_2023-02-01.json to /Users/cthoyt/.data/pyobo/raw/hgnc/2023-02-01/hgnc_complete_set.json


failed on pombase
failed on ppo


INFO: [2023-02-10 13:55:09] pyobo.reader - [pride] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/pride/2012-03-06/pride_cv.obo


failed on pride
failed on proco
failed on prov
failed on qudt


INFO: [2023-02-10 13:55:21] pyobo.getters - [reactome] caching nomenclature plugin

mapping reactome:   0%|                               | 0/22040 [00:00<?, ?it/s][A
mapping reactome:  30%|█████            | 6507/22040 [00:00<00:00, 64861.76it/s][A
mapping reactome:  59%|█████████▍      | 12994/22040 [00:00<00:00, 64670.99it/s][A
mapping reactome: 100%|████████████████| 22040/22040 [00:00<00:00, 64480.99it/s][A

  0%|                                                | 0/913445 [00:00<?, ?it/s][A
  2%|▋                               | 18298/913445 [00:00<00:04, 182959.67it/s][A
  4%|█▎                              | 37053/913445 [00:00<00:04, 185650.99it/s][A
  6%|█▉                              | 55894/913445 [00:00<00:04, 186902.90it/s][A
  8%|██▋                             | 75342/913445 [00:00<00:04, 189885.76it/s][A
 10%|███▎                            | 94698/913445 [00:00<00:04, 191203.14it/s][A
 12%|███▊                           | 113819/913445 [00:00<00:04, 189618.28

failed on reo
failed on reproduceme


INFO: [2023-02-10 13:56:07] pyobo.getters - [rgd] caching nomenclature plugin
INFO: [2023-02-10 13:56:07] pystow.utils - downloading with urllib from https://download.rgd.mcw.edu/data_release/GENES.RAT.txt to /Users/cthoyt/.data/pyobo/raw/rgd/2023-02-03/GENES.RAT.txt


  return pd.read_csv(_path, sep=sep, dtype=dtype, **kwargs)

Mapping rgd:   0%|                                 | 0.00/66.8k [00:00<?, ?it/s][A
Mapping rgd:   0%|                          | 104/66.8k [00:00<01:04, 1.04kit/s][A
Mapping rgd:   1%|▏                         | 350/66.8k [00:00<00:35, 1.87kit/s][A
Mapping rgd:   1%|▏                         | 627/66.8k [00:00<00:29, 2.28kit/s][A
Mapping rgd:   1%|▎                         | 911/66.8k [00:00<00:26, 2.50kit/s][A
Mapping rgd:   2%|▍                       | 1.24k/66.8k [00:00<00:23, 2.76kit/s][A
Mapping rgd:   2%|▌                       | 1.54k/66.8k [00:00<00:22, 2.87kit/s][A
Mapping rgd:   3%|▋                       | 1.88k/66.8k [00:00<00:21, 3.01kit/s]

Mapping rgd:  71%|█████████████████       | 47.5k/66.8k [00:28<00:03, 6.27kit/s][A
Mapping rgd:  72%|█████████████████▎      | 48.1k/66.8k [00:28<00:02, 6.39kit/s][A
Mapping rgd:  73%|█████████████████▌      | 48.8k/66.8k [00:28<00:02, 6.52kit/s][A
Mapping rgd:  74%|█████████████████▊      | 49.5k/66.8k [00:28<00:02, 6.63kit/s][A
Mapping rgd:  75%|██████████████████      | 50.2k/66.8k [00:28<00:02, 6.68kit/s][A
Mapping rgd:  76%|██████████████████▎     | 50.9k/66.8k [00:29<00:02, 6.68kit/s][A
Mapping rgd:  77%|██████████████████▌     | 51.6k/66.8k [00:29<00:02, 6.71kit/s][A
Mapping rgd:  78%|██████████████████▊     | 52.3k/66.8k [00:29<00:02, 6.79kit/s][A
Mapping rgd:  79%|███████████████████     | 52.9k/66.8k [00:29<00:02, 6.80kit/s][A
Mapping rgd:  80%|███████████████████▎    | 53.6k/66.8k [00:29<00:01, 6.81kit/s][A
Mapping rgd:  81%|███████████████████▌    | 54.3k/66.8k [00:29<00:01, 6.80kit/s][A
Mapping rgd:  82%|███████████████████▊    | 55.0k/66.8k [00:29<00:01, 6.85ki

failed on schema




failed on sdgio
failed on sepio


INFO: [2023-02-10 13:58:48] pyobo.getters - [sgd] caching nomenclature plugin
[Fatal Error] :26:7: The element type "link" must be terminated by the matching end-tag "</link>".


failed on snomedct
failed on sphn
failed on srao


INFO: [2023-02-10 13:59:04] pyobo.reader - [sweetrealm] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/sweetrealm/sweetrealm.obo
INFO: [2023-02-10 13:59:04] pystow.utils - downloading with urllib from https://github.com/allysonlister/swo/raw/master/release/swo.obo to /Users/cthoyt/.data/pyobo/raw/swo/2022-10-11/swo.obo


failed on stato
failed on sweetrealm
failed on swo


INFO: [2023-02-10 13:59:06] pyobo.reader - [time] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/time/time.obo
INFO: [2023-02-10 13:59:06] pyobo.reader - [time] extracting OBO using obonet
INFO: [2023-02-10 13:59:06] pyobo.reader - [time] does not report a date
INFO: [2023-02-10 13:59:06] pyobo.reader - [time] using version http://www.w3.org/2006/time#2016


failed on time
failed on txpo


INFO: [2023-02-10 13:59:21] pyobo.reader - [unimod] parsing with obonet from /Users/cthoyt/.data/pyobo/raw/unimod/2019-11-21/unimod.obo
INFO: [2023-02-10 13:59:22] pyobo.getters - [uniprot] caching nomenclature plugin


failed on unimod



Mapping UniProt: 0it [00:00, ?it/s][A
Mapping UniProt: 6231it [00:00, 62292.39it/s][A
Mapping UniProt: 12461it [00:00, 55660.61it/s][A
Mapping UniProt: 18074it [00:00, 53705.88it/s][A
Mapping UniProt: 23468it [00:00, 53199.15it/s][A
Mapping UniProt: 28800it [00:00, 52524.02it/s][A
Mapping UniProt: 34059it [00:00, 52259.54it/s][A
Mapping UniProt: 39288it [00:00, 51295.26it/s][A
Mapping UniProt: 44434it [00:00, 51344.31it/s][A
Mapping UniProt: 49696it [00:00, 51729.89it/s][A
Mapping UniProt: 55088it [00:01, 52392.29it/s][A
Mapping UniProt: 61229it [00:01, 55118.26it/s][A
Mapping UniProt: 67539it [00:01, 57525.34it/s][A
Mapping UniProt: 73906it [00:01, 59372.82it/s][A
Mapping UniProt: 80349it [00:01, 60891.61it/s][A
Mapping UniProt: 86736it [00:01, 61785.18it/s][A
Mapping UniProt: 93054it [00:01, 62202.68it/s][A
Mapping UniProt: 99552it [00:01, 63035.12it/s][A
Mapping UniProt: 106014it [00:01, 63507.99it/s][A
Mapping UniProt: 112408it [00:01, 63634.61it/s][A
Mapping U

failed on uniprot.disease


INFO: [2023-02-10 14:00:09] pystow.utils - downloading with urllib from https://www.uniprot.org/locations/?query=*&format=obo to /Users/cthoyt/.data/pyobo/raw/uniprot.location/2022-06-09/locations


failed on uniprot.keyword


INFO: [2023-02-10 14:00:09] pystow.utils - downloading with urllib from https://github.com/obophenotype/upheno/blob/master/upheno.obo to /Users/cthoyt/.data/pyobo/raw/upheno/2017-10-31/upheno.obo


failed on uniprot.location
failed on upheno
failed on vido
failed on vo
failed on vso


INFO: [2023-02-10 14:00:31] pyobo.getters - [zfin] caching nomenclature plugin
INFO: [2023-02-10 14:00:31] pystow.utils - downloading with urllib from https://zfin.org/downloads/zdb_history.txt to /Users/cthoyt/.data/pyobo/raw/zfin/9 Feb 2023/alts.tsv
INFO: [2023-02-10 14:00:33] pystow.utils - downloading with urllib from https://zfin.org/downloads/human_orthos.txt to /Users/cthoyt/.data/pyobo/raw/zfin/9 Feb 2023/human_orthos.txt
INFO: [2023-02-10 14:00:36] pystow.utils - downloading with urllib from https://zfin.org/downloads/mouse_orthos.txt to /Users/cthoyt/.data/pyobo/raw/zfin/9 Feb 2023/mouse_orthos.txt
INFO: [2023-02-10 14:00:39] pystow.utils - downloading with urllib from https://zfin.org/downloads/fly_orthos.txt to /Users/cthoyt/.data/pyobo/raw/zfin/9 Feb 2023/fly_orthos.txt
INFO: [2023-02-10 14:00:40] pystow.utils - downloading with urllib from https://zfin.org/downloads/gene.txt to /Users/cthoyt/.data/pyobo/raw/zfin/9 Feb 2023/gene.txt
INFO: [2023-02-10 14:00:42] pystow.utils

failed on zfin


In [5]:
# Pairwise comparison of identifier spaces.
#
# For every unordered pair of prefixes with at least `min_terms` identifiers
# each, count the identifiers they share and normalize that count by the size
# of the smaller of the two resources. Pairs without any shared identifier are
# not reported. The result is written to edges.tsv and displayed.
min_terms = 10

# Convert each identifier collection to a set exactly once. Doing this inside
# the pair loop would copy every collection once per pair it takes part in.
id_sets = {
    prefix: luids if isinstance(luids, (set, frozenset)) else set(luids)
    for prefix, luids in ids.items()
}

# Number of unordered pairs; integer division keeps the progress total exact.
n_prefixes = len(id_sets)
total = n_prefixes * (n_prefixes - 1) // 2

rows = []
prefixes_encountered = set()
for (a_prefix, a), (b_prefix, b) in tqdm(
    combinations(id_sets.items(), 2), total=total, unit_scale=True
):
    # Sizes are taken from the original collections so the reported numbers
    # are the same as before the set conversion was hoisted.
    len_a, len_b = len(ids[a_prefix]), len(ids[b_prefix])
    if len_a < min_terms or len_b < min_terms:
        continue

    # Recorded before the intersection test: a prefix becomes a node even if
    # this particular pair turns out to share nothing.
    prefixes_encountered.add(a_prefix)
    prefixes_encountered.add(b_prefix)

    intersection = a.intersection(b)
    if not intersection:
        continue

    n_intersection = len(intersection)
    overlap = n_intersection / min(len_a, len_b)
    rows.append((a_prefix, b_prefix, len_a, len_b, n_intersection, round(overlap, 5)))

# Sort descending by normalized overlap
rows = sorted(rows, key=itemgetter(5), reverse=True)
overlap_df = pd.DataFrame(
    rows,
    columns=[
        "source_prefix",
        "target_prefix",
        "source_size",
        "target_size",
        "absolute_overlap",
        "normalized_overlap",
    ],
)
overlap_df.to_csv("edges.tsv", sep="\t", index=False)
overlap_df

  0%|          | 0.00/14.0k [00:00<?, ?it/s]

Unnamed: 0,source_prefix,target_prefix,source_size,target_size,absolute_overlap,normalized_overlap
0,apo,lbo,309,1108,309,1.00000
1,apo,sbo,309,694,309,1.00000
2,bfo,bto,35,6511,35,1.00000
3,bfo,chmo,35,2908,35,1.00000
4,bfo,cmo,35,3025,35,1.00000
...,...,...,...,...,...,...
5325,doid,efo,11207,15540,1,0.00009
5326,doid,mp,11207,13569,1,0.00009
5327,fbbt,tto,17570,38640,1,0.00006
5328,fbbt,xpo,17570,20061,1,0.00006


In [6]:
# One row per prefix that took part in at least one comparison:
# (prefix, Bioregistry name, Bioregistry example identifier, number of identifiers).
prefix_rows = []
for prefix in sorted(prefixes_encountered):
    node_row = (
        prefix,
        bioregistry.get_name(prefix),
        bioregistry.get_example(prefix),
        len(ids[prefix]),
    )
    prefix_rows.append(node_row)

# Tabulate the rows and save them as the node list.
node_columns = ["prefix", "name", "example_luid", "n_luids"]
prefix_df = pd.DataFrame(prefix_rows, columns=node_columns)
prefix_df.to_csv("nodes.tsv", sep="\t", index=False)
prefix_df

Unnamed: 0,prefix,name,example_luid,n_luids
0,agro,Agronomy Ontology,00020007,772
1,aism,Ontology for the Anatomy of the Insect Skeleto...,0000027,553
2,amphx,The Amphioxus Development and Anatomy Ontology,1000160,399
3,antibodyregistry,Antibody Registry,493771,1898783
4,apo,Ascomycete phenotype ontology,0000184,309
...,...,...,...,...
153,xpo,Xenopus Phenotype Ontology,0100002,20061
154,zeco,Zebrafish Experimental Conditions Ontology,0000171,161
155,zfa,Zebrafish anatomy and development ontology,0005926,3079
156,zfs,Zebrafish developmental stages ontology,0000050,54


In [7]:
# Build an undirected graph: one node per prefix, one edge per overlapping pair.
graph = nx.Graph()

# Nodes carry the prefix metadata gathered above plus the Bioregistry description.
# NOTE(review): GraphML export presumably cannot store None values - confirm that
# every prefix has a name and description before relying on the export below.
for prefix, name, _example, size in prefix_rows:
    node_attributes = {
        "name": name,
        "size": size,
        "prefix": prefix,
        "description": bioregistry.get_description(prefix),
    }
    graph.add_node(prefix, **node_attributes)

# Edges are weighted by the normalized overlap; the raw count is kept as well.
for source, target, _, _, absolute_overlap, normalized_overlap in rows:
    edge_attributes = {
        "weight": normalized_overlap,
        "absolute_overlap": absolute_overlap,
    }
    graph.add_edge(source, target, **edge_attributes)

In [8]:
# Export the overlap graph to the file "graph.graphml" in GraphML format.
nx.write_graphml_lxml(graph, "graph.graphml")

In [9]:
# Export the overlap graph in Cytoscape's JSON layout, indented for readability.
with open("graph.cytoscape.json", "w") as file:
    cytoscape_payload = nx.cytoscape_data(graph)
    file.write(json.dumps(cytoscape_payload, indent=2))