In [1]:
import bioontologies
import networkx as nx
from collections import defaultdict
import itertools as itt
import bioregistry
import rdflib
import json

In [2]:
# Fetch the parsed OBO Graph document registered under the "probonto" prefix.
parse_results = bioontologies.get_obograph_by_prefix("probonto")

# `guess` presumably selects the graph that belongs to the given prefix
# (TODO confirm against bioontologies); `standardize` then rewrites the nodes
# and edges, which is what produces the two progress bars below.
r = parse_results.guess("probonto").standardize()

standardizing nodes:   0%|          | 0.00/1.45k [00:00<?, ?it/s]

standardizing edges:   0%|          | 0.00/2.85k [00:00<?, ?it/s]

In [3]:
# The raw OWL file is loaded separately into rdflib because
# `get_data_properties` queries it directly for property values.
# NOTE: despite the ``.owl`` extension, the file is parsed as Turtle.
url = "https://raw.githubusercontent.com/probonto/ontology/master/probonto4ols.owl"
rdf_graph = rdflib.Graph()
rdf_graph.parse(source=url, format="ttl")

<Graph identifier=N29afe3c0613c4ed380fedb9fbe0f1d61 (<class 'rdflib.graph.Graph'>)>

In [4]:
# CURIE -> label lookup; later cells index it with distribution and
# parameter CURIEs to fill in the "name" fields of the exported records.
labels = r.get_curie_to_name()

In [5]:
def get_instances(identifier):
    """Collect the subjects of every ``rdf:type`` edge that targets a ProbOnto term.

    :param identifier: Local ProbOnto identifier without the ``probonto:``
        prefix, e.g. ``"c0000020"``.
    :returns: A list of ``edge.sub`` values, in the order the matching edges
        appear in ``r.edges``. Empty if nothing matches.
    """
    target_curie = f"probonto:{identifier}"
    instances = []
    for edge in r.edges:
        # Only typing edges are relevant; skip every other predicate.
        if edge.pred != "rdf:type":
            continue
        if edge.obj == target_curie:
            instances.append(edge.sub)
    return instances


def get_data_properties(identifier):
    """Look up the values of one ProbOnto property for every subject that has it.

    The property IRI is built from ``identifier`` and handed to
    ``rdf_graph.subject_objects``; each resulting (subject, object) pair
    becomes one dictionary entry.

    :param identifier: Local ProbOnto identifier of the property, e.g.
        ``"c0000031"``.
    :returns: A dict mapping the stringified subject (with the ProbOnto IRI
        prefix replaced by ``probonto:``) to the stringified object. If a
        subject occurs more than once, the last pair seen wins.
    """
    predicate = rdflib.URIRef(f"http://www.probonto.org/ontology#PROB_{identifier}")
    values = {}
    for subject, value in rdf_graph.subject_objects(predicate):
        # Compress the full IRI into a CURIE so keys line up with the
        # identifiers used by the edges in ``r``.
        curie = str(subject).replace(
            "http://www.probonto.org/ontology#PROB_", "probonto:"
        )
        values[curie] = str(value)
    return values

All distributions are individuals with a given type (http://www.probonto.org/ontology#PROB_c0000020)

In [6]:
# Subjects of all rdf:type edges whose object is probonto:c0000020.
# NOTE(review): `distributions` is not referenced by any later cell visible
# here; the export iterates `distribution_to_parameters` instead.
distributions = get_instances("c0000020")

Each distribution is linked to its parameters through a specific object property (i.e., predicate), `probonto:c0000062`.

In [7]:
# Group edge objects under their edge subject for every edge that uses the
# probonto:c0000062 predicate, i.e. distribution CURIE -> parameter CURIEs.
distribution_to_parameters = {}
for edge in r.edges:
    if edge.pred != "probonto:c0000062":
        continue
    # setdefault creates the list the first time a distribution is seen.
    distribution_to_parameters.setdefault(edge.sub, []).append(edge.obj)
distribution_to_parameters

{'probonto:k0001091': ['probonto:k0001095',
  'probonto:k0001093',
  'probonto:k0001094'],
 'probonto:k0001293': ['probonto:k0001299',
  'probonto:k0001298',
  'probonto:k0001297',
  'probonto:k0001296'],
 'probonto:k0001230': ['probonto:k0001234',
  'probonto:k0001235',
  'probonto:k0001233'],
 'probonto:k0000517': ['probonto:k0000525', 'probonto:k0000524'],
 'probonto:k0000543': ['probonto:k0000551',
  'probonto:k0000552',
  'probonto:k0000550'],
 'probonto:k0000644': ['probonto:k0000652',
  'probonto:k0000653',
  'probonto:k0000650',
  'probonto:k0000651'],
 'probonto:k0000562': ['probonto:k0000569', 'probonto:k0000570'],
 'probonto:k0000719': ['probonto:k0000725', 'probonto:k0000726'],
 'probonto:k0000410': ['probonto:k0000417'],
 'probonto:k0000169': ['probonto:k0000174',
  'probonto:k0000175',
  'probonto:k0000176'],
 'probonto:k0000790': ['probonto:k0000797',
  'probonto:k0000798',
  'probonto:k0000799'],
 'probonto:k0001236': ['probonto:k0001242'],
 'probonto:k0001337': ['probo

In [8]:
# Flatten every per-distribution parameter list into one set of unique CURIEs.
parameters = {
    parameter_curie
    for parameter_curies in distribution_to_parameters.values()
    for parameter_curie in parameter_curies
}
parameters

{'probonto:k0000007',
 'probonto:k0000014',
 'probonto:k0000015',
 'probonto:k0000019',
 'probonto:k0000020',
 'probonto:k0000021',
 'probonto:k0000026',
 'probonto:k0000027',
 'probonto:k0000031',
 'probonto:k0000039',
 'probonto:k0000040',
 'probonto:k0000048',
 'probonto:k0000049',
 'probonto:k0000053',
 'probonto:k0000054',
 'probonto:k0000055',
 'probonto:k0000056',
 'probonto:k0000066',
 'probonto:k0000067',
 'probonto:k0000073',
 'probonto:k0000080',
 'probonto:k0000081',
 'probonto:k0000088',
 'probonto:k0000089',
 'probonto:k0000095',
 'probonto:k0000096',
 'probonto:k0000097',
 'probonto:k0000110',
 'probonto:k0000111',
 'probonto:k0000115',
 'probonto:k0000116',
 'probonto:k0000124',
 'probonto:k0000125',
 'probonto:k0000132',
 'probonto:k0000133',
 'probonto:k0000134',
 'probonto:k0000140',
 'probonto:k0000141',
 'probonto:k0000146',
 'probonto:k0000147',
 'probonto:k0000148',
 'probonto:k0000152',
 'probonto:k0000153',
 'probonto:k0000159',
 'probonto:k0000160',
 'probonto

In [9]:
# Spot-check a single node by its local unique identifier. `next` without a
# default raises StopIteration if no node carries that identifier.
matching_nodes = (candidate for candidate in r.nodes if candidate.luid == "k0000596")
node = next(matching_nodes)
node

Node(id='http://www.probonto.org/ontology#PROB_k0000596', lbl='maximum of Standard-Uniform-1', meta=None, type=None, prefix='probonto', luid='k0000596', standardized=True)

In [10]:
# CURIE -> value of the PROB_c0000051 property. Judging by the variable name
# this is presumably a short name (TODO confirm against the ontology). The
# export only uses it when present and otherwise derives one from the label.
object_to_short_name = get_data_properties("c0000051")

In [11]:
# CURIE -> value of the PROB_c0000031 property. The export writes these out
# as each parameter's "symbol"; the values seen in the results are LaTeX
# snippets such as \sigma and \mu.
object_to_latex = get_data_properties("c0000031")

In [12]:
# Two lookups keyed by edge subject, one per predicate. The next cell indexes
# both with reparametrization instances, so c0000071 / c0000072 presumably
# point at the source and target distribution respectively (TODO confirm).
from_distributions = {
    edge.sub: edge.obj for edge in r.edges if edge.pred == "probonto:c0000071"
}
to_distributions = {
    edge.sub: edge.obj for edge in r.edges if edge.pred == "probonto:c0000072"
}

from_distributions

{'probonto:k0000970': 'probonto:k0000453',
 'probonto:k0000920': 'probonto:k0000428',
 'probonto:k0000867': 'probonto:k0000587',
 'probonto:k0001423': 'probonto:k0000331',
 'probonto:k0001451': 'probonto:k0001230',
 'probonto:k0001414': 'probonto:k0001360',
 'probonto:k0001412': 'probonto:k0001082',
 'probonto:k0001436': 'probonto:k0000239',
 'probonto:k0000863': 'probonto:k0000239',
 'probonto:k0000972': 'probonto:k0000126',
 'probonto:k0000932': 'probonto:k0000074',
 'probonto:k0001422': 'probonto:k0000307',
 'probonto:k0001409': 'probonto:k0001279',
 'probonto:k0001432': 'probonto:k0001344',
 'probonto:k0000864': 'probonto:k0000142',
 'probonto:k0000928': 'probonto:k0000239',
 'probonto:k0001445': 'probonto:k0001243',
 'probonto:k0000823': 'probonto:k0000410',
 'probonto:k0000901': 'probonto:k0000057',
 'probonto:k0000828': 'probonto:k0000535',
 'probonto:k0000907': 'probonto:k0000307',
 'probonto:k0001416': 'probonto:k0001286',
 'probonto:k0000923': 'probonto:k0000135',
 'probonto:

In [13]:
# For every instance of probonto:c0000065, record that its "from"
# distribution maps to its "to" distribution. A KeyError here means an
# instance is missing one of the two endpoints.
same_distribution = {}
for reparametrization in get_instances("c0000065"):
    targets = same_distribution.setdefault(from_distributions[reparametrization], [])
    targets.append(to_distributions[reparametrization])

same_distribution

{'probonto:k0000290': ['probonto:k0000265', 'probonto:k0000239'],
 'probonto:k0000074': ['probonto:k0000190',
  'probonto:k0000105',
  'probonto:k0000135',
  'probonto:k0000161'],
 'probonto:k0000735': ['probonto:k0000712'],
 'probonto:k0000478': ['probonto:k0000553',
  'probonto:k0000500',
  'probonto:k0000428',
  'probonto:k0001028',
  'probonto:k0000453',
  'probonto:k0000526'],
 'probonto:k0000135': ['probonto:k0000074',
  'probonto:k0000217',
  'probonto:k0000161',
  'probonto:k0000190',
  'probonto:k0000105'],
 'probonto:k0000428': ['probonto:k0000453',
  'probonto:k0001028',
  'probonto:k0000478',
  'probonto:k0000526',
  'probonto:k0000500',
  'probonto:k0000553'],
 'probonto:k0000386': ['probonto:k0000604'],
 'probonto:k0000105': ['probonto:k0000074',
  'probonto:k0000161',
  'probonto:k0000190',
  'probonto:k0000135'],
 'probonto:k0000635': ['probonto:k0001091', 'probonto:k0000613'],
 'probonto:k0001028': ['probonto:k0000526',
  'probonto:k0000553',
  'probonto:k0000478',
  '

In [14]:
# Assemble one JSON-serializable record per distribution:
#   curie / name  - the distribution itself
#   equivalent    - distributions reachable through `same_distribution`
#   parameters    - one entry per parameter with its label, symbol and short name
#
# The per-distribution list is deliberately NOT called `parameters`: that name
# already holds the set of all parameter CURIEs built earlier, and rebinding it
# here would silently replace that set with the last distribution's records.
results = []
for distribution_curie, parameter_curies in distribution_to_parameters.items():
    parameter_records = []
    for parameter_curie in parameter_curies:
        parameter_name = labels[parameter_curie]
        parameter_records.append(
            {
                "curie": parameter_curie,
                "name": parameter_name,
                # A missing symbol raises KeyError on purpose: every exported
                # parameter is expected to have one.
                "symbol": object_to_latex[parameter_curie],
                # Fall back to the label text before " of " (e.g. "scale of X"
                # -> "scale") when no explicit short name is available.
                "short_name": object_to_short_name.get(parameter_curie)
                or parameter_name.split(" of ")[0],
            }
        )

    results.append(
        {
            "curie": distribution_curie,
            "name": labels[distribution_curie],
            "equivalent": [
                {"curie": equivalent_curie, "name": labels[equivalent_curie]}
                for equivalent_curie in same_distribution.get(distribution_curie, [])
            ],
            "parameters": parameter_records,
        }
    )

In [15]:
# Display the assembled records to eyeball them before writing to disk.
results

[{'curie': 'probonto:k0001091',
  'name': "Student's t-distribution 3",
  'equivalent': [{'curie': 'probonto:k0000635',
    'name': "Student's t-distribution 2"}],
  'parameters': [{'curie': 'probonto:k0001095',
    'name': 'scale of Student-s-t-distribution-3',
    'symbol': '\\sigma',
    'short_name': 'scale'},
   {'curie': 'probonto:k0001093',
    'name': 'degrees of freedom of Student-s-t-distribution-3',
    'symbol': '\\nu',
    'short_name': 'degrees of freedom'},
   {'curie': 'probonto:k0001094',
    'name': 'mean of Student-s-t-distribution-3',
    'symbol': '\\mu',
    'short_name': 'mean'}]},
 {'curie': 'probonto:k0001293',
  'name': 'Johnson SB 1',
  'equivalent': [],
  'parameters': [{'curie': 'probonto:k0001299',
    'name': 'scale parameter of Johnson-SB-1',
    'symbol': '\\sigma',
    'short_name': 'scale parameter'},
   {'curie': 'probonto:k0001298',
    'name': 'location parameter of Johnson-SB-1',
    'symbol': '\\mu',
    'short_name': 'location parameter'},
   {'

In [16]:
# ensure_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened with an explicit UTF-8 encoding; the platform default can otherwise
# raise UnicodeEncodeError or yield JSON that is not valid UTF-8.
with open("probability_distributions.json", "w", encoding="utf-8") as file:
    json.dump(results, file, indent=2, ensure_ascii=False)