In [1]:
from posteriordb import PosteriorDatabase
import os
import pandas as pd
import numpy as np

In [2]:
# Load DataBase Locally
pdb_path = os.path.join("../posteriordb/posterior_database")
my_pdb = PosteriorDatabase(pdb_path)

In [3]:
# Extract the Names of All Models
pos = my_pdb.posterior_names()

In [4]:
# Expand Nested List
def flat(nums):
    res = []
    for i in nums:
        if isinstance(i, list):
            res.extend(flat(i))
        else:
            res.append(i)
    return res

In [5]:
# Reordering Models in Ascending Dimensional Order
d = {}
n = 0
for i in pos:
    try:
        d[i] = sum(my_pdb.posterior(i).information['dimensions'].values())
    except TypeError:
        d[i] = sum(flat(my_pdb.posterior(i).information['dimensions'].values()))
df = pd.DataFrame.from_dict(d, orient='index', columns=['dimensions'])
df.sort_values(by=['dimensions'], ascending=True, inplace=True)

In [6]:
df.head(10)

Unnamed: 0,dimensions
Rate_3_data-Rate_3_model,1
Rate_1_data-Rate_1_model,1
dogs-dogs_hierarchical,2
dogs-dogs_log,2
wells_data-wells_dist100_model,2
nes_logit_data-nes_logit_model,2
wells_data-wells_dist,2
mesquite-logmesquite_logvolume,3
wells_data-wells_dist100ars_model,3
kilpisjarvi_mod-kilpisjarvi,3


In [7]:
df.tail(10)

Unnamed: 0,dimensions
Mtbh_data-Mtbh_model,1912
GLMM_data-GLMM1_model,2395
ovarian-logistic_regression_rhs,3075
traffic_accident_nyc-bym2_offset_only,3845
Mth_data-Mth_model,5044
prideprejudice_chapter-ldaK5,7714
mnist_100-nn_rbm1bJ10,7949
prostate-logistic_regression_rhs,11935
prideprejudice_paragraph-ldaK5,15570
mnist-nn_rbm1bJ100,79409


In [8]:
# Determining Whether the Model has a Gold Standard
no_gs = []
for i in pos:
    posterior = my_pdb.posterior(i)
    try:
        gs = posterior.reference_draws()
    except AssertionError:
        no_gs.append(i)

In [9]:
# Number of Models without a Gold Standard
len(no_gs)

100

In [10]:
# Number of Models
len(pos)

147

In [11]:
# Models with a Gold Standard
gs_models = set(pos).difference(set(no_gs))
len(gs_models)

47

In [12]:
df_gs = df.loc[gs_models]
df_gs.sort_values(by=['dimensions'], ascending=True, inplace=True)
df_gs

  df_gs = df.loc[gs_models]


Unnamed: 0,dimensions
gp_pois_regr-gp_regr,3
earnings-logearn_height,3
earnings-earn_height,3
kidiq-kidscore_momhs,3
kidiq-kidscore_momiq,3
mesquite-logmesquite_logvolume,3
kilpisjarvi_mod-kilpisjarvi,3
earnings-log10earn_height,3
kidiq-kidscore_momhsiq,4
earnings-logearn_height_male,4
