This notebook runs the Bayesian model comparison between the Euclidean and hyperbolic latent space models, separately for the binary and the continuous edge types, and shows which latent space geometry is preferred.

In [93]:
import os
from decimal import Decimal

import numpy as np

In [94]:
## Folder (inside the current working directory) that holds the log files read below
folder = os.path.join(os.getcwd(),'BayesianModelComparison')

In [95]:
## Number of subjects in the data set
n_subjects = 100

## Model variants: edge type (binary / continuous) and latent space geometry
## (presumably 'euc' = Euclidean and 'hyp' = hyperbolic)
edge_types = ['bin', 'con']
geometries = ['euc', 'hyp']

## Task labels; their order defines the task axis of the lml array
tasks = [
    'REST1',
    'REST2',
    'EMOTION',
    'GAMBLING',
    'LANGUAGE',
    'MOTOR',
    'RELATIONAL',
    'SOCIAL',
    'WM',
]

During embedding, we save information to a log file saying  
_"S1 task EMOTION has been embedded in x iterations with lml=....."_  
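We find the '=' character and take everything to the right of it as the log-marginal likelihood value. Each model has its own log file, and its lines are assumed to be ordered by subject and, within each subject, by task.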

In [96]:
## Fill lml[edge type, geometry, subject, task] from the log files "<edge type>_<geometry>.txt".
## Every non-blank line contributes one value: the text to the right of the '=' character.
## NOTE(review): lines are assumed to be ordered by subject and, within a subject, by task
## (in the order of `tasks`) -- confirm against the script that writes the logs.
lml = np.zeros((len(edge_types), len(geometries), n_subjects, len(tasks)))
for ei, et in enumerate(edge_types):
    for gi, geo in enumerate(geometries):
        filename = os.path.join(folder, f"{et}_{geo}.txt")
        with open(filename) as lml_txt:
            ## Count the non-blank lines per file, so every file starts at subject 0, task 0
            entry = 0
            for line in lml_txt:
                if not line.strip():
                    continue
                ## The subject index advances only after all tasks of the current subject
                ## have been stored; the previous update moved it one line too early, which
                ## put each subject's last task into the next subject's row.
                si, ti = divmod(entry, len(tasks))
                ## Wrap around (as before) if a file holds more than n_subjects*len(tasks) values
                lml[ei, gi, si % n_subjects, ti] = float(line.split('=')[1])
                entry += 1

In [1]:
def bayes_factor(lml1, lml2, log=False):
    """
    Calculates the (log-)Bayes factor of model 1 over model 2.
    PARAMS:
    lml1 : log-marginal likelihood of model 1
    lml2 : log-marginal likelihood of model 2
    log : whether to calculate the log-Bayes factor
    RETURNS:
    decimal.Decimal : lml1 - lml2 if log is True, otherwise exp(lml1 - lml2)
    """
    def _to_decimal(value):
        ## Decimal() refuses NumPy scalars that are not Python floats (e.g. np.float32, np.int64);
        ## fall back to a plain float for those.
        try:
            return Decimal(value)
        except TypeError:
            return Decimal(float(value))

    ## Decimal is used for its exponent range: exp() of a difference of several hundred
    ## already overflows a 64-bit float, while a Decimal can still represent it.
    diff = _to_decimal(lml1) - _to_decimal(lml2)
    ## Decimal.exp() is what np.exp ended up calling on a Decimal; call it directly.
    return diff if log else diff.exp()

In [109]:
## Total log-marginal likelihood of each model: sum over all subjects and tasks.
## First index of lml selects the edge type, second index the geometry.
bin_euc_lml, bin_hyp_lml = lml[0, 0].sum(), lml[0, 1].sum()
con_euc_lml, con_hyp_lml = lml[1, 0].sum(), lml[1, 1].sum()

## Bayes factor (K) and log-Bayes factor (logK) of the 'hyp' over the 'euc' model
K_bin = bayes_factor(bin_hyp_lml, bin_euc_lml)
logK_bin = bayes_factor(bin_hyp_lml, bin_euc_lml, log=True)
K_con = bayes_factor(con_hyp_lml, con_euc_lml)
logK_con = bayes_factor(con_hyp_lml, con_euc_lml, log=True)

## Number of significant digits shown in the report
n = 3
print(f"Binary LSMs: \n\t euc: {bin_euc_lml:.{n}g} \n\t hyp: {bin_hyp_lml:.{n}g} \n\t logK: {logK_bin:.{n}g} \n\t K: {K_bin:.{n}g}")
print(f"Continuous LSMs: \n\t euc: {con_euc_lml:.{n}g} \n\t hyp: {con_hyp_lml:.{n}g} \n\t logK: {logK_con:.{n}g} \n\t K: {K_con:.{n}g}")

Binary LSMs: 
	 euc: -9.43e+06 
	 hyp: -9.24e+06 
	 logK: 1.88e+5 
	 K: 2.33e+81431
Continuous LSMs: 
	 euc: -6.81e+06 
	 hyp: -5.97e+06 
	 logK: 8.46e+5 
	 K: 1.25e+367377
