In [1]:
# from codebase.classes import Particles
# from codebase.classes_data import Data
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from scipy.special import logsumexp
from scipy.stats import norm, invgamma
import altair as alt
alt.data_transformers.disable_max_rows()


import pystan

## Load Model Evidence

In [2]:
log_dirs = dict()
log_dirs['ibis'] = 'log/20210902_191203_toy_m12/'

model_nums = ['ibis']
# load existing results
bf = dict()

for model in model_nums:
    bf[str(model)] = load_obj('log_lklhds', log_dirs[str(model)])



## data

In [3]:
data = load_obj('data', log_dirs['ibis'])
data.raw_data


{'random_seed': 0,
 'N': 100,
 'J': 1,
 'alpha': array([0.]),
 'sigma': array([1.]),
 'Marg_cov': array([[1.]]),
 'y': array([[ 1.76405235],
        [ 0.40015721],
        [ 0.97873798],
        [ 2.2408932 ],
        [ 1.86755799],
        [-0.97727788],
        [ 0.95008842],
        [-0.15135721],
        [-0.10321885],
        [ 0.4105985 ],
        [ 0.14404357],
        [ 1.45427351],
        [ 0.76103773],
        [ 0.12167502],
        [ 0.44386323],
        [ 0.33367433],
        [ 1.49407907],
        [-0.20515826],
        [ 0.3130677 ],
        [-0.85409574],
        [-2.55298982],
        [ 0.6536186 ],
        [ 0.8644362 ],
        [-0.74216502],
        [ 2.26975462],
        [-1.45436567],
        [ 0.04575852],
        [-0.18718385],
        [ 1.53277921],
        [ 1.46935877],
        [ 0.15494743],
        [ 0.37816252],
        [-0.88778575],
        [-1.98079647],
        [-0.34791215],
        [ 0.15634897],
        [ 1.23029068],
        [ 1.20237985],
        

## IBIS estimate

In [15]:
bf['ibis'].sum()

-148.30623839842207

## Analytical estimate

In [5]:
def loglikelihood(x, mu, sigma):
    return norm.logpdf(x, loc=mu, scale=sigma).sum()

def logprior(mu, sigma):
    a = invgamma.logpdf(x=sigma**2, a=1, scale=1)
    b = norm.logpdf(x=mu, loc=0, scale=sigma)
    return (a+b)

def logposterior(x, mu, sigma): 
    n = x.shape[0]
    xavg = np.mean(x)
    ssquare = np.sum((x-xavg)**2)
    a = norm.logpdf(
        x = mu,
        loc=n*xavg/(n+1) ,
        scale = (sigma/np.sqrt((n+1)))
        )
    
    b = invgamma.logpdf(
        x=sigma**2, 
        a=(n+2)*0.5,
        scale=0.5 * (2+ssquare+((n*(xavg**2))/(n+1))) ## note in python's parametrization scale = 1/beta
    )
    return a+b


n=1000
m1 = 1
s1 = 1.2
print("mu=%.2f, sigma^2 =%.2f"%(m1, s1**2))

# y = norm.rvs(loc=m1, scale=s1, size=n)
y = data.raw_data['y']

print(loglikelihood(y , m1, s1))
print(logprior(m1, s1))
print(logposterior(y , m1, s1))

logmodel_evidence = (
    loglikelihood(y, m1, s1) +
    logprior(m1, s1) -
    logposterior(y, m1, s1)
)
logmodel_evidence

mu=1.00, sigma^2 =1.44
-176.0908557661174
-2.872212983841113
-31.929316673736466


-147.03375207622202

## checking analytical posterior

In [6]:
from scipy.stats import invgamma, norm
def loglikelihood(x, mu, sigma):
    return -norm.logpdf(x, loc=mu, scale=sigma).sum()

def draw_prior(size):
    draws = np.empty((size, 2))
    sigmasquare = invgamma.rvs(a=1, scale=1, size=size)
    draws[:,1] = sigmasquare
    for i in range(size):
        mu = norm.rvs(loc=0, scale=np.sqrt(sigmasquare[i]))
        draws[i,0] = mu

#     draws[:,0] = norm.rvs(loc=0, scale=np.sqrt(sigmasquare))
    return draws

def draw_posterior(x, size): 
    n = x.shape[0]
    xavg = np.mean(x)
    ssquare = np.sum((x-xavg)**2)
    draws = np.empty((size, 2))
    sigmasquare = invgamma.rvs(
        a=(n+2)/2.,
        scale=0.5*(2+ssquare+((n*xavg**2)/(n+1))),
        size=size
    )
    draws[:,1] = sigmasquare
    for i in range(size):
        mu = norm.rvs(
            loc=n*xavg/(n+1) ,
            scale = np.sqrt((sigmasquare[i]/(n+1)))
            )
        draws[i,0] = mu
        
#     draws[:,0] = norm.rvs(
#         loc=n*xavg/(n+1) ,
#         scale = np.sqrt((sigmasquare/(n+1))),
#         size=size
#         )
    return draws
            
draw_prior(1000).mean(0)


array([3.97958980e-03, 6.61780611e+00])

In [7]:
n=1000
m1 = 1
s1 = 1.2
print("mu=%.2f, sigma^2 =%.2f"%(m1, s1**2))

y = norm.rvs(loc=m1, scale=s1, size=n)
draw_posterior(y, 1000).mean(0).round(2)


mu=1.00, sigma^2 =1.44


array([0.94, 1.45])

In [14]:
def likelihood(x, mu, sigma):
    return np.exp(norm.logpdf(x, loc=mu, scale=sigma).sum())

def prior(mu, sigma):
    a = invgamma.pdf(x=sigma**2, a=1, scale=1)
    b = norm.pdf(x=mu, loc=0, scale=sigma)
    return np.exp(a+b)

def posterior(x, mu, sigma): 
    n = x.shape[0]
    xavg = np.mean(x)
    ssquare = np.sum((x-xavg)**2)
    a = norm.logpdf(
        x = mu,
        loc=n*xavg/(n+1) ,
        scale = (sigma/np.sqrt((n+1)))
        )
    
    b = invgamma.logpdf(
        x=sigma**2, 
        a=(n+2)*0.5,
        scale=0.5 * (2+ssquare+((n*(xavg**2))/(n+1))) ## note in python's parametrization scale = 1/beta
    )
    return np.exp(a+b)


n=1000
m1 = 1
s1 = 1.2
print("mu=%.2f, sigma^2 =%.2f"%(m1, s1**2))

# y = norm.rvs(loc=m1, scale=s1, size=n)
y = data.raw_data['y']

print(likelihood(y , m1, s1))
print(prior(m1, s1))
print(posterior(y , m1, s1))

model_evidence = (
    likelihood(y, m1, s1) * prior(m1, s1) / posterior(y, m1, s1)
)
np.log(model_evidence)

mu=1.00, sigma^2 =1.44
3.3474417400845346e-77
1.6092053795801171
1.3591705557037268e-14


-143.68579858827778