In [1]:
import numpy as np
from codebase.file_utils import save_obj, load_obj, make_folder, path_backslash
from pdb import set_trace
from scipy.stats import multivariate_normal, norm
from scipy.optimize import minimize
from codebase.plot import *
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [2]:
y_t = np.array([0,0,0,0,1,0])
theta_t=dict()
theta_t['alpha'] = np.array([0.3, 0.1, 0.4, 0.6, -0.8, 0.1])
theta_t['beta'] = np.ones((6,1))*0.3
z_t = np.array([[1]])


In [3]:
def get_pi_z(z, theta):
    exp_eta = np.exp(theta['alpha'] +  z @ theta['beta'].T)
    return exp_eta/(1+exp_eta)

pi_z = get_pi_z(z_t, theta_t)

In [4]:
def get_log_likelihood(z,y,theta):
    pi_z = get_pi_z(z, theta)
    s1 = np.sum((y*np.log(pi_z))+((1.-y)*(1.-np.log(pi_z))))
    s2 = -.5 * np.sum(z**2)
    return s1+s2

def get_neg_log_likelihood(z,y,theta):
    return - get_log_likelihood(z,y,theta)

def get_neg_posterior(z,y,theta):
    return - (get_log_likelihood(z,y,theta)+ norm.logpdf(z))


lglk = get_neg_log_likelihood(z_t, y_t, theta_t)
lglk

-5.733781394723231

In [5]:
def get_posterior_pdf(z, y, theta):
    return np.exp(get_log_likelihood(z ,y,theta) + norm.logpdf(z))
get_posterior_pdf(z_t, y_t, theta_t)

array([[74.80186827]])

In [6]:
def get_grad_pi_z(z, theta):
    exp_eta = np.exp(theta['alpha'] +  z @ theta['beta'].T)
    return (exp_eta *  theta['beta'].T)/(1+exp_eta)**2

get_grad_pi_z(z_t, theta_t)

array([[0.06863527, 0.07207822, 0.06651386, 0.06165009, 0.07050111,
        0.07207822]])

In [7]:
delta = 1e-5
z1 = z_t
z2 = z1 + delta
num_grad = (get_pi_z(z2, theta_t) - get_pi_z(z1, theta_t))/delta


In [8]:
if not np.allclose(num_grad, get_grad_pi_z(z1, theta_t)):
    print("Numerical Diff does not give the same answer as analytical")

In [10]:
def get_fisher_information(z, y, theta):
    pi_z = get_pi_z(z, theta)
    grad_pi_z = get_grad_pi_z(z, theta)
    r1 =grad_pi_z**2
    r2 =pi_z*(1.-pi_z)
    return 1. + np.sum(r1/r2)

get_fisher_information(z_t, y_t, theta_t)

1.1234370362299013

In [22]:
# try computing observed_information matrix = the hessian evaluated at the mode 

##

In [12]:
x0 = z_t.copy()*0.1
res = minimize(get_neg_log_likelihood, np.array([[1]]), args=(y_t, theta_t), method='BFGS')

In [13]:
def get_laplace_approx(y, theta):
    res = minimize(get_neg_posterior, np.array([[1]]), args=(y, theta), method='BFGS')
    fisher_info_matrix = get_fisher_information(res.x, y, theta)
    return multivariate_normal(mean = res.x, cov = fisher_info_matrix**(-1))
lapldist =  get_laplace_approx(y_t, theta_t)
laplace_samples = lapldist.rvs(size = 10000)

In [14]:
df = pd.DataFrame(laplace_samples, columns=['value'])

In [15]:
laplace_pdf = alt.Chart(df).transform_fold(
        ['value']
        ).transform_density(
            density='value',
        ).mark_area(opacity=0.5).encode(
            alt.X('value:Q', title=None),
            alt.Y('density:Q')
)
laplace_pdf

In [16]:
prior_samples = norm.rvs(size = 10000)
df = pd.DataFrame(prior_samples, columns=['value'])
prior_pdf = alt.Chart(df).transform_fold(
        ['value']
        ).transform_density(
            density='value',
        ).mark_area(opacity=0.5, color='red').encode(
            alt.X('value:Q', title=None),
            alt.Y('density:Q')
)
prior_pdf

## tests

In [17]:
## numerical defrivative at argmax point should be zero
delta = 1e-3
z1 = res.x
z2 = z1 + delta
num_grad = (get_log_likelihood(z2, y_t, theta_t) - get_log_likelihood(z1, y_t,  theta_t))/delta
assert np.allclose(num_grad, 0., atol=1e-3)


In [18]:

lglks = []
for z_i in np.linspace(-4,4,40):
    l_i = get_posterior_pdf(z_i.reshape((1,1)), y_t, theta_t)
    lglks.append(l_i)

In [19]:
constant = np.squeeze(lglks).sum()*8/40.

In [20]:
df_lglks = pd.DataFrame(np.squeeze(lglks)/constant, columns=['post'])
df_lglks['z'] = np.linspace(-4,4,40)
alt.Chart(df_lglks).mark_line().encode(
            alt.X('z:Q', title=None),
            alt.Y('post:Q')
)+laplace_pdf+prior_pdf


In [21]:
def sigmoid(x, a, b):
    exp_x = np.exp(a +  x *b)
    return exp_x/(1.+exp_x)

def grad_sigmoid(x, a, b):
    exp_x = np.exp(a +  x *b)
    return b*exp_x/(1. + exp_x)**2

sigmoid(1. , 2. ,3)
grad_sigmoid(1. , 2. ,3)

0.019944170012370463

In [None]:
## numerical defrivative at argmax point should be zero
delta = 1e-3
z1 = 1.
z2 = z1 + delta
num_grad = (sigmoid(z2, 2, 3) - sigmoid(z1, 2, 3))/delta
assert np.allclose(num_grad, grad_sigmoid(z1 , 2. ,3), atol=1e-3)
# num_grad