In [None]:
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats
from scipy.integrate import dblquad

sys.path.append('../')
from utils import *

%matplotlib inline

# Notebook-wide matplotlib defaults: larger titles/labels and wide figures.
mpl.rcParams.update({
    'axes.titlesize': 20,
    'axes.labelsize': 17,
    'ytick.labelsize': 12,
    'xtick.labelsize': 12,
    'legend.fontsize': 16,
    'figure.figsize': (15, 5),
})

## Calculate the analytical evidence of the Negative Binomial model by numerical integration

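We generate data from the Negative Binomial model by sampling from a Poisson–Gamma mixture: a rate $\lambda$ is drawn from a Gamma distribution with shape $k$ and scale $\theta$, and the count $x$ is then drawn from a Poisson distribution with that rate: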

\begin{align}
x | k, \theta &\sim Poisson(Gamma(k, \theta)) \\
p(x | k, \theta) &= p_{Poisson}(x | \lambda) \; \; p_{Gamma}(\lambda | k, \theta)
\end{align}

We place additional Gamma priors on $k$ and $\theta$, whose parameters we fix. The marginal likelihood is given by

\begin{align}
p(x) &= \int \int p_{Poisson}(x | \lambda) \; p_{Gamma}(\lambda | k, \theta) \; p(k) \; p(\theta) \; dk d\theta
\end{align}

We approximate this integral by evaluating the integrand on a grid of $k$ and $\theta$ values and applying the trapezoidal rule along both axes.

In [None]:
# Fixed hyper-parameters (shape, scale) of the Gamma priors on k and theta
k2, theta2 = 5., 2.0
k3, theta3 = 1, 1.0
prior_k = scipy.stats.gamma(a=k2, scale=theta2)
prior_theta = scipy.stats.gamma(a=k3, scale=theta3)

# Integration grid: 200 points per axis.
# NOTE: the Gamma priors have support (0, inf) while the grid only spans
# k in [5, 15] and theta in [0.1, 3], so any integral taken over this grid
# is restricted to that box.
ks = np.linspace(5., 15, num=200)
thetas = np.linspace(0.1, 3, num=200)

# Default 'xy' indexing: rows follow `thetas`, columns follow `ks`
k_grid, th_grid = np.meshgrid(ks, thetas, indexing='xy')

In [None]:
# generate some data
sample_size = 20

# Seeded generator so that "Restart & Run All" reproduces the same data set.
rng = np.random.default_rng(0)

# Draw one (k, theta) pair from the priors and freeze the Gamma distribution
# over the Poisson rates that they define.
gamma_prior = scipy.stats.gamma(
    a=prior_k.rvs(random_state=rng),
    scale=prior_theta.rvs(random_state=rng),
)

# One latent rate per observation, then one Poisson count per rate
# (vectorized draws instead of one Python-level call per sample).
lams = gamma_prior.rvs(size=sample_size, random_state=rng)
x = scipy.stats.poisson.rvs(mu=lams, size=sample_size, random_state=rng)

In [None]:
def nb_evidence_integrant(k, theta, x, lams, sample_size):
    """Evaluate the evidence integrand at a single (k, theta) point.

    Returns

        prod_i Poisson(x[i] | lams[i]) * Gamma(lams[i] | shape=k, scale=theta)
            * prior_k.pdf(k) * prior_theta.pdf(theta)

    over the first ``sample_size`` observations.

    ``prior_k`` and ``prior_theta`` are NOT arguments: they are looked up as
    notebook-level globals at call time.

    NOTE(review): the likelihood is conditioned on the given rates ``lams``
    instead of integrating them out, so this is the joint density of
    (x, lams) rather than the Negative Binomial likelihood of x alone --
    confirm this is intended.

    Parameters
    ----------
    k, theta : float
        Shape and scale of the Gamma distribution over the rates.
    x : array-like
        Observed counts.
    lams : array-like
        Poisson rate paired with each observation.
    sample_size : int
        Number of leading observations to include.

    Returns
    -------
    float
        Value of the integrand (may underflow to 0.0 for large samples).
    """
    counts = np.asarray(x)[:sample_size]
    rates = np.asarray(lams, dtype=float)[:sample_size]

    # Sum log-densities directly: taking log(pmf * pdf) hits log(0) (and a
    # RuntimeWarning) as soon as a single product underflows.
    log_joint = (
        scipy.stats.poisson.logpmf(counts, mu=rates).sum()
        + scipy.stats.gamma.logpdf(rates, a=k, scale=theta).sum()
    )

    return np.exp(log_joint) * prior_k.pdf(k) * prior_theta.pdf(theta)

def nb_integrant_direct(k, theta, x, sample_size):
    """Evaluate the evidence integrand using the Negative Binomial pmf directly.

    Returns ``prod_i NB(x[i] | k, theta) * prior_k.pdf(k) * prior_theta.pdf(theta)``
    over the first ``sample_size`` observations, where NB is the marginal of a
    Poisson whose rate follows Gamma(shape=k, scale=theta).

    ``prior_k`` and ``prior_theta`` are looked up as notebook-level globals.

    Parameters
    ----------
    k, theta : float
        Shape and scale of the Gamma mixing distribution.
    x : array-like
        Observed counts.
    sample_size : int
        Number of leading observations to include.

    Returns
    -------
    float
        Value of the integrand.
    """
    counts = np.asarray(x)[:sample_size]

    # scipy.stats.nbinom has pmf C(x+n-1, n-1) * p**n * (1-p)**x, so for the
    # Gamma(shape=k, scale=theta)-Poisson mixture n = k and p = 1 / (1 + theta),
    # giving mean n*(1-p)/p = k*theta. The previous p = theta / (1 + theta)
    # is the complementary probability and gave mean k/theta.
    n_successes = k
    success_prob = 1.0 / (1.0 + theta)

    log_lik = scipy.stats.nbinom.logpmf(counts, n=n_successes, p=success_prob).sum()

    return np.exp(log_lik) * prior_k.pdf(k) * prior_theta.pdf(theta)

In [None]:
def nb_evidence_integral(x, lambs, prior_k, prior_theta, ks, thetas, log=False):
    """Integrate the evidence integrand over a (k, theta) grid (trapezoidal rule).

    At every grid point the integrand is

        prod_i Poisson(x[i] | lambs[i]) * Gamma(lambs[i] | shape=k, scale=theta)
            * prior_k.pdf(k) * prior_theta.pdf(theta)

    which is the quantity ``nb_evidence_integrant`` evaluates point by point.
    It is computed here for the whole grid at once, in log space, from the
    arguments of this function. The previous version ignored ``lambs`` (it
    read a global ``lams``), and its helper read global ``prior_k`` /
    ``prior_theta`` rather than the priors passed in.

    NOTE(review): the rates ``lambs`` are held fixed rather than integrated
    out, so the result is the evidence of (x, lambs) restricted to the grid's
    bounding box -- confirm this is the intended quantity.

    Parameters
    ----------
    x : array-like
        Observed counts (1-D).
    lambs : array-like
        Poisson rate paired with each observation; the first ``x.size``
        entries are used.
    prior_k, prior_theta : frozen scipy.stats distributions
        Priors on the Gamma shape ``k`` and scale ``theta``; must provide
        ``logpdf``.
    ks, thetas : array-like
        1-D grids of k and theta values to integrate over.
    log : bool, default False
        If True, return the log of the integral.

    Returns
    -------
    float
        The integral, or its natural log when ``log=True``.

    Raises
    ------
    ValueError
        If ``lambs`` has fewer entries than ``x``.
    """
    counts = np.asarray(x).ravel()
    rates = np.asarray(lambs, dtype=float).ravel()[:counts.size]
    if rates.size != counts.size:
        raise ValueError("lambs must provide one rate per observation in x")

    ks = np.asarray(ks, dtype=float)
    thetas = np.asarray(thetas, dtype=float)

    # Default 'xy' indexing: grids have shape (thetas.size, ks.size)
    k_grid, th_grid = np.meshgrid(ks, thetas)

    # The Poisson term does not depend on (k, theta): a single scalar.
    log_poisson = scipy.stats.poisson.logpmf(counts, mu=rates).sum()
    # Broadcast the rates against the grid and sum over observations.
    log_gamma = scipy.stats.gamma.logpdf(
        rates[:, None, None], a=k_grid, scale=th_grid
    ).sum(axis=0)
    log_grid = (
        log_poisson
        + log_gamma
        + prior_k.logpdf(k_grid)
        + prior_theta.logpdf(th_grid)
    )

    # np.trapz was renamed np.trapezoid in NumPy 2.0
    trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

    peak = np.max(log_grid) if log_grid.size else -np.inf
    if np.isneginf(peak):
        # Integrand is zero everywhere on the grid (or the grid is empty)
        log_integral = -np.inf
    else:
        # Factor out the largest value so the exponentials cannot all
        # underflow; it is added back in log space below.
        scaled = np.exp(log_grid - peak)
        inner = trapezoid(scaled, x=thetas, axis=0)
        total = trapezoid(inner, x=ks, axis=0)
        with np.errstate(divide="ignore"):
            log_integral = peak + np.log(total)

    return log_integral if log else np.exp(log_integral)
    
def nb_evidence_integrant(k, theta, x, lams, sample_size):
    """Evaluate the evidence integrand at a single (k, theta) point.

    NOTE: a function with this same name is also defined in an earlier cell
    of this notebook; whichever cell ran last wins. Consider keeping one copy.

    Returns

        prod_i Poisson(x[i] | lams[i]) * Gamma(lams[i] | shape=k, scale=theta)
            * prior_k.pdf(k) * prior_theta.pdf(theta)

    over the first ``sample_size`` observations. ``prior_k`` and
    ``prior_theta`` are looked up as notebook-level globals at call time.

    NOTE(review): the rates ``lams`` are held fixed rather than integrated
    out, so this is the joint density of (x, lams) -- confirm this is intended.

    Parameters
    ----------
    k, theta : float
        Shape and scale of the Gamma distribution over the rates.
    x : array-like
        Observed counts.
    lams : array-like
        Poisson rate paired with each observation.
    sample_size : int
        Number of leading observations to include.

    Returns
    -------
    float
        Value of the integrand (may underflow to 0.0 for large samples).
    """
    counts = np.asarray(x)[:sample_size]
    rates = np.asarray(lams, dtype=float)[:sample_size]

    # Work with log-densities: log(pmf * pdf) degenerates to log(0), with a
    # RuntimeWarning, whenever one of the products underflows.
    log_poisson = scipy.stats.poisson.logpmf(counts, mu=rates).sum()
    log_gamma = scipy.stats.gamma.logpdf(rates, a=k, scale=theta).sum()

    likelihood = np.exp(log_poisson + log_gamma)
    return likelihood * prior_k.pdf(k) * prior_theta.pdf(theta)

In [None]:
# Integrate over the (ks, thetas) grid using the data `x`, the sampled rates
# `lams` and the priors defined in the cells above (log=False, so `ev` is the
# integral itself, not its log).
ev = nb_evidence_integral(x, lams, prior_k, prior_theta, ks, thetas)
print(ev)

## Try the same with purely analytical Poisson evidence 

In [None]:
# prior is gamma
k1 = 5.
theta1 = 2.
prior = scipy.stats.gamma(a=k1, scale=theta1)

# sample data
# NOTE: `x` and `sample_size` rebind the names used by the Negative Binomial
# cells above; re-run those cells before re-evaluating the NB integral.
sample_size = 20

# Seeded generator so that "Restart & Run All" reproduces the same data set.
rng = np.random.default_rng(1)

# A single rate is drawn from the prior and shared by all observations.
x = scipy.stats.poisson.rvs(
    mu=prior.rvs(random_state=rng), size=sample_size, random_state=rng
)

In [None]:
# calculate the evidence:
# `poisson_sum_evidence` is not defined in this notebook; presumably it is
# provided by `from utils import *` -- TODO confirm. It is called with the
# Gamma prior's shape/scale (k1, theta1) and log=True, so the displayed value
# is presumably the log evidence.
poisson_sum_evidence(x, k1, theta1, log=True)