In [2]:
# Some analytic Bayesian estimators for common distributions (Bernoulli, Binomial, ...)
# Reference: "A First Course in Bayesian Statistical Methods" by Peter D. Hoff
# Author: tran.vuduc[at]gmail.com

import numpy as np
import matplotlib.pyplot as plt
from scipy.special import gamma

# The posterior distribution for parameter theta of a Bernoulli distribution B(1,theta)
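Let $y = (y_1, \ldots, y_n)$ be $n$ independent observations from $B(1,\theta)$ and let $k = \sum_{i=1}^{n} y_i$ be the number of successes. By Bayes' rule we have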
$$p(\theta\ |\ y) = \frac{p(y\ |\ \theta) \ p(\theta)}{p(y)}=\frac{\theta^k\ (1-\theta)^{n-k}\ p(\theta)}{p(y)}$$

With a Uniform prior $U[0,1]$ on $\theta$, we have $p(\theta)=1$ for $\theta \in [0,1]$, and integrating $\theta^k\ (1-\theta)^{n-k}$ over $[0,1]$ (a Beta integral) gives
$$p(y)=\frac{\Gamma(k+1)\ \Gamma(n-k+1)}{\Gamma(n+2)}$$
Hence, the posterior distribution is
$$p(\theta\ |\ y_1, \ldots, y_{n}) = \frac{\Gamma(n+2)}{\Gamma(k+1)\ \Gamma(n-k+1)}\theta^{k}(1-\theta)^{n-k}$$

This is the density function of the Beta distribution: $Beta(a=k+1, b=n-k+1)$. 

We use the following properties for evaluating the posterior mean and variance of theta.

If $X \sim Beta(a,b)$, then

$$E(X) = \frac{a}{a+b} \quad \textrm{and} \quad Var(X) = \frac{ab}{(a+b+1)(a+b)^2}$$


In [3]:
def bayes_estimator_bernoulli(data, a_prior=1, b_prior=1):
    """Posterior of theta for i.i.d. Bernoulli B(1, theta) observations.

    With a Beta(a_prior, b_prior) prior on theta, n observations containing
    k ones give the posterior Beta(k + a_prior, n - k + b_prior). The default
    a_prior = b_prior = 1 is the Uniform[0,1] prior.

    Parameters
    ----------
    data : array-like of 0/1 (or bool) values
        The observed sample; every element counts as one observation.
        An empty sample returns the prior parameters unchanged.
    a_prior, b_prior : numbers, optional
        Parameters of the Beta prior (both default to 1).

    Returns
    -------
    a, b : parameters of the posterior distribution Beta(a, b)
    pos_mean : posterior mean of theta, a / (a + b)
    pos_var : posterior variance of theta, a*b / ((a+b+1) * (a+b)**2)
    """
    observations = np.asarray(data)
    n = observations.size
    # Convert the success count to a Python int. A NumPy integer scalar is
    # fixed-width, so the cubic term in the variance denominator could wrap
    # around silently (for int32 data already at n of a few thousand).
    k = int(observations.sum())
    a = k + a_prior
    b = n - k + b_prior
    total = a + b
    pos_mean = 1.0 * a / total
    pos_var = 1.0 * (a * b) / ((total + 1) * total ** 2)
    return a, b, pos_mean, pos_var

# Example: simulate n Bernoulli(0.6) observations and compute the posterior of theta
np.random.seed(0)  # fixed seed so that Restart & Run All reproduces the same sample
n = 129 # sample size
data = np.random.binomial(size=n, n=1, p=0.6)
a, b, pos_mean, pos_var = bayes_estimator_bernoulli(data)
a, b, pos_mean, pos_var

(81, 50, 0.6183206106870229, 0.0017878805536867422)

# The posterior distribution for parameter theta of a Binomial distribution B(n,theta)
$$p(\theta\ |\ y) = \frac{p(y\ |\ \theta) \ p(\theta)}{p(y)}=\frac{C_n^y\ \theta^y\ (1-\theta)^{n-y}\ p(\theta)}{p(y)} = c(y)\ \theta^y\ (1-\theta)^{n-y}\ p(\theta)$$

where
$$c(y) = \frac{C_n^y}{p(y)} $$

For a uniform prior, we have $p(\theta)=1$ for all $\theta \in [0,1]$. The term $c(y)$ is found from the condition that the posterior density integrates to one:
$$1=\int_0^1 c(y) \theta^y\ (1-\theta)^{n-y} d\theta$$
$$\Leftrightarrow c(y) = \frac{\Gamma(n+2)}{\Gamma(y+1)\ \Gamma(n-y+1)}$$
Hence the posterior distribution is
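$$p(\theta\ |\ y) = \frac{\Gamma(n+2)}{\Gamma(y+1)\ \Gamma(n-y+1)} \theta^y\ (1-\theta)^{n-y},$$
which is the density of the $Beta(y+1, n-y+1)$ distribution. More generally, with a conjugate prior $Beta(a_0, b_0)$ the posterior is $Beta(y+a_0,\ n-y+b_0)$; the uniform prior is the special case $a_0=b_0=1$.

We use the following properties for evaluating the posterior mean and variance of theta.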

If $X \sim Beta(a,b)$, then

$$E(X) = \frac{a}{a+b} \quad \textrm{and} \quad Var(X) = \frac{ab}{(a+b+1)(a+b)^2}$$



In [4]:
def bayes_estimator_binomial(n, k, a_prior, b_prior):
    """Posterior of theta for one Binomial B(n, theta) observation.

    With a Beta(a_prior, b_prior) prior on theta, observing k successes in
    n trials gives the posterior Beta(k + a_prior, n - k + b_prior).
    a_prior = b_prior = 1 corresponds to the Uniform[0,1] prior.

    Parameters
    ----------
    n : number of trials
    k : number of successful trials
    a_prior, b_prior : parameters of the Beta prior

    The inputs are not validated: the caller is responsible for passing
    0 <= k <= n and positive prior parameters.

    Returns
    -------
    a, b : parameters of the posterior distribution Beta(a, b)
    pos_mean : posterior mean of theta, a / (a + b)
    pos_var : posterior variance of theta, a*b / ((a+b+1) * (a+b)**2)
    """
    # NumPy integer scalars are fixed-width, so the cubic term in the
    # variance denominator can wrap around silently (for int32 already at n
    # of a few thousand). Convert NumPy scalars to plain Python numbers.
    n, k, a_prior, b_prior = [
        value.item() if isinstance(value, np.generic) else value
        for value in (n, k, a_prior, b_prior)
    ]

    a = k + a_prior
    b = n - k + b_prior
    total = a + b
    pos_mean = 1.0 * a / total
    pos_var = 1.0 * (a * b) / ((total + 1) * total ** 2)

    return a, b, pos_mean, pos_var
# Example: one Binomial(n, 0.6) draw, i.e. the number of successes in n trials
np.random.seed(0)  # fixed seed so that Restart & Run All reproduces the same draw
n = 129 # number of trials
# Draw a single count and store it as a plain Python int. The name `data` is
# kept because the next cell reads it.
data = int(np.random.binomial(n=n, p=0.6))
a, b, pos_mean, pos_var = bayes_estimator_binomial(n, data, 1, 1)
a, b, pos_mean, pos_var

(84, 47, 0.6412213740458015, 0.0017428524508531502)

In [5]:
# Same observation as in the previous cell (n trials, `data` successes), now with
# an informative Beta(50, 100) prior (prior mean 1/3) instead of the uniform prior.
a, b, pos_mean, pos_var = bayes_estimator_binomial(n, data, a_prior=50, b_prior=100)
(a, b, pos_mean, pos_var)

(133, 146, 0.4767025089605735, 0.0008909186675402423)