In [1]:
import collections
import itertools
import pandas as pd
import scipy.stats as st
import statsmodels.tools.numdiff as smnd
import numpy as np
import numba
import random
import math
from copy import copy, deepcopy
import emcee
random.seed()

4.2 (a) $P(r, n \mid p)$ follows the story of the Binomial distribution (r successes in n trials). The expression is given by:

\begin{align}
P(r, n \mid p, I) = {n \choose r} p^r (1-p)^{n-r}
\end{align}

\begin{align}
P(r, n \mid p, I) = \frac{n!}{r! (n-r)!} p^r (1-p)^{n-r}
\end{align}


We don't have any prior information about $p$, so we will choose a uniform prior:

\begin{align}
P(p) = \frac{1}{p_{max} - p_{min}} = \frac{1}{1 - 0} = 1
\end{align}

Then, the posterior is proportional to the following: 

\begin{align}
P(p \mid r,n, I) \propto \frac{n!}{r! (n-r)!} p^r (1-p)^{n-r}
\end{align}

<div class='alert alert-info'>
Very nice. Instead of using the log factorials in the code below, you can take advantage of the fact that $n! = \Gamma(n+1)$ where $\Gamma$ is the gamma function. The log gamma function is also packaged nicely in `scipy.special.gammaln()`.

10/10
</div>

In [5]:
n_wild = 71
r_wild = 6

In [38]:
def log_posterior_binomial(p, r, n):
    """
    Unnormalized log posterior of a multivariate Gaussian.
    n and r are constants, x is a 1-D array.
    """
    return math.log(math.factorial(n)/(math.factorial(r) * math.factorial(n-r)) * p**r * (1-p) ** (n-r));
n_dim = 1        # number of parameters in the model (p)
n_walkers = 50   # number of MCMC walkers
n_burn = 1000    # "burn-in" period to let chains stabilize
n_steps = 5000   # number of MCMC steps to take after burn-in
sampler = emcee.EnsembleSampler(n_walkers, n_dim, log_posterior_binomial, args=(r_wild, n_wild))

In [39]:
p0 = [np.random.rand(n_dim) for i in range(n_walkers)]
p0

[array([ 0.64890503]),
 array([ 0.80235907]),
 array([ 0.40444134]),
 array([ 0.66186655]),
 array([ 0.40580916]),
 array([ 0.11325823]),
 array([ 0.83756836]),
 array([ 0.54093228]),
 array([ 0.58012418]),
 array([ 0.51363983]),
 array([ 0.1409441]),
 array([ 0.03503155]),
 array([ 0.78566107]),
 array([ 0.87691119]),
 array([ 0.3441377]),
 array([ 0.03277774]),
 array([ 0.39982808]),
 array([ 0.93865447]),
 array([ 0.75934442]),
 array([ 0.68254889]),
 array([ 0.74807339]),
 array([ 0.50082776]),
 array([ 0.35574266]),
 array([ 0.9876341]),
 array([ 0.44536158]),
 array([ 0.70026443]),
 array([ 0.3290194]),
 array([ 0.27233586]),
 array([ 0.91875503]),
 array([ 0.33910972]),
 array([ 0.77383788]),
 array([ 0.10258438]),
 array([ 0.20204769]),
 array([ 0.7743979]),
 array([ 0.34248594]),
 array([ 0.13332303]),
 array([ 0.37173611]),
 array([ 0.60156598]),
 array([ 0.92965744]),
 array([ 0.7531553]),
 array([ 0.87944226]),
 array([ 0.31826196]),
 array([ 0.8056894]),
 array([ 0.7716393

We couldn't fix the math domain error in the log posterior function, but if we could have debugged it, the following line of code would have sampled from the posterior. 

<div class='alert alert-info'>
Actually, it would have spat out an error. When generating the array of starting positions for the walkers you must do so as a column vector. Here, it is merely making a list. The correct syntax would have been 
<br />
```
p0 = np.empty((n_walkers, n_dim))
p0[:,0] = np.random.uniform(0, 1, n_walkers)
```
</div>

In [41]:
sampler.run_mcmc(p0, n_steps)

emcee: Exception while calling your likelihood function:
  params: [ 1.42118563]
  args: (6, 71)
  kwargs: {}
  exception:


Traceback (most recent call last):
  File "D:\Anacondabi103\lib\site-packages\emcee\ensemble.py", line 519, in __call__
    return self.f(x, *self.args, **self.kwargs)
  File "<ipython-input-38-1df198edbdc7>", line 6, in log_posterior_binomial
    return math.log(math.factorial(n)/(math.factorial(r) * math.factorial(n-r)) * p**r * (1-p) ** (n-r));
ValueError: math domain error


ValueError: math domain error

<div class='alert alert-info'>
Final score for problem 4.2 = 10/40
</div>