# In [3]:
import collections
import itertools
import pandas as pd
import scipy.stats as st
import statsmodels.tools.numdiff as smnd
import numpy as np
import numba
import random
import math
from copy import copy, deepcopy

# Seed the standard-library `random` generator. It is called without an
# argument, so no fixed seed is set and results differ from run to run.
# NOTE: this does not seed NumPy's separate generator (`np.random`).
random.seed()

def mh_step(x, log_post, log_post_current, sigma, args=()):
    """
    Take a single Metropolis-Hastings step using a Gaussian proposal
    centered on the current position.

    Parameters
    ----------
    x : ndarray, shape (n_variables,)
        The present location of the walker in parameter space.
    log_post : function
        The function to compute the log posterior. It has call
        signature `log_post(x, *args)`.
    log_post_current : float
        The current value of the log posterior.
    sigma : scalar or ndarray
        Scale of the proposal distribution. A scalar or an array of
        shape (n_variables,) is interpreted as the standard deviations
        of independent Gaussian proposals. A two-dimensional array of
        shape (n_variables, n_variables) is interpreted as the
        covariance matrix of a multivariate Gaussian proposal.
    args : tuple
        Additional arguments passed to `log_post()` function.

    Returns
    -------
    x_out : ndarray, shape (n_variables,)
        The position of the walker after the Metropolis-Hastings
        step. If no step is taken, returns the inputted `x`.
    log_post_updated : float
        The log posterior after the step.
    accepted : bool
        True if the proposal step was taken, False otherwise.
    """
    # Draw the proposal from the distribution described by `sigma`
    proposal_scale = np.asarray(sigma, dtype=float)
    if proposal_scale.ndim == 2:
        proposal = np.random.multivariate_normal(x, proposal_scale)
    else:
        proposal = np.random.normal(x, proposal_scale)

    # Evaluate the log posterior once and reuse it for the return value
    log_post_proposed = log_post(proposal, *args)

    # Work entirely in log space: exponentiating the two log posteriors
    # separately underflows to 0/0 when they are very negative.
    log_ratio = log_post_proposed - log_post_current

    # 1 - random() lies in (0, 1], so the logarithm never sees zero
    log_uniform = math.log(1.0 - random.random())

    # Accept with probability min(1, exp(log_ratio)). A NaN ratio makes
    # both comparisons False, so such a proposal is rejected.
    if log_ratio >= 0 or log_uniform < log_ratio:
        return proposal, log_post_proposed, True
    return x, log_post_current, False
    
def mh_sample(log_post, x0, sigma, args=(), n_burn=1000, n_steps=5000,
              variable_names=None):
    """
    Run a single Metropolis-Hastings walker and collect its samples.

    Parameters
    ----------
    log_post : function
        The function to compute the log posterior. It has call
        signature `log_post(x, *args)`.
    x0 : ndarray, shape (n_variables,)
        The starting location of a walker in parameter space.
    sigma : ndarray
        The scale of the proposal distribution; passed unchanged to
        `mh_step()`.
    args : tuple
        Additional arguments passed to `log_post()` function.
    n_burn : int, default 1000
        Number of burn-in steps. These steps are taken but not stored.
    n_steps : int, default 5000
        Number of steps to take after burn-in.
    variable_names : list, length n_variables
        Accepted for interface compatibility; this implementation
        does not use it.

    Returns
    -------
    output : DataFrame
        One row per post-burn-in step. Column 'Samples' holds the
        walker position at that step and column
        'Log Posterior Value' holds the log posterior at that position.
    """
    position = x0
    log_post_value = log_post(x0, *args)

    # Burn-in: advance the walker without recording anything
    for _ in range(n_burn):
        position, log_post_value, _ = mh_step(
            position, log_post, log_post_value, sigma, args)

    # Sampling: record the position after every step, whether or not
    # the proposal was accepted (a rejection repeats the old position)
    samples = []
    lnprob = []
    for _ in range(n_steps):
        position, log_post_value, _ = mh_step(
            position, log_post, log_post_value, sigma, args)
        samples.append(position)
        lnprob.append(log_post_value)

    return pd.DataFrame(data={'Samples': samples,
                              'Log Posterior Value': lnprob})
        
# Parameters of the two-dimensional Gaussian used as the test target:
# mean vector, covariance matrix, and the inverse of the covariance.
mu = np.array([10.0, 20.0])
cov = np.array([
    [4, -2],
    [-2, 6],
])
inv_cov = np.linalg.inv(cov)

def log_test_distribution(x, mu, inv_cov):
    """
    Unnormalized log posterior of a multivariate Gaussian.

    Parameters
    ----------
    x : ndarray, shape (n_variables,)
        Point at which to evaluate the log posterior.
    mu : ndarray, shape (n_variables,)
        Mean of the Gaussian.
    inv_cov : ndarray, shape (n_variables, n_variables)
        Inverse of the covariance matrix of the Gaussian.

    Returns
    -------
    output : float
        The value -(x - mu)^T . inv_cov . (x - mu) / 2.
    """
    # Compute the offset from the mean once and reuse it on both sides
    deviation = x - mu
    return -np.dot(deviation, np.dot(inv_cov, deviation)) / 2

# Starting position of the walker
x0 = np.array([5.0, 15.0])
_out = mh_sample(log_test_distribution, x0, cov, args=(mu, inv_cov))

# Sum each coordinate over all recorded samples so that the sample
# mean of each variable can be reported
count = len(_out['Samples'])
totalFirst = sum(sample[0] for sample in _out['Samples'])
totalSecond = sum(sample[1] for sample in _out['Samples'])

# Print the sample mean of the first and second variable
print(totalFirst / count)
print(totalSecond / count)



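# Output of one run (sample means of the two variables; values vary
# from run to run because the random generators are not seeded):
# 10.0044499649
# 19.9437542888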
