Merge f0a8a2f into 5e1cb4e

dflemin3 · Feb 13, 2020 · 78a247e · 78a247e
2 parents 5e1cb4e + f0a8a2f
commit 78a247e
Show file tree

Hide file tree

Showing 3 changed files with 167 additions and 31 deletions.
diff --git a/approxposterior/approx.py b/approxposterior/approx.py
@@ -767,8 +767,8 @@ def runMCMC(self, samplerKwargs=None, mcmcKwargs=None, runName="apRun",
             inspect the chains and calculate the burnin after the fact to ensure
             convergence, but this function works pretty well.
         thinChains : bool, optional
-            Whether or not to thin chains before GMM fitting.  Useful if running
-            long chains.  Defaults to True.  If true, estimates a thin cadence
+            Whether or not to thin chains.  Useful if running long chains.
+            Defaults to True.  If true, estimates a thin cadence
             via int(0.5*np.min(tau)) where tau is the intergrated autocorrelation
             time.
         verbose : bool, optional
@@ -821,35 +821,10 @@ def runMCMC(self, samplerKwargs=None, mcmcKwargs=None, runName="apRun",
 
         # If estimating burn in or thin scale, compute integrated
         # autocorrelation length of the chains
-        if estBurnin or thinChains:
-            # tol = 0 so it always returns an answer
-            tau = self.sampler.get_autocorr_time(tol=0)
-
-            # Catch NaNs
-            if np.any(~np.isfinite(tau)):
-                # Try removing NaNs
-                tau = tau[np.isfinite(np.array(tau))]
-                if len(tau) < 1:
-                    if verbose:
-                        print("Failed to compute integrated autocorrelation length, tau.")
-                        print("Setting tau = 1")
-                    tau = 1
-
-        # Estimate burn-in?
-        if estBurnin:
-            iburn = int(2.0*np.max(tau))
-        else:
-            iburn = 0
-
-        # Thin chains?
-        if thinChains:
-            ithin = np.max((int(0.5*np.min(tau)), 1))
-        else:
-            ithin = 1
-
-        if verbose:
-            print("burn-in estimate: %d" % iburn)
-            print("thin estimate: %d" % ithin)
+        iburn, ithin = mcmcUtils.estimateBurnin(self.sampler,
+                                                estBurnin=estBurnin,
+                                                thinChains=thinChains,
+                                                verbose=verbose)
 
         return self.sampler, iburn, ithin
     # end function

diff --git a/approxposterior/mcmcUtils.py b/approxposterior/mcmcUtils.py
@@ -159,3 +159,69 @@ def batchMeansMCSE(samples, bins=None, fn=None):
     mcse = b / (bins - 1) * np.sum((y - mu)**2, axis=0)
     return np.sqrt(mcse / len(samples))
 # end function
+
+
+def estimateBurnin(sampler, estBurnin=True, thinChains=True, verbose=False):
+    """
+    Estimate the integrated autocorrelation length on the MCMC chain associated
+    with an emcee sampler object. With the integrated autocorrelation length,
+    we can then estimate the burn-in length for the MCMC chain. This procedure
+    follows the example outlined here:
+    https://emcee.readthedocs.io/en/stable/tutorials/autocorr/
+
+    Parameters
+    ----------
+    sampler : emcee.EnsembleSampler
+        emcee MCMC sampler object/backend handler, given a complete chain
+    estBurnin : bool, optional
+        Estimate burn-in time using integrated autocorrelation time
+        heuristic.  Defaults to True. In general, we recommend users
+        inspect the chains and calculate the burnin after the fact to ensure
+        convergence, but this function works pretty well.
+    thinChains : bool, optional
+        Whether or not to thin chains.  Useful if running long chains.
+        Defaults to True.  If true, estimates a thin cadence
+        via int(0.5*np.min(tau)) where tau is the intergrated autocorrelation
+        time.
+    verbose : bool, optional
+        Output all the diagnostics? Defaults to False.
+
+    Returns
+    -------
+    iburn : int
+        burn-in index estimate.  If estBurnin == False, returns 0.
+    ithin : int
+        thin cadence estimate.  If thinChains == False, returns 1.
+    """
+
+    # Set tol = 0 so it always returns an answer
+    tau = sampler.get_autocorr_time(tol=0)
+
+    # Catch NaNs
+    if np.any(~np.isfinite(tau)):
+        # Try removing NaNs
+        tau = tau[np.isfinite(np.array(tau))]
+        if len(tau) < 1:
+            if verbose:
+                print("Failed to compute integrated autocorrelation length, tau.")
+                print("Setting tau = 1")
+            tau = 1
+
+    # Estimate burn-in?
+    if estBurnin:
+        iburn = int(2.0*np.max(tau))
+    else:
+        iburn = 0
+
+    # Thin chains?
+    if thinChains:
+        ithin = np.max((int(0.5*np.min(tau)), 1))
+    else:
+        ithin = 1
+
+    if verbose:
+        print("burn-in estimate: %d" % iburn)
+        print("thin estimate: %d" % ithin)
+
+    return iburn, ithin
+# end function
diff --git a/approxposterior/tests/test_Burnin.py b/approxposterior/tests/test_Burnin.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+
+Test estimating burn-in time of an MCMC chain
+
+@author: David P. Fleming [University of Washington, Seattle], 2020
+@email: dflemin3 (at) uw (dot) edu
+
+"""
+
+from approxposterior import mcmcUtils
+import numpy as np
+import emcee
+
+
+def testBurnin():
+    """
+    Test integrated autocorrelation length, and hence burn-in time, estimation.
+    """
+
+    np.random.seed(42)
+
+    # Simulate simple MCMC chain based fitting a line
+    # Choose the "true" parameters.
+    mTrue = -0.9594
+    bTrue = 4.294
+
+    # Generate some synthetic data from the model.
+    N = 50
+    x = np.sort(10*np.random.rand(N))
+    obserr = 0.5 # Amplitude of noise term
+    obs = mTrue * x + bTrue # True model
+    obs += obserr * np.random.randn(N) # Add some random noise
+
+    # Define the loglikelihood function
+    def logLikelihood(theta, x, obs, obserr):
+
+        # Model parameters
+        theta = np.array(theta)
+        m, b = theta
+
+        # Model predictions given parameters
+        model = m * x + b
+
+        # Likelihood of data given model parameters
+        return -0.5*np.sum((obs-model)**2/obserr**2)
+
+
+    # Define the logprior function
+    def logPrior(theta):
+
+        # Model parameters
+        theta = np.array(theta)
+        m, b = theta
+
+        # Probability of model parameters: flat prior
+        if -5.0 < m < 0.5 and 0.0 < b < 10.0:
+            return 0.0
+        return -np.inf
+
+
+    # Define logprobability function: l(D|theta) * p(theta)
+    # Note: use this for emcee, not approxposterior!
+    def logProbability(theta, x, obs, obserr):
+
+        lp = logPrior(theta)
+        if not np.isfinite(lp):
+            return -np.inf
+        return lp + logLikelihood(theta, x, obs, obserr)
+
+    # Randomly initialize walkers
+    p0 = np.random.randn(32, 2)
+    nwalkers, ndim = p0.shape
+
+    # Set up MCMC sample object - give it the logprobability function
+    sampler = emcee.EnsembleSampler(nwalkers, ndim, logProbability, args=(x, obs, obserr))
+
+    # Run the MCMC for 5000 iteratios
+    sampler.run_mcmc(p0, 5000);
+
+    # Estimate burnin, thin lengths
+    iburn, ithin = mcmcUtils.estimateBurnin(sampler, estBurnin=True,
+                                            thinChains=True, verbose=False)
+    test = [iburn, ithin]
+
+    # Compare estimated burnin to the known value
+    errMsg = "burn-in, thin lengths are incorrect."
+    truth = [67, 15]
+    assert np.allclose(truth, test, rtol=1.0e-1), errMsg
+# end function
+
+
+if __name__ == "__main__":
+    testBurnin()