In [1]:
import os
import ast
import matplotlib.pyplot as plt
import sys
#sys.path.append(os.path.join(os.getcwd(), '../desi/'))
#import catalog_definitions as cat
import numpy as np
import emcee
import time
from multiprocessing import Pool
import corner

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from dataloc import *

# Example 1

In [24]:
def log_prob(x, mu, cov):
    """Unnormalised log-density of a multivariate Gaussian.

    Parameters
    ----------
    x : ndarray
        Point at which the density is evaluated.
    mu : ndarray
        Mean vector, same length as ``x``.
    cov : ndarray
        Square matrix used as the covariance.

    Returns
    -------
    float
        ``-0.5 * (x - mu) . cov^{-1} (x - mu)``; no normalisation term is added.
    """
    residual = x - mu
    # Solve cov @ w = residual instead of forming the inverse explicitly.
    whitened = np.linalg.solve(cov, residual)
    return -0.5 * np.dot(residual, whitened)

# Dimensionality of the Gaussian target.
ndim = 3

# Seed the global NumPy RNG so the target built below is reproducible.
np.random.seed(42)

# Fixed quantities that define the target density (not sampled parameters).
means = np.random.rand(ndim)  # each mean lies in [0, 1)

# Covariance with non-zero off-diagonal terms: draw entries in (-0.5, 0.5],
# keep the upper triangle, mirror it to get a symmetric matrix, then square
# that symmetric matrix.
cov_raw = 0.5 - np.random.rand(ndim, ndim)
cov_upper = np.triu(cov_raw)
cov_sym = cov_upper + (cov_upper.T - np.diag(np.diag(cov_upper)))
cov = cov_sym @ cov_sym

# Sampler settings: ensemble size, production steps, burn-in steps.
nwalkers, niter, nburnin = 16, 5000, 500

In [None]:
# Scratch check: solve cov @ w = (x - mu) at the point (0.5, 0.5, 0.5),
# i.e. the same linear solve that log_prob performs.
x = [0.5, 0.5, 0.5]
mu = means
offset = x - mu  # list minus ndarray yields an ndarray
np.linalg.solve(cov, offset)

In [None]:
# Show the target's mean vector and covariance matrix, one after the other.
print(means, cov, sep="\n")

In [26]:
# Starting positions for the walkers: one row per walker, each coordinate
# drawn uniformly between 0 and 1. These are initial positions for the chain,
# not priors.
p0 = np.random.uniform(low=np.zeros(ndim), high=np.ones(ndim), size=(nwalkers, ndim))

In [27]:
# HDF-backed chain storage located under OUTPUT_FOLDER.
# NOTE(review): none of the samplers constructed in the visible cells is given
# this backend (no `backend=backend` argument) — confirm whether that is intended.
backend_file = OUTPUT_FOLDER + "test_backend"
backend = emcee.backends.HDFBackend(backend_file)

In [None]:
# -- run emcee (serial baseline)
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=[means, cov])
# Optional burn-in, currently disabled:
#state = sampler.run_mcmc(p0, nburnin)
#sampler.reset()

# The sampler is rebuilt above, so re-running this cell starts a fresh chain of
# `niter` steps per walker. Calling run_mcmc again on the *same* sampler object
# (without reset) is what would extend the stored chain by another `niter` steps.
# perf_counter is a monotonic clock meant for measuring elapsed time.
start = time.perf_counter()
sampler.run_mcmc(p0, niter, progress=True)
end = time.perf_counter()
serial_time = end - start

print("Serial took {0:.1f} seconds".format(serial_time))

In [None]:
# Chain summary for the serial sampler; the array is unpacked as
# (steps per walker, walkers, parameters), matching the prints below.
samples = sampler.get_chain()
n_steps, n_chains, ndim = samples.shape
print(f'Number of steps: {n_steps * n_chains} (total); {n_steps} (per walker), ')
print(f'Number of walkers: {n_chains}')
print(f'Number of parameters: {ndim}')

try:
    tau = sampler.get_autocorr_time()
    print(tau)
    # Discard twice the longest autocorrelation time as burn-in.
    burn = int(np.max(tau) * 2)
except emcee.autocorr.AutocorrError:
    # Chain too short for a reliable autocorrelation estimate. Previously `tau`
    # was left unbound here and the next line raised NameError; fall back to the
    # configured burn-in, capped so that at least half of the chain is kept.
    print("Not burnt in yet")
    burn = min(nburnin, n_steps // 2)

flatchain = sampler.get_chain(discard=burn, flat=True)
fig = corner.corner(flatchain)

In [None]:
# Pick the sample with the highest log-probability across all walkers and steps.
log_prob_flat = sampler.get_log_prob(flat=True)
chain_flat = sampler.get_chain(flat=True)
idx = np.argmax(log_prob_flat)
values = chain_flat[idx]
print(values)


In [None]:
# Same sampling problem, with log_prob evaluations distributed over a process pool.
with Pool() as pool:  # Pool() with no argument uses one worker process per CPU
    sampler2 = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool, args=[means, cov])

    # Burn-in: advance the walkers, keep where they ended up, drop the stored samples.
    state2 = sampler2.run_mcmc(p0, nburnin)
    sampler2.reset()

    # Timed production run. It continues from the burn-in end state; the
    # original restarted from p0, which discarded the burn-in entirely.
    start = time.perf_counter()
    sampler2.run_mcmc(state2, niter, progress=True)
    end = time.perf_counter()
    multi_time = end - start

    print("Multiprocessing took {0:.1f} seconds".format(multi_time))
    print("{0:.1f} times faster than serial".format(serial_time / multi_time))


In [None]:
samples = sampler.get_chain(flat=True)  # length is walkers * steps run total
PARAMETER_INDEX = 0
# LaTeX symbol for the selected parameter (1-based, so index 0 -> theta_1).
param_symbol = rf"\theta_{{{PARAMETER_INDEX + 1}}}"

# Distribution of a single parameter over the whole flattened chain.
# No `discard` is passed to get_chain above, so burn-in steps ARE included.
plt.hist(samples[:, PARAMETER_INDEX], 100, color="k", histtype="step")
plt.xlabel(rf"${param_symbol}$")
plt.ylabel(rf"$p({param_symbol})$")
plt.title(f'Posterior Distribution for parameter {PARAMETER_INDEX}')
plt.gca().set_yticks([])
plt.show()

# Trace of the same parameter along the flattened chain. The chain is flattened
# over walkers, so the x axis is a sample index rather than a per-walker
# iteration count.
plt.plot(samples[:, PARAMETER_INDEX])
plt.xlabel('Sample index (flattened over walkers)')
plt.ylabel('Parameter Value')
plt.title(f'Parameter Chain for parameter {PARAMETER_INDEX}')
plt.show()

In [None]:
# Ensemble-averaged acceptance fraction.
mean_acceptance = np.mean(sampler.acceptance_fraction)
print(f"Mean acceptance fraction: {mean_acceptance:.3f}")

# Autocorrelation time averaged over the parameters.
mean_autocorr = np.mean(sampler.get_autocorr_time())
print(f"Mean autocorrelation time: {mean_autocorr:.3f} steps")

In [None]:
# Corner plot for Example 1.
# The original cell referenced flat_samples, labels, m_true, b_true and f_true,
# none of which are defined anywhere in this notebook, so it raised NameError.
# Build the inputs from this example instead: the flattened chain with the first
# `nburnin` steps dropped, and the Gaussian means passed to log_prob as truths.
flat_samples = sampler.get_chain(discard=nburnin, flat=True)
labels = [rf"$\theta_{{{i + 1}}}$" for i in range(flat_samples.shape[1])]

fig = corner.corner(flat_samples, labels=labels, truths=means)

# Example 2

In [2]:
# Synthetic data for a linear model with two predictors:
#   z = alpha + beta_x * x + beta_y * y + Gaussian noise of scale eps.
# NOTE(review): no seed is set in this cell, so the draws depend on the current
# state of the global NumPy RNG.
Nobs = 20
# Predictors: x drawn uniformly between 0 and 10, y between -1 and 1.
x_true = np.random.uniform(0,10, size=Nobs)
y_true = np.random.uniform(-1,1, size=Nobs)
# Generating ("true") parameter values.
alpha_true = 0.5
beta_x_true = 1.0
beta_y_true = 10.0
eps_true = 0.5
# Noise-free response, then the observed response with added normal noise.
z_true = alpha_true + beta_x_true*x_true + beta_y_true*y_true
z_obs = z_true + np.random.normal(0, eps_true, size=Nobs)

In [None]:
# Two side-by-side views of the observations: each panel plots Z against one
# predictor and colours the points by the other predictor.
plt.figure(figsize=(12,6))
panels = (
    (1, x_true, y_true, 'X'),
    (2, y_true, x_true, 'Y'),
)
for position, horizontal, colour, axis_label in panels:
    plt.subplot(1, 2, position)
    plt.scatter(horizontal, z_obs, c=colour, marker='o')
    plt.colorbar()
    plt.xlabel(axis_label)
    plt.ylabel('Z')

In [4]:
# Linear-regression example: the log-prior, log-likelihood and log-posterior
# functions handed to emcee. TODO: need to understand what these three do.

def lnprior(p):
    """Log-prior for the parameter vector ``(alpha, betax, betay, eps)``.

    The prior is flat: every vector scores 0 except those with ``eps <= 0``,
    which are excluded by returning ``-inf``.
    """
    # Only eps is constrained; the other three entries are unpacked to enforce
    # the expected length of p.
    _alpha, _betax, _betay, eps = p
    return -np.inf if eps <= 0 else 0

def lnlike(p, x, y, z):
    """Gaussian log-likelihood of ``z`` under the model ``alpha + betax*x + betay*y``.

    Parameters
    ----------
    p : sequence of 4 floats
        ``(alpha, betax, betay, eps)``; ``eps`` is the noise standard deviation.
    x, y : float or ndarray
        Predictor values.
    z : float or ndarray
        Observed responses.

    Returns
    -------
    float
        Sum over all points of the log of a normal density with mean ``model``
        and variance ``eps**2`` evaluated at ``z``.
    """
    alpha, betax, betay, eps = p
    model = alpha + betax*x + betay*y
    variance = np.power(eps, 2)
    # np.sum (rather than the builtin sum) reduces in compiled code and also
    # accepts scalar inputs, where the builtin raised TypeError.
    per_point = np.power(z - model, 2)/variance + np.log(variance) + np.log(2*np.pi)
    return -0.5 * np.sum(per_point)

def lnprob(p, x, y, z):
    """Log-posterior (up to a constant): ``lnprior(p) + lnlike(p, x, y, z)``.

    Returns ``-inf`` without evaluating the likelihood whenever the prior is
    not finite.
    """
    prior = lnprior(p)
    if np.isfinite(prior):
        return prior + lnlike(p, x, y, z)
    return -np.inf

In [None]:
def likelihood(p, x, y, z):
    """Return minus twice the Gaussian log-likelihood of ``z``.

    Despite the name, this is not a likelihood: it is the same per-point sum
    used in ``lnlike`` but without the ``-0.5`` factor, so smaller is better.

    Parameters
    ----------
    p : sequence of 4 floats
        ``(alpha, betax, betay, eps)``; ``eps`` is the noise standard deviation.
    x, y : float or ndarray
        Predictor values.
    z : float or ndarray
        Observed responses.
    """
    alpha, betax, betay, eps = p
    model = alpha + betax*x + betay*y
    variance = np.power(eps, 2)
    # np.sum (rather than the builtin sum) reduces in compiled code and also
    # accepts scalar inputs, where the builtin raised TypeError.
    per_point = np.power(z - model, 2)/variance + np.log(variance) + np.log(2*np.pi)
    return np.sum(per_point)

In [None]:
# Log-likelihood of the observations at the generating parameter values.
p_true = [alpha_true, beta_x_true, beta_y_true, eps_true]
lnlike(p_true, x_true, y_true, z_obs)

In [6]:
# Ensemble size and number of model parameters (alpha, betax, betay, eps).
Nwalker = 50
Ndim = 4

In [None]:
import scipy.optimize as opt


def nll(*args):
    """Negative log-likelihood: the objective handed to the minimiser."""
    return -lnlike(*args)


# Maximum-likelihood fit, started from the generating parameter values.
start_guess = [alpha_true, beta_x_true, beta_y_true, eps_true]
result = opt.minimize(nll, start_guess, args=(x_true, y_true, z_obs))
print(result.x)

# One starting point per walker: the optimum plus a small Gaussian jitter.
p0 = [result.x + 1.e-4*np.random.randn(Ndim) for _ in range(Nwalker)]

In [8]:
# Starting positions drawn uniformly between 0 and 1 in every dimension.
# NOTE(review): this rebinds p0, replacing the optimiser-centred starting
# points built in the previous cell — confirm which initialisation is intended.
p0 = np.random.uniform(low=np.zeros(Ndim), high=np.ones(Ndim), size=(Nwalker, Ndim))

In [9]:
# Sample the regression posterior. Note that this rebinds `sampler`, which
# Example 1 also used.
Nsteps = 2000
sampler = emcee.EnsembleSampler(
    Nwalker,
    Ndim,
    lnprob,
    args=(x_true, y_true, z_obs),
)
# The returned state is unpacked into its three components.
pos, prob, state = sampler.run_mcmc(p0, Nsteps)

In [None]:
# Trace of alpha (parameter 0) for every walker, one line per walker.
# get_chain() is indexed (step, walker, parameter), so this slice is the same
# array as the deprecated `sampler.chain[:, :, 0].T` and matches the accessor
# used in the rest of the notebook.
res = plt.plot(sampler.get_chain()[:, :, 0], '-', color='k', alpha=0.3)
# Horizontal reference line at the generating value of alpha.
plt.axhline(alpha_true, color='blue')
plt.xlabel('Step')
plt.ylabel('alpha');

In [11]:
# Point estimate: the single sample with the highest log-probability.
log_post_flat = sampler.get_log_prob(flat=True)
idx = np.argmax(log_post_flat)
best_fit = sampler.get_chain(flat=True)[idx]
m_alpha, m_betax, m_betay, m_eps = best_fit

# Alternative point estimate (disabled): per-parameter median of the flat chain.
#m_alpha,m_betax,m_betay,m_eps = np.median(sampler.flatchain, axis=0)


In [None]:

def _residual_panel(position, coord, partial_residual, slope, scatter, xlabel, ylabel):
    """Draw one partial-residual panel and return the x-range used for the model line.

    Plots ``partial_residual`` against ``coord`` as points, then the line
    ``slope * coord`` with dashed lines offset by ``+scatter`` and ``-scatter``.
    """
    plt.subplot(1, 2, position)
    plt.plot(coord, partial_residual, 'o')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Two end points are enough to draw a straight model line.
    span = np.array([coord.min(), coord.max()])
    trend = span * slope
    plt.plot(span, trend)
    for offset in (scatter, -scatter):
        plt.plot(span, trend + offset, '--', color='k')
    return span


# Each panel removes the fitted intercept and the *other* predictor's fitted
# contribution from z_obs, leaving the dependence on the plotted predictor.
plt.figure(figsize=(12,6))
xx = _residual_panel(1, x_true, z_obs - m_alpha - m_betay*y_true,
                     m_betax, m_eps, 'X', 'Z - alpha - beta_y y')
yy = _residual_panel(2, y_true, z_obs - m_alpha - m_betax*x_true,
                     m_betay, m_eps, 'Y', 'Z - alpha - beta_x x')