In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import emcee
import corner

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from pyutils import *
from dataloc import *
import groupcatalog as gc

%load_ext autoreload
%autoreload 2

# MCMC of photo-z-plus results

These are the MCMC chains used to find the parameters of the photo-z-plus technique, which fills in redshifts that are missing because of fiber incompleteness in BGS.

In [None]:
# Folder for the photo-z-plus chains. In this cell only the commented-out
# alternatives below read from it; the active chain is loaded from BASE_FOLDER.
MCMC_FOLDER = OUTPUT_FOLDER + 'MCMC_PZP/'

# Alternative chains, kept for reference. To inspect one, uncomment it and
# comment out the active `backend = ...` line further down.
#
# Older versions with different metrics, parameter counts, etc.:
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc17_1_4.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc17b_1_5.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc17_2_0.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc13_2_1.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc13_2_2.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc13_1_6.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc13_2_3.h5", read_only=True)
#
# Latest versions of parameters and metrics:
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc13_m4_1_7.h5", read_only=True)
#backend = emcee.backends.HDFBackend(MCMC_FOLDER + "mcmc13_m4_2_4.h5", read_only=True)
#backend = emcee.backends.HDFBackend(BASE_FOLDER + "mcmc13_m4_3_1.h5", read_only=True)

# Active chain, opened read-only so that inspecting it cannot modify the file.
backend = emcee.backends.HDFBackend(BASE_FOLDER + "mcmc13_m4_2_6.h5", read_only=True)

# The backend's shape supplies the first two positional arguments of the sampler.
chain_shape = backend.shape
print(chain_shape)

# Wrap the stored chain in a sampler so the usual accessors can be used on it.
sampler = emcee.EnsembleSampler(*chain_shape, gc.log_probability, backend=backend)

# Flattened chain: one row per stored sample.
samples = sampler.get_chain(flat=True)
n_flat_samples = len(samples)
print(f"Flat iterations run {n_flat_samples}")

# Score of every flattened sample, ranked from best to worst.
log_prob = sampler.get_log_prob(flat=True)
top_N = 5  # how many distinct parameter sets to report
top_indices = np.argsort(log_prob)[::-1]

# Walk down the ranking and keep a sample only if it differs from every sample
# already kept. Two samples count as the same when *all* of their parameters
# agree within np.isclose's tolerance (rtol=0.5).
selected_indices = []
for candidate in top_indices:
    already_covered = any(
        np.isclose(samples[candidate], samples[kept], rtol=0.5).all()
        for kept in selected_indices
    )
    if already_covered:
        continue
    selected_indices.append(candidate)
    if len(selected_indices) >= top_N:
        break

# Print each selected parameter set together with its score, best first.
# The first parameter is shown truncated to an integer; parameters 1-12 are
# shown as four groups of three. Every group is separated by ", " so the
# printed list is consistently delimited.
with np.printoptions(precision=4, suppress=True, linewidth=200):
    for i, idx in enumerate(selected_indices):
        params = samples[idx]
        groups = [
            np.array2string(params[start:start + 3], separator=', ')
            for start in (1, 4, 7, 10)
        ]
        print(f"Rank {i+1}:")
        print(f"Parameters: [{int(params[0])}, {', '.join(groups)}]")
        print(f"SCORE: {log_prob[idx]:.4f}")
        print()


In [None]:
# Estimate the integrated autocorrelation time of the chain. emcee raises
# AutocorrError when it cannot produce a reliable estimate; only that case is
# treated as "not burnt in". Any other exception (I/O problems, interrupts)
# propagates instead of being silently swallowed.
try:
    tau = sampler.get_autocorr_time()
    print(tau)
except emcee.autocorr.AutocorrError as err:
    print("Not burnt in yet")
    print(err)  # emcee's own explanation of why the estimate was rejected

# Samples used for the corner plot: drop the first steps and thin the rest.
BURN_IN_STEPS = 500  # steps discarded from the start of the chain
THIN_BY = 3          # keep every third remaining step

flatchain = sampler.get_chain(discard=BURN_IN_STEPS, thin=THIN_BY, flat=True)
print(np.shape(flatchain))
fig = corner.corner(flatchain)

In [None]:
# Distribution of the scores of all flattened samples, with counts on a log
# axis. The bin edges are fixed: 100 evenly spaced edges from 0.35 to 0.6.
score_bins = np.linspace(.35, .6, 100)
score_fig, score_ax = plt.subplots()
score_ax.hist(log_prob, bins=score_bins)
score_ax.set_yscale('log')

In [None]:
# Unflattened chain with nothing discarded.
# NOTE(review): emcee documents this array as (steps, walkers, parameters) — confirm.
c = sampler.get_chain(discard=0)
print(c.shape)

# One figure per entry along the last axis (presumably one per parameter);
# each figure draws one line per column of the 2-D slice.
n_params = c.shape[2]
for dim in range(n_params):
    trace_fig, trace_ax = plt.subplots()
    trace_ax.plot(c[:, :, dim])
    plt.show()
