In [None]:
import os
import glob
import collections
from pathlib import Path

from astropy.io import ascii
from vega import VegaInterface
import emcee
import corner
import yaml
%pylab inline

import constants
from MCMC_Run import one_dimensional_log_lik

In [None]:
# Vega parameter name -> LaTeX axis label used on the corner plots.
# The keys keep Vega's "QSO"/"hcd" parameter names; the labels present them as
# galaxy ("gal") and DLA quantities.
LABEL_MAPPING = {
    'bias_QSO': r'$b_{gal}$',
    'beta_QSO': r'$\beta_{gal}$',
    'sigma_velo_disp_gauss_QSO': r'$\sigma_z$',
    'drp_QSO': r'$\delta_z$',
    'bias_hcd': r'$b_{DLA}$',
    'beta_hcd': r'$\beta_{DLA}$',
    'L0_hcd': r'$L_{DLA}$'
}

# Survey key (as used in chain/config/figure file names) -> display title.
SURVEY_TITLE_MAPPING = {
    '3dhst': '3D-HST',
    'clamato': 'CLAMATO',
    'mosdef': 'MOSDEF',
    'vuds': 'VUDS',
    'zDeep': 'COSMOS-zDEEP'
}

# Stacked mass-bin key -> short prefix used in labels and titles.
STACKED_MASS_BIN_TITLE_MAPPING = {
    'lowmass': 'LM',
    'medmass': 'MM',
    'highmass': 'HM'
}

# Point the shared `constants` module at the split-sample MCMC directory; the
# plotting helpers below read chains and Vega .ini files from here.
# The default is the original cluster location; set CLAMATO_SPLIT_MCMC_DIR to run
# the notebook against a different checkout without editing this cell.
constants.MCMC_DIR_BASE = os.environ.get(
    'CLAMATO_SPLIT_MCMC_DIR',
    '/global/homes/b/bzh/clamato-xcorr/data/split-mcmc',
)

In [None]:
def plot_model_w_data(survey_name, param_dict, model_num_params,
                      SigMin = 0., SigMax = 30.,
                      PiMin = -30., PiMax = 30.,
                      one_dimensional=False):
    """Plot the observed cross-correlation of one sample next to its Vega model.

    The observation is loaded from
    ``XCORR_DIR_BASE/split/xcorr_{survey_name}_globalf_{DATA_VERSION}.npy`` and the
    model is computed by a ``VegaInterface`` built from
    ``MCMC_DIR_BASE/<survey prefix>/main_{survey_name}.ini``, where the survey
    prefix is the part of ``survey_name`` before the first underscore.

    Parameters
    ----------
    survey_name : str
        Sample identifier used in the data and .ini file names.
    param_dict : dict
        Vega parameter values to set before computing the model. If it contains
        neither ``beta_QSO`` nor ``bias_eta_QSO``, ``beta_QSO`` is derived as
        ``growth_rate / bias_QSO``.
    model_num_params : int
        Number of fitted parameters, subtracted from the number of data points to
        form the degrees of freedom of the reduced chi-squared in the title.
    SigMin, SigMax, PiMin, PiMax : float
        Axis limits of the 2D panels (ignored in the 1D plot).
    one_dimensional : bool
        If True, plot the pi-summed 1D correlation with error bars on the current
        figure; otherwise draw observed and model 2D maps side by side on a new one.

    Returns
    -------
    None
        The figure is left as the current matplotlib figure for the caller to
        save or show.
    """
    xcorr_dir = Path(constants.XCORR_DIR_BASE)
    PiBins0 = ascii.read(xcorr_dir / 'bins23_pi_0-30hMpc.txt')
    SigBins0 = ascii.read(xcorr_dir / 'bins10_sigma_0-30hMpc.txt')
    cosmo = constants.COSMOLOGY

    XCorr_obs = np.load(xcorr_dir / 'split' / f'xcorr_{survey_name}_globalf_{constants.DATA_VERSION}.npy')

    PiEdges = PiBins0['pi_edges'].data
    SigEdges = SigBins0['sigma_edges'].data

    SigEdgesVec, PiEdgesVec = np.meshgrid(SigEdges, PiEdges)

    # Divide the bin edges and centers by h for plotting.
    SigEdgesPlot = SigEdgesVec / cosmo.h
    PiEdgesPlot = PiEdgesVec / cosmo.h

    SigCentersPlot = (SigEdges[1:] + SigEdges[:-1]) / 2
    SigCentersPlot /= cosmo.h

    vega = VegaInterface(Path(constants.MCMC_DIR_BASE) / survey_name.split('_')[0] / f'main_{survey_name}.ini')
    if one_dimensional:
        # NOTE: this replaces log_lik on the VegaInterface *class*, so the override
        # stays in effect for every instance created later in the session.
        VegaInterface.log_lik = one_dimensional_log_lik

    for k, v in param_dict.items():
        vega.params[k] = v
    if 'beta_QSO' not in param_dict and 'bias_eta_QSO' not in param_dict:
        vega.params['beta_QSO'] = vega.params['growth_rate'] / vega.params['bias_QSO']

    # The model is reshaped to the observation's shape (reshape raises if the sizes
    # are incompatible), so after transposing both arrays always agree in shape.
    XModel = vega.compute_model()['qsoxlya'].reshape(*XCorr_obs.shape)
    XModelPlot = XModel.T
    XCorrPlot = np.transpose(XCorr_obs)

    if one_dimensional:
        _, (chi2, reduced_covar, mask_nonflat) = vega.log_lik(return_aux=True)

        # Reduce xcorrs: mask the bins flagged by the likelihood and sum along axis 0,
        # leaving one value per sigma bin.
        XCorrPlot = np.ma.array(XCorrPlot, mask=mask_nonflat.T).sum(axis=0)
        XModelPlot = np.ma.array(XModelPlot, mask=mask_nonflat.T).sum(axis=0)
        assert len(XCorrPlot) == len(SigCentersPlot)
        assert XCorrPlot.ndim == 1

        plt.errorbar(SigCentersPlot, XCorrPlot, yerr=np.sqrt(np.diag(reduced_covar)), color='black', label='Observed')
        plt.plot(SigCentersPlot, XModelPlot, color='red', label='Best-fit model')
        plt.xlabel(r'$\sigma\; (\mathrm{cMpc})$')
        plt.ylabel(r'$\pi$-summed cross-correlation')
        # Raw f-string: '\c' is not a valid escape sequence in a normal string literal.
        plt.title(rf'Reduced $\chi^2$ = {chi2 / (np.sum(~XCorrPlot.mask) - model_num_params):.3f}', y=0.9, fontsize=10)
    else:
        fig, (ax1, ax2) = plt.subplots(1,2,figsize=(5,5))

        # Left panel: observed cross-correlation.
        ax1.pcolormesh(SigEdgesPlot, PiEdgesPlot, XCorrPlot,cmap='jet_r',vmin=-0.2, vmax=0.1 )
        ax1.set_aspect('equal')
        # Honour SigMin here as well so both panels share the same x-range.
        ax1.set_xlim(SigMin, SigMax)
        ax1.set_ylim(PiMin, PiMax)
        ax1.set_xlabel(r'$\sigma\; (\mathrm{cMpc})$')
        ax1.set_ylabel(r'$\pi\; (\mathrm{cMpc})$')

        # Right panel: model evaluated at the supplied parameters.
        ax2.pcolormesh(SigEdgesPlot, PiEdgesPlot, XModelPlot,cmap='jet_r',vmin=-0.2, vmax=0.1 )
        ax2.set_aspect('equal')
        ax2.set_xlim(SigMin, SigMax)
        ax2.set_ylim(PiMin, PiMax)
        ax2.set_xlabel(r'$\sigma\; (\mathrm{cMpc})$')
        fig.suptitle(rf'Reduced $\chi^2$ = {vega.chi2() / (np.sum(vega.data["qsoxlya"].mask) - model_num_params):.3f}', y=0.9, fontsize=10)

In [None]:
def corner_plot(survey_name, chain_suffix='', config_suffix=None, show_grid_fit=False, one_dimensional=False, **kwargs):
    """Summarise one survey's split-sample MCMC chain and plot the median model.

    Reads ``MCMC_DIR_BASE/{survey_name}/chain_{survey_name}{chain_suffix}.hdf5``,
    discards burn-in and thins based on the autocorrelation time, prints chain
    diagnostics, saves a corner plot, and then saves one data-vs-model plot per
    stacked mass bin evaluated at the posterior medians. Figures are written to
    ``FIG_DIR_BASE/split-mcmc``.

    Parameters
    ----------
    survey_name : str
        Survey key; must also be a key of ``SURVEY_TITLE_MAPPING``.
    chain_suffix : str
        Suffix of the chain file and of the saved figure names.
    config_suffix : str or None
        Suffix of the YAML config ``mcmc_cfg/{survey_name}_split{config_suffix}.yaml``
        (matched case-insensitively on the file name). Defaults to ``chain_suffix``.
    show_grid_fit : bool
        If True, overlay the values of ``config['initial']`` as "truths" in red.
        NOTE(review): this assumes those values are ordered like the sample
        columns - confirm against the config files.
    one_dimensional : bool
        Forwarded to ``plot_model_w_data``.
    **kwargs
        Forwarded to ``corner.corner``.

    Returns
    -------
    numpy.ndarray
        The flattened, burn-in-discarded, thinned samples in the sampler's own
        convention (without the sign flip and halving applied for the corner plot).

    Raises
    ------
    RuntimeError
        If no matching, non-empty config file is found.
    """
    fig_dir = os.path.join(constants.FIG_DIR_BASE, 'split-mcmc')
    backend = emcee.backends.HDFBackend(
        os.path.join(constants.MCMC_DIR_BASE, survey_name, f'chain_{survey_name}{chain_suffix}.hdf5'),
        read_only=True)

    # Locate the YAML config. The glob is relative to the current working directory.
    if config_suffix is None:
        config_suffix = chain_suffix
    config_name = f'{survey_name.casefold()}_split{config_suffix}.yaml'
    config = None
    for p in glob.glob('mcmc_cfg/*.yaml'):
        if os.path.basename(p).casefold() == config_name:
            with open(p, 'r') as f:
                config = yaml.safe_load(f)
    if not config:
        raise RuntimeError(
            f"No non-empty MCMC config named '{config_name}' found in 'mcmc_cfg/' "
            f"(working directory: {os.getcwd()})")
    initial = config['initial']
    if 'beta_QSO' in initial and initial['beta_QSO'] is None:
        del initial['beta_QSO']

    tau = backend.get_autocorr_time(quiet=True)
    burnin = int(2 * np.max(tau))
    # Clamp to 1: a thinning step of 0 (autocorrelation time below 2) is not a valid stride.
    thin = max(1, int(0.5 * np.min(tau)))
    chain_len = len(backend.get_chain())
    samples = backend.get_chain(discard=burnin, flat=True, thin=thin)

    print(f"Autocorrelation time: {tau}")
    print(f"Chain length: {chain_len}; Chain length / 50: {chain_len / 50:.2f}")
    print("Burn-in: {0}".format(burnin))
    print("Thin: {0}".format(thin))
    print("Flat chain shape: {0}; {1:.1f} autocorrelation times.".format(samples.shape, samples.shape[0] / np.max(tau)))

    # Report the first prefix of the chain whose medians already agree with the
    # final medians to within 5% (relative).
    final_medians = np.median(samples, axis=0)
    unflat_samples = backend.get_chain(discard=burnin, flat=False, thin=thin)
    for i in range(1, unflat_samples.shape[0]):
        subsample = unflat_samples[:i].reshape(-1, samples.shape[-1])
        if np.allclose(np.median(subsample, axis=0), final_medians, rtol=0.05):
            print(f'# chain iterations to arrive at ~5% parameter estimate: {i * thin + burnin}')
            break

    # A list copy (not a dict view) so individual entries can be rescaled below
    # without touching the loaded config.
    truths = list(initial.values()) if show_grid_fit else None

    # Column layout follows the label order built below: one bias per mass bin, then
    # sigma_z (if sampled), then delta_z (if sampled). delta_z is therefore the last
    # column, and sigma_z is second-to-last only when delta_z is also present.
    has_sigma_z = 'sigma_velo_disp_gauss_QSO' in initial
    has_delta_z = 'drp_QSO' in initial
    delta_z_ind = -1
    sigma_z_ind = -2 if has_delta_z else -1

    plot_samples = np.copy(samples)
    if has_delta_z:
        # For corner plot, multiply delta_z by -1 so we're consistent with definitions.
        plot_samples[:, delta_z_ind] *= -1
        if truths:
            truths[delta_z_ind] *= -1
    if has_sigma_z:
        # Divide dispersion by 2 to get to what we consider z-dispersion.
        plot_samples[:, sigma_z_ind] /= 2
        if truths:
            truths[sigma_z_ind] /= 2

    corner_labels = []
    for mass_bin in constants.STACKED_BIN_TITLES:
        corner_labels.append(f"{STACKED_MASS_BIN_TITLE_MAPPING[mass_bin]} {LABEL_MAPPING['bias_QSO']}")
    if has_sigma_z:
        corner_labels.append(LABEL_MAPPING['sigma_velo_disp_gauss_QSO'])
    if has_delta_z:
        corner_labels.append(LABEL_MAPPING['drp_QSO'])

    corner.corner(plot_samples, labels=corner_labels, show_titles=True,
                  truths=truths, truth_color='red',
                  **kwargs)
    # Save a titled PNG first, then clear the title for the untitled PDF/PNG pair.
    plt.suptitle(SURVEY_TITLE_MAPPING[survey_name], x=0.54, weight='bold', fontsize=14)
    plt.savefig(os.path.join(fig_dir, f'titledcorner_{survey_name}{chain_suffix}.png'))
    plt.suptitle('')
    plt.savefig(os.path.join(fig_dir, f'corner_{survey_name}{chain_suffix}.pdf'))
    plt.savefig(os.path.join(fig_dir, f'corner_{survey_name}{chain_suffix}.png'))
    plt.show()

    for i, mass_bin in enumerate(constants.STACKED_BIN_TITLES):
        # Medians come from the untransformed samples: the model takes the sampler's
        # own convention, not the one used on the corner plot.
        median_params = {
            'bias_QSO': np.median(samples[:, i])
        }
        if has_sigma_z:
            median_params['sigma_velo_disp_gauss_QSO'] = np.median(samples[:, sigma_z_ind])
        if has_delta_z:
            median_params['drp_QSO'] = np.median(samples[:, delta_z_ind])

        if 'fixed' in config and config['fixed']:
            for k, p in config['fixed'].items():
                median_params[k] = p
        # Each mass bin's model uses one bias plus the shared parameters, so the other
        # bins' bias columns are not counted as parameters of this model.
        plot_model_w_data(f'{survey_name}_{mass_bin}', median_params, samples.shape[1] - (len(constants.STACKED_BIN_TITLES) - 1), one_dimensional=one_dimensional)
        plt.tight_layout()

        # Untitled versions first, then prepend the mass bin and survey to the title.
        plt.savefig(os.path.join(fig_dir, f'bestmodel_{survey_name}_{mass_bin}{chain_suffix}.pdf'))
        plt.savefig(os.path.join(fig_dir, f'bestmodel_{survey_name}_{mass_bin}{chain_suffix}.png'))
        if one_dimensional:
            plt.title(f'{STACKED_MASS_BIN_TITLE_MAPPING[mass_bin]} {SURVEY_TITLE_MAPPING[survey_name]} | ' + plt.gca().get_title(), y=0.9, fontsize=10)
        else:
            plt.suptitle(f'{STACKED_MASS_BIN_TITLE_MAPPING[mass_bin]} {SURVEY_TITLE_MAPPING[survey_name]} | ' + plt.gcf()._suptitle.get_text(), y=0.9, fontsize=10)
        plt.savefig(os.path.join(fig_dir, f'titledbestmodel_{survey_name}_{mass_bin}{chain_suffix}.png'))
        plt.show()
    return samples

In [None]:
# Posterior samples returned by corner_plot, keyed by survey name:
# one container for the 2D fits and one for the 1D fits.
sample_2d = dict()
sample_1d = dict()

In [None]:
# CLAMATO, 2D fit: default (empty) chain and config suffixes.
sample_2d['clamato'] = corner_plot('clamato')

In [None]:
# CLAMATO, 1D fit: reads the '_oned' chain (the config suffix defaults to the same).
sample_1d['clamato'] = corner_plot('clamato', chain_suffix='_oned', one_dimensional=True)

In [None]:
# VUDS, 2D fit: default (empty) chain and config suffixes.
sample_2d['vuds'] = corner_plot('vuds')

In [None]:
# VUDS, 1D fit: reads the '_oned' chain (the config suffix defaults to the same).
sample_1d['vuds'] = corner_plot('vuds', chain_suffix='_oned', one_dimensional=True)

In [None]:
# COSMOS-zDEEP, 2D fit: default (empty) chain and config suffixes.
sample_2d['zDeep'] = corner_plot('zDeep')

In [None]:
# COSMOS-zDEEP, 1D fit: reads the '_oned' chain (the config suffix defaults to the same).
sample_1d['zDeep'] = corner_plot('zDeep', chain_suffix='_oned', one_dimensional=True)

In [None]:
# MOSDEF, 2D fit: default (empty) chain and config suffixes.
sample_2d['mosdef'] = corner_plot('mosdef')

In [None]:
# MOSDEF, 1D fit: reads the '_oned' chain (the config suffix defaults to the same).
sample_1d['mosdef'] = corner_plot('mosdef', chain_suffix='_oned', one_dimensional=True)

In [None]:
# Compare surveys: overlay the normalised posterior histograms of the first three
# sample columns from the 2D fits, one panel per column.
mass_bin_panels = ('Low mass', 'Medium mass', 'High mass')
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))

for survey, samples in sample_2d.items():
    for i, label in enumerate(mass_bin_panels):
        panel = axes[i]
        panel.hist(samples[:, i], bins=30, alpha=0.4, label=survey, density=True)
        panel.set_title(label)

fig.suptitle('2D cross-correlation')
# The legend goes on the last panel, which is the current axes after plt.subplots.
axes[-1].legend()
fig.tight_layout()

In [None]:
# Compare surveys: overlay the normalised posterior histograms of the first three
# sample columns from the 1D fits, one panel per column.
mass_bin_panels = ('Low mass', 'Medium mass', 'High mass')
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))

for survey, samples in sample_1d.items():
    for i, label in enumerate(mass_bin_panels):
        panel = axes[i]
        panel.hist(samples[:, i], bins=30, alpha=0.4, label=survey, density=True)
        panel.set_title(label)

fig.suptitle('1D cross-correlation')
# The legend goes on the last panel, which is the current axes after plt.subplots.
axes[-1].legend()
fig.tight_layout()

In [None]:
# Standalone colorbar image: draw a two-pixel dummy image spanning -0.2 to 0.1 (the
# same vmin/vmax and colormap as the 2D panels), hide it, and keep only its colorbar.
fig = plt.figure(figsize=(1.5, 5))
ax = fig.gca()
img = ax.imshow(np.array([[-0.2, 0.1]]), cmap='jet_r')
img.set_visible(False)
ax.axis('off')
fig.colorbar(img, ax=ax, fraction=1)
fig.savefig(os.path.join(constants.FIG_DIR_BASE, 'split-mcmc', 'bestmodel_colorbar.png'))

In [None]:
# Assemble the figure mosaic by running create-mosaic.sh from inside the figure
# directory.
mosaic_dir = os.path.join(constants.FIG_DIR_BASE, 'split-mcmc')
pwd = os.getcwd()
os.chdir(mosaic_dir)
try:
    mosaic_status = os.system('./create-mosaic.sh')
finally:
    # Always restore the working directory, even if the run is interrupted:
    # corner_plot looks up 'mcmc_cfg/*.yaml' relative to it.
    os.chdir(pwd)
if mosaic_status != 0:
    # os.system does not raise on failure; surface it instead of ignoring it.
    print(f'create-mosaic.sh exited with non-zero status {mosaic_status}')