# get libraries 

In [None]:
import sys
import os

# this is Cullan's code to run and plot the second gen mocks with all appropriate settings 

sys.path.append("..")
sys.path.append("../..")
sys.path.append("../../Barry/")
from barry.samplers import NautilusSampler
from barry.config import setup
from barry.models import PowerBeutler2017, CorrBeutler2017
from barry.datasets.dataset_power_spectrum import PowerSpectrum_DESI_KP4
from barry.datasets.dataset_correlation_function import CorrelationFunction_DESI_KP4
from barry.fitter import Fitter
import numpy as np
import scipy as sp
import pandas as pd
from barry.models.model import Correction
from barry.utils import weighted_avg_and_cov
import matplotlib.colors as mplc
import matplotlib.pyplot as plt
import pickle
from scipy.stats import gaussian_kde 
from chainconsumer import ChainConsumer, Chain, Truth, PlotConfig

# choose settings to combine mocks - which mocks 
### 26 prerecon spline beta free 0 - 25
### 26 prerecon spline beta fixed 26 - 51
### 26 prerecon poly beta free 52 - 77
### 26 prerecon poly beta fixed 78 - 103

### 26 postrecon spline beta free 104 - 129 
### 26 postrecon spline beta fixed 130 - 155
### 26 postrecon poly beta free 156 - 181 
### 26 postrecon poly beta fixed 182 - 207

In [None]:
# --- Analysis settings -------------------------------------------------------
broadband_method = 'spline'  # broadband model: 'spline' or 'poly'
recon = 'post'               # reconstruction state: 'post' or 'pre'
data = 'pk'                  # clustering statistic: 'pk' or 'xi'
betaphaseshiftfit = True     # selects the "beta free" blocks of realisations

# Filename suffix used when beta is varied.
extra = '_betafit' if betaphaseshiftfit else ''

# Chain columns kept for tracers that fit both alpha and epsilon.
list_paramswanted = [r'$\alpha$', r'$\epsilon$']
if betaphaseshiftfit:
    list_paramswanted.append(r'$\beta_{\phi(N_{\mathrm{eff}})}$')
list_paramswanted.append('weights')

# Same columns without epsilon, for the BGS and QSO chains.
copy_list_BGSQSO = [column for column in list_paramswanted if column != r'$\epsilon$']

# First index of each block of 26 realisations, keyed by
# (broadband method, recon state, beta free?).
_first_index = {
    ('spline', 'pre', True): 0,
    ('spline', 'pre', False): 26,
    ('poly', 'pre', True): 52,
    ('poly', 'pre', False): 78,
    ('spline', 'post', True): 104,
    ('spline', 'post', False): 130,
    ('poly', 'post', True): 156,
}
# Any combination not listed falls through to the last block (182-207).
_start = _first_index.get((broadband_method, recon, bool(betaphaseshiftfit)), 182)
list_loop = np.arange(_start, _start + 26)

In [None]:
# Show the realisation indices selected by the settings above.
print(list_loop) 

# data paths 

In [None]:
# Post-recon mocks are used for everything except the QSOs - too few QSOs to
# have a good reconstruction.
# (Older chain location, no longer used: '/global/u1/a/abbew25/files_secondgen_chains/')
path = '/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/plots/desi_kp4_SecondGen_'


def _load_fitter(sample):
    """Unpickle the saved fitter of one sample, e.g. ``'QSOs_z08_21'``.

    The file name is built from the module-level ``path`` and ``data``.
    """
    tag = sample + '_' + data
    with open(path + tag + '/output/desi_kp4_SecondGen_' + tag + '.fitter.pkl', 'rb') as handle:
        return pickle.load(handle)


fitter_QSO = _load_fitter('QSOs_z08_21')   # no epsilon fits
fitter_BGS = _load_fitter('BGS_z01_04')    # no epsilon fits
fitter_LRG1 = _load_fitter('LRGs_z04_06')
fitter_LRG2 = _load_fitter('LRGs_z06_08')

# Third LRG bin: LRG3 alone for pk, the combined LRG3+ELG1 sample otherwise.
if data == 'pk':
    fitter_LRG3 = _load_fitter('LRGs_z08_11')
else:
    fitter_LRG3ELG1 = _load_fitter('ELGsLRGscombined_z08_11')

fitter_ELG2 = _load_fitter('ELGs_z11_16')

# Tracer name -> fitter, in the order the tracers are processed later.
fitters = dict(QSO=fitter_QSO, BGS=fitter_BGS, LRG1=fitter_LRG1,
               LRG2=fitter_LRG2, ELG2=fitter_ELG2)
if data == 'pk':
    fitters['LRG3'] = fitter_LRG3
else:
    fitters['LRG3ELG1'] = fitter_LRG3ELG1
    

# Let's take mock realization 0 and try to construct a KDE from it and plot it 


In [None]:
# Index 104 is the first realisation of the post-recon / spline / beta-free
# block (104 - 129) listed in the index table at the top of the notebook.
realisation_number = 104

# load() is called once and its result reused, instead of once per field.
_elg2_fit = fitter_ELG2.load()[realisation_number]
# Entries 1, 2 and 4 of a loaded fit are the weights, the chain and the model.
weight, chain, model = _elg2_fit[1], _elg2_fit[2], _elg2_fit[4]

# df_BGS = pd.DataFrame(chain, columns=model.get_labels())
print(model.get_labels())
# df_BGS['weights'] = weight

# del weight, chain, model

# xmin = np.min(df_BGS[r'$\alpha$'].to_numpy())
# xmax = np.max(df_BGS[r'$\alpha$'].to_numpy())

# positions = np.linspace(xmin, xmax, 100)

# # take the interpolated points and shape the output to be 2D 
# kde_BGS = gaussian_kde(np.vstack([
#     df_BGS[r'$\alpha$'].to_numpy()]), 
#     weights=df_BGS['weights'].to_numpy())

# Z = kde_BGS(positions)
# Z = np.flip(Z, axis=0)

# plt.plot(positions, Z)

# plot the results 
# import matplotlib as mpl

# fig, ax = plt.subplots()

# # ax.plot(chains_ELGs_z_1p1_1p6[r'$\alpha$'].to_numpy(), chains_ELGs_z_1p1_1p6[r'$\epsilon$'].to_numpy(), 
# #     'k.', markersize=0.5, alpha = 0.2)

# maxxarg, maxyarg = np.unravel_index(np.argmax(np.flip(Z,axis=0).T), shape=X.shape)

# plt.scatter(x[maxxarg], y[maxyarg], color='red', marker='*', s=26)

# ax.imshow(Z, cmap=plt.cm.gist_earth_r, extent=[xmin, xmax, ymin, ymax])

# norm = mpl.colors.Normalize(vmin=np.min(Z.reshape(-1)), vmax=np.max(Z.reshape(-1)))

# sm = plt.cm.ScalarMappable(cmap=plt.cm.gist_earth_r, norm=norm)
# plt.colorbar(sm, ax=ax)

# ax.set_xlim([xmin, xmax])

# ax.set_ylim([ymin, ymax])

# plt.show()


# Functions needed for the MCMC fit 

In [None]:
def log_prob_betaphaseshift(x):
    """Return the combined log-likelihood of all tracers at parameter vector ``x``.

    Each tracer's likelihood is the value of its KDE, taken from the second
    entry of ``dataframes_kdes[tracer]``. The log-densities of the six tracers
    are summed, and the same ``beta`` (``x[10]``) is passed to every KDE.
    Reads the module-level ``dataframes_kdes`` and ``data``.

    Parameters
    ----------
    x : sequence of 11 floats
        ``[alpha_QSO, alpha_BGS, alpha_LRG1, eps_LRG1, alpha_LRG2, eps_LRG2,
        alpha_LRG3, eps_LRG3, alpha_ELG2, eps_ELG2, beta]``. When ``data`` is
        not ``'pk'``, entries 6 and 7 are evaluated with the ``'LRG3ELG1'`` KDE.

    Returns
    -------
    float
        Sum of the log KDE densities, or ``-np.inf`` when a parameter lies
        outside its flat prior (``|alpha - 1| < 0.2``, ``|epsilon| < 0.2``,
        ``-3 <= beta <= 9``) or a KDE density is zero or not finite.
    """
    beta = x[10]
    # Written as "not (inside)" so that a NaN is rejected as well.
    if not (-3.0 <= beta <= 9):
        return -np.inf

    lrg3_key = 'LRG3' if data == 'pk' else 'LRG3ELG1'
    # (tracer, index of alpha in x, index of epsilon in x or None)
    tracers = (
        ('QSO', 0, None),
        ('BGS', 1, None),
        ('LRG1', 2, 3),
        ('LRG2', 4, 5),
        (lrg3_key, 6, 7),
        ('ELG2', 8, 9),
    )

    logl = 0.0
    for tracer, i_alpha, i_eps in tracers:
        alpha = x[i_alpha]
        # Prior is centred on alpha = 1; abs(alpha) - 1 would leave the lower
        # bound unenforced.
        if not abs(alpha - 1.0) < 0.2:
            return -np.inf

        if i_eps is None:
            point = [alpha, beta]
        else:
            epsilon = x[i_eps]
            if not abs(epsilon) < 0.2:
                return -np.inf
            point = [alpha, epsilon, beta]

        # The KDE is only evaluated once the point has passed the prior cuts.
        density = dataframes_kdes[tracer][1](point)[0]
        if not np.isfinite(density) or density <= 0.0:
            return -np.inf
        logl += np.log(density)

    return logl


def _tracer_label(symbol, tracer):
    """Return the LaTeX label of ``symbol`` with the tracer name as subscript."""
    return '$\\' + symbol + '_{\\mathrm{' + tracer + '}}$'


def _combined_labels(third_lrg_tracer):
    """Return the 11 combined-fit column labels, in parameter-vector order.

    QSO and BGS contribute alpha only; the other four tracers contribute alpha
    and epsilon; the shared beta comes last.
    """
    names = [_tracer_label('alpha', 'QSO'), _tracer_label('alpha', 'BGS')]
    for tracer in ('LRG1', 'LRG2', third_lrg_tracer, 'ELG2'):
        names.append(_tracer_label('alpha', tracer))
        names.append(_tracer_label('epsilon', tracer))
    names.append(r'$\beta_{N_{\mathrm{eff}}}$')
    return names


# The two label sets differ only in the name of the third LRG bin.
labels_pk = _combined_labels('LRG3')
labels_xi = _combined_labels('LRG3ELG1')

labels = labels_pk if data == 'pk' else labels_xi

## OK, let's loop through the mocks we want and build the KDE for each, then multiply them together to get a combined likelihood. 

## Then run an MCMC fit to the likelihood in order to get a fit to beta given the information from all the mocks.

## Save the best fit values to a text file. 

In [None]:
import emcee

# The KDEs built below always include the beta column, which is only kept in
# the chain DataFrames when beta is varied in the fits.
if not betaphaseshiftfit:
    raise ValueError("the combined beta fit needs betaphaseshiftfit = True (chains with beta free)")

beta_column = r'$\beta_{\phi(N_{\mathrm{eff}})}$'

count = 0
for realisation_number in [104]: # list_loop:

    # tracer name -> [DataFrame of the kept chain columns, gaussian_kde of those samples]
    dataframes_kdes = {}

    for item in fitters:

        # load() is called once per tracer and its result reused; entries 1, 2
        # and 4 of a loaded fit are the weights, the chain and the model.
        fit = fitters[item].load()[realisation_number]
        weight, chain, model = fit[1], fit[2], fit[4]

        df = pd.DataFrame(chain, columns=model.get_labels())
        df['weights'] = weight

        print(model.get_labels(), item)

        # QSO and BGS have no epsilon column, so their KDE is 2D (alpha, beta);
        # every other tracer gets a 3D KDE (alpha, epsilon, beta).
        if item in ['QSO', 'BGS']:
            df = df[copy_list_BGSQSO]
            kde_columns = [r'$\alpha$', beta_column]
        else:
            df = df[list_paramswanted]
            kde_columns = [r'$\alpha$', r'$\epsilon$', beta_column]

        kde = gaussian_kde(
            np.vstack([df[column].to_numpy() for column in kde_columns]),
            weights=df['weights'].to_numpy())

        dataframes_kdes[item] = [df, kde]

    # Now run an MCMC fit to the combined likelihood of the KDEs
    # (log_prob_betaphaseshift reads dataframes_kdes as a global).
    dim = 11
    np.random.seed(42)
    nwalkers = 32

    # Walkers start in a small box; one (low, high) range per dimension, in the
    # parameter order expected by log_prob_betaphaseshift.
    alpha_start = (0.99, 1.01)
    epsilon_start = (-0.01, 0.01)
    beta_start = (0.99, 1.01)
    start_ranges = [alpha_start, alpha_start] + [alpha_start, epsilon_start] * 4 + [beta_start]
    p0 = np.array([np.random.uniform(low, high, nwalkers) for low, high in start_ranges]).T

    # We'll track how the average autocorrelation time estimate changes
    max_n = 20000
    index = 0
    autocorr = np.empty(max_n)

    # This will be useful for testing convergence
    old_tau = np.inf

    sampler = emcee.EnsembleSampler(nwalkers, dim, log_prob_betaphaseshift)

    # Now we'll sample for up to max_n steps
    for sample in sampler.sample(p0, iterations=max_n, progress=True):
        # Only check convergence every 100 steps
        if sampler.iteration % 100:
            continue

        # Compute the autocorrelation time so far.
        # Using tol=0 means that we'll always get an estimate even
        # if it isn't trustworthy
        tau = sampler.get_autocorr_time(tol=0)
        autocorr[index] = np.mean(tau)
        index += 1

        # Check convergence
        converged = np.all(tau * 100 < sampler.iteration)
        converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
        if converged:
            break
        old_tau = tau

    # Discard 5000 burn-in steps, but never more than half of the steps taken:
    # the convergence break can stop the sampler before step 5000, and a fixed
    # discard would then leave an empty chain.
    burnin = min(5000, sampler.iteration // 2)
    chains_flat = sampler.get_chain(flat=True, discard=burnin)

    # Label the columns with the set chosen for the current data type
    # (labels_pk or labels_xi), not always the xi names.
    df_fit = pd.DataFrame({labels[i]: chains_flat[:, i] for i in range(chains_flat.shape[1])})

    # Save to a file. The realisation index is part of the name so that each
    # realisation gets its own file, matching the "<data>_<recon>_<method>_<index>.csv"
    # names that the plotting cells read back.
    output_dir = "/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/combinedfits_secondgen_mocks_v1_2/"
    df_fit.to_csv(output_dir + data + "_" + recon + "_" + broadband_method + "_" + str(realisation_number) + ".csv")
    count += 1
# plot the result 




In [None]:
# sampler.sample(, iterations=5000, progress=True)

In [None]:
# Autocorrelation-time estimate from the last convergence check of the sampling loop.
print(tau)

In [None]:
# Rebuild the combined-fit DataFrame from the flattened chain, labelling the
# columns with the set chosen for the current data type (labels_pk or labels_xi).
df_fit = pd.DataFrame({labels[i]: chains_flat[:, i] for i in range(chains_flat.shape[1])})


In [None]:
from chainconsumer import Chain, ChainConsumer

# Plot only the beta column of the combined fit, with a truth marker at 1.0.
_beta_label = r'$\beta_{N_{\mathrm{eff}}}$'

c = ChainConsumer()
c.add_chain(Chain(samples=df_fit, name='chains'))
c.add_truth(Truth(location={_beta_label: 1.0}))
c.plotter.plot(columns=[_beta_label])

In [None]:
# Gaussian approximation of the (alpha, epsilon, beta) posterior of one saved
# combined fit, drawn with ChainConsumer from the sample mean and covariance.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from chainconsumer import ChainConsumer, Chain, Truth, PlotConfig

df = pd.read_csv(
"/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/combinedfits_secondgen_mocks_v1_2/xi_post_poly_160.csv")

# One row per parameter, one column per sample (the layout np.cov uses by default).
samples = np.vstack((df[r'$\alpha_{\mathrm{LRG3ELG1}}$'],
                     df[r'$\epsilon_{\mathrm{LRG3ELG1}}$'],
                     df[r'$\beta_{N_{\mathrm{eff}}}$']))

means = np.mean(samples, axis=1)

cov = np.cov(samples)
fisher_matrix = np.linalg.inv(cov)

truths = {r'$\alpha$': 1.0, r'$\epsilon$': 0.0, r'$\beta$': 1.0}
names = [r'$\alpha$', r'$\epsilon$', r'$\beta$']

c = ChainConsumer()
# The sample covariance is passed directly as a plain ndarray: inverting the
# Fisher matrix again only reproduces cov with extra rounding error, and
# np.matrix is no longer recommended by NumPy.
c.add_chain(Chain.from_covariance(mean=means,
                covariance=cov, columns=names, name='cov'))

c.plotter.plot()


# pts = np.random.multivariate_normal([1.0, 0, 1.0], cov, size=10000)

# df = pd.DataFrame({names[i]: pts[:,i] for i in np.arange(len(names))})
# c.add_chain(Chain(samples=df, name='chains'))
# c.add_truth(Truth(location=truths))


In [None]:
# Set up the plot style and compute the sample mean and covariance of
# (alpha, epsilon, beta) from one saved combined fit.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

plt.style.use('ggplot')
plt.figure(facecolor="white")
plt.rcParams.update({'figure.figsize': (10, 6), 'figure.dpi': 150, 'font.size': 7})

df = pd.read_csv(
    "/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/combinedfits_secondgen_mocks_v1_2/xi_post_poly_160.csv"
)

# One row per parameter, one column per sample.
_fit_columns = [r'$\alpha_{\mathrm{LRG3ELG1}}$',
                r'$\epsilon_{\mathrm{LRG3ELG1}}$',
                r'$\beta_{N_{\mathrm{eff}}}$']
_samples = np.vstack([df[column] for column in _fit_columns])

means = _samples.mean(axis=1)
cov = np.cov(_samples)
print(cov)

# Short parameter names for the axis labels, and the values used as truths.
names = [r'$\alpha$', r'$\epsilon$', r'$\beta$']
truths = {r'$\alpha$': 1.0, r'$\epsilon$': 0.0, r'$\beta$': 1.0}

def plot_cov_ellipses(cov, params, ndim, means):
    """Plot 1- and 2-sigma covariance ellipses for every pair of parameters.

    An ``ndim`` x ``ndim`` grid of panels is created and only the panels below
    the diagonal are drawn. In panel ``(i, j)`` parameter ``i`` is on the
    x-axis and parameter ``j`` on the y-axis.

    Parameters
    ----------
    cov : 2D array, indexed as ``cov[i, j]``
        Parameter covariance matrix.
    params : sequence of str
        Axis label of each parameter.
    ndim : int
        Number of parameters.
    means : sequence of float
        Centre of the ellipses along each parameter.
    """
    # squeeze=False keeps `axes` two-dimensional even when ndim == 1.
    fig, axes = plt.subplots(ndim, ndim, squeeze=False)

    for i in range(ndim):
        for j in range(ndim):
            ax = axes[i][j]

            if j >= i:
                ax.set_visible(False)
                continue

            var_x = cov[i, i]
            var_y = cov[j, j]
            covar_xy = cov[i, j]

            # Eigenvalues of the 2x2 sub-covariance: the variances along the
            # major (a) and minor (b) axes of the ellipse.
            centre = (var_x + var_y) / 2.0
            spread = np.sqrt(((var_x - var_y) ** 2) / 4.0 + covar_xy ** 2)
            a = centre + spread
            # Rounding can push b slightly below zero for a degenerate pair.
            b = max(centre - spread, 0.0)

            # 1.52 and 2.48 scale a standard deviation to the semi-axis of the
            # 2D 68.3% and 95.4% regions (sqrt of delta chi^2 = 2.30 and 6.17).
            # Ellipse takes the full axis lengths, hence the factor of 2.
            width_1sigma, height_1sigma = 2.0 * 1.52 * np.sqrt(a), 2.0 * 1.52 * np.sqrt(b)
            width_2sigma, height_2sigma = 2.0 * 2.48 * np.sqrt(a), 2.0 * 2.48 * np.sqrt(b)

            # Orientation of the major axis, measured from the x-axis.
            theta = np.arctan2(2.0 * covar_xy, (var_x - var_y)) / 2.0
            angle_deg = theta * 180.0 / np.pi
            print(angle_deg, params[i], params[j])

            ellipse1 = Ellipse(xy=(means[i], means[j]), width=width_1sigma,
                               height=height_1sigma, angle=angle_deg)
            ellipse2 = Ellipse(xy=(means[i], means[j]), width=width_2sigma,
                               height=height_2sigma, angle=angle_deg)
            ellipse1.set(alpha=(1.0), facecolor='green')
            ellipse2.set(alpha=(0.2), facecolor='purple')
            ax.add_patch(ellipse1)
            ax.add_patch(ellipse2)

            ax.scatter(means[i], means[j], color='white')
            ax.set_xlabel(params[i])
            ax.set_ylabel(params[j])

            # Limits come from the marginal standard deviations, so the
            # 2-sigma ellipse (extent 2.48 sigma per axis) fits whatever its
            # rotation.
            sigma_x, sigma_y = np.sqrt(var_x), np.sqrt(var_y)
            ax.set_xlim([means[i] - 3.0 * sigma_x, means[i] + 3.0 * sigma_x])
            ax.set_ylim([means[j] - 3.0 * sigma_y, means[j] + 3.0 * sigma_y])
            # This ratio of data units makes the panel square.
            if sigma_y > 0:
                ax.set_aspect(sigma_x / sigma_y)

    plt.subplots_adjust(hspace=0.4, right=0.75)
    plt.show()
                
            
# The ellipses are centred on [1.0, 0.0, 1.0] (the values in `truths`), not on the
# sample `means` computed above.  NOTE(review): presumably deliberate - confirm.
plot_cov_ellipses(cov, names, 3, [1.0, 0.0, 1.0])



# read in the chains for the combined fits to the datasets and plot 

In [None]:
# Read the saved combined-fit chains, overlay them in one ChainConsumer plot
# and mark the truth value of every parameter.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from chainconsumer import ChainConsumer, Chain, Truth, PlotConfig

main = '/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/combinedfits_secondgen_mocks_v1_2/'

c = ChainConsumer()

count = 0
for i in np.arange(0, 1):
    # The first CSV column is used as the DataFrame index.
    df = pd.read_csv(f"{main}xi_pre_spline_{i}.csv", index_col=0)
    # Drop every sample that has a NaN in any column.
    df = df.dropna(subset=df.columns)

    # The first chain read is labelled as the mock mean.
    name = 'chain mock mean' if count == 0 else 'chain ' + str(count)
    c.add_chain(Chain(samples=df, name=name))
    count += 1

# Truth values: alpha = 1 and epsilon = 0 for every tracer, beta = 1.
# (For chains labelled with the pk names, 'LRG3' replaces 'LRG3ELG1'.)
_alpha = r'$\alpha_{\mathrm{%s}}$'
_epsilon = r'$\epsilon_{\mathrm{%s}}$'
truth_location = {_alpha % 'QSO': 1.0, _alpha % 'BGS': 1.0}
for tracer in ('LRG1', 'LRG2', 'LRG3ELG1', 'ELG2'):
    truth_location[_alpha % tracer] = 1.0
    truth_location[_epsilon % tracer] = 0.0
truth_location[r'$\beta_{N_{\mathrm{eff}}}$'] = 1.0

c.add_truth(Truth(location=truth_location))
c.plotter.plot()#columns=[r'$\beta_{N_{\mathrm{eff}}}$'])