# Pickle Abacus SecondGen Xi files for Barry 

In [None]:
# Import the necessary packages, set up the fiducial cosmology and save the DESI template
import os
import pickle
import numpy as np
import scipy as sp
import pandas as pd
from astropy.io import ascii
import matplotlib.pyplot as plt
from scipy.interpolate import splrep, splev
from cosmoprimo import PowerSpectrumBAOFilter
from cosmoprimo.fiducial import DESI, AbacusSummit
from pypower import BaseMatrix, CatalogFFTPower, CatalogFFTCorr, PowerSpectrumMultipoles, PowerSpectrumSmoothWindow, PowerSpectrumSmoothWindowMatrix, PowerSpectrumOddWideAngleMatrix, setup_logging
from pycorr import TwoPointCorrelationFunction, project_to_multipoles

# Fiducial DESI cosmology used for every pickle produced by this notebook.
cosmo = DESI()

# Physical densities: omega_b, omega_cdm and (omega_m - omega_b).
h_sq = cosmo["h"] ** 2
omega_b, omega_cdm, omega_m = (cosmo[key] * h_sq for key in ("Omega_b", "Omega_cdm", "Omega_m"))
print(omega_b, omega_cdm, omega_m - omega_b)
print(*(cosmo[key] for key in ("A_s", "n_s", "tau_reio")))
print(np.sum(cosmo["m_ncdm"]))

print(cosmo.sigma8_z)
# Omega_m, growth rate and f*sigma8 at z = 0 and z = 0.11.
growth_summary = [
    cosmo["Omega_m"],
    cosmo.Omega_m(0.11),
    cosmo.growth_rate(0.0),
    cosmo.growth_rate(0.11),
    cosmo.growth_rate(0.0) * cosmo.sigma8_z(0.0),
    cosmo.growth_rate(0.11) * cosmo.sigma8_z(0.11),
]
print(*growth_summary)

# Build the default DESI template on a log-spaced k grid
# (the lines that write it to disk are commented out below).
k_min, k_max, k_num = 1e-4, 5, 2000
ln_k_lo = np.log(k_min)
ln_k_hi = np.log(k_max)
kl = np.logspace(ln_k_lo, ln_k_hi, k_num, base=np.e)

pkz = cosmo.get_fourier().pk_interpolator()
pkz0 = pkz.to_1d(z=0)
pknow = PowerSpectrumBAOFilter(pkz0, engine='wallish2018').smooth_pk_interpolator()
pkv = pkz0(kl)      # full linear P(k) at z = 0
pksmv = pknow(kl)   # BAO-filtered ("no-wiggle") P(k) at z = 0
#np.savetxt("./DESI_Pk_template.dat", np.c_[kl, pksmv, pkv/pksmv - 1.0],  fmt="%g %g %g", header="k     pk_smooth     pk_ratio")
#np.savetxt("./DESI_Pk_z0p00.dat", np.c_[kl, pkv, pksmv, pkv/pksmv - 1.0],  fmt="%g %g %g %g", header="k     pk         pk_smooth     pk_ratio")

#pkz011 = pkz.to_1d(z=0.11)
#pkv = pkz011(kl)
#pknow = PowerSpectrumBAOFilter(pkz011, engine='wallish2018').smooth_pk_interpolator()
#pksmv = pknow(kl)
#np.savetxt("./DESI_Pk_z0p11.dat", np.c_[kl, pkv, pksmv, pkv/pksmv - 1.0],  fmt="%g %g %g %g", header="k     pk   

# Correlation function routines 

In [None]:
# Useful utility function to collate some Xi data
def collect_xi_data(pre_files, post_files, pre_cov_files, post_cov_files, pre_files_name, post_files_name, pre_cov_name, post_cov_name, zs, reconsmooth, mocks, rpcut, imaging):
    """Gather correlation-function mocks and covariances into one dictionary and pickle it.

    Each of the four inputs (pre/post mocks, pre/post covariance) is loaded only
    when its corresponding ``*_name`` argument is not None; otherwise the entry
    is stored as None. The "pre-recon data" and "post-recon data" entries are
    always None. The dictionary is written to a hard-coded output directory and
    also returned.

    ``pre_files_name`` is used to build both the dataset name and the output
    file name, so it must be a string even though the mock loading tolerates None.
    """
    # Mocks first, then covariances, each guarded by its own name argument.
    pre_mocks = None
    if pre_files_name is not None:
        pre_mocks = get_xi2(pre_files, pre_files_name, mocks, rpcut, imaging)
    post_mocks = None
    if post_files_name is not None:
        post_mocks = get_xi2(post_files, post_files_name, mocks, rpcut, imaging)

    pre_cov = None
    if pre_cov_name is not None:
        pre_cov = get_xi_cov(pre_cov_files, pre_cov_name, rpcut, imaging)
    post_cov = None
    if post_cov_name is not None:
        post_cov = get_xi_cov(post_cov_files, post_cov_name, rpcut, imaging)

    # Human-readable suffix; the leading space becomes a leading "_" in the file name.
    suffix = f" {imaging}"
    if rpcut:
        suffix = f" {imaging} rpcut2.5"

    # Everything after the first "_" of the measurement name identifies the sample.
    sample_tag = "_".join(pre_files_name.split("_")[1:])

    cosmology = {
        "om": cosmo["Omega_m"],
        "h0": cosmo["h"],
        "z": (zs[1]+zs[0])/2.0,   # midpoint of the redshift bin
        "ob": cosmo["Omega_b"],
        "ns": cosmo["n_s"],
        "mnu": np.sum(cosmo["m_ncdm"]),
        "reconsmoothscale": reconsmooth,
    }

    split = {
        "n_data": 1,
        "pre-recon data": None,
        "pre-recon cov": pre_cov,
        "post-recon data": None,
        "post-recon cov": post_cov,
        "pre-recon mocks": pre_mocks,
        "post-recon mocks": post_mocks,
        "cosmology": cosmology,
        "name": "DESI SecondGen " + f"sm{reconsmooth} " + sample_tag + suffix,
    }

    outfile = (
        f"/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/DESI_SecondGen_sm{reconsmooth}_"
        + sample_tag.lower()
        + suffix.replace(" ", "_")
        + "_xi.pkl"
    )
    with open(outfile, "wb") as handle:
        pickle.dump(split, handle)

    return split

# Correlation function
# def get_xi(loc, name, mocks, rpcut, imaging):
    
#     rp = "_rpcut2.5" if rpcut else "" 
    
#     xis = []
#     for mock in mocks:
#         if 'BGS_BRIGHT-21.5' in name and mock == 13:
#             continue
#         infile = loc + f"/mock{mock}/" + name + f"_{imaging}_lin4_njack0_nran4_split20{rp}.txt"

#         xi = pd.read_csv(infile, comment="#", skiprows=0, delim_whitespace=True, header=None, names=["s", "savg","xi0","xi2","xi4"])
#         xi = xi.drop(xi[xi["s"] < 20.0].index)
#         xis.append(xi)

#     return xis

def get_xi2(loc, name, nmocks, rpcut, imaging):
    """Load the correlation-function multipoles of each mock as a DataFrame.

    For every entry of ``nmocks`` the file ``loc + 'mock' + str(mock) + name + '.npy'``
    is loaded, rebinned by keeping every 4th separation bin boundary, projected
    onto ell = 0, 2, 4, and rows with s < 20 are dropped.

    ``rpcut`` and ``imaging`` are accepted but not used here.

    Returns a list with one DataFrame (columns "s", "xi0", "xi2", "xi4") per mock.
    """
    print(nmocks)

    rebin = 4
    frames = []
    for mock in nmocks:
        path = "".join([loc, "mock", str(mock), name, ".npy"])
        counts = TwoPointCorrelationFunction.load(path)

        # Trim to a multiple of the rebinning factor before striding.
        nkeep = (counts.shape[0] // rebin) * rebin
        coarse = counts[:nkeep:rebin]

        sep, poles = coarse(ells=(0, 2, 4), return_sep=True, return_std=False)
        table = pd.DataFrame({'s': sep, 'xi0': poles[0], 'xi2': poles[1], 'xi4': poles[2]})

        small_scales = table[table["s"] < 20.0].index
        frames.append(table.drop(small_scales))

    return frames

    # Correlation function covariance matrix.
def get_xi_cov(loc, name, rpcut, imaging):
    """Read a correlation-function covariance matrix from a text file.

    Parameters
    ----------
    loc : str
        Directory prefix; concatenated directly with ``name`` (no separator is added).
    name : str
        File name of the whitespace-delimited covariance matrix. Lines starting
        with "#" are treated as comments.
    rpcut, imaging
        Accepted for signature compatibility with the other loaders; not used.

    Returns
    -------
    numpy.ndarray
        The covariance matrix exactly as stored in the file.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the matrix cannot be inverted at all during the sanity check.
    """
    infile = loc + name

    # `delim_whitespace=True` is deprecated since pandas 2.2; the regex separator
    # below is the documented equivalent and works on older versions too.
    cov = pd.read_csv(infile, comment="#", sep=r"\s+", header=None).to_numpy()

    # Sanity check: C @ C^-1 should have a unit diagonal if C is well conditioned.
    identity_diag = np.diag(cov @ np.linalg.inv(cov))
    if not np.all(np.isclose(identity_diag, 1)):
        print("ERROR, setting an inappropriate covariance matrix that is almost singular!!!!")

    return cov
    
# Plot the correlation function, for sanity checking
def _plot_xi_panel(split, stage, title_suffix):
    """Draw and show one s^2 * xi_ell(s) figure for ``stage`` ("pre-recon" or "post-recon")."""
    colours = ["r", "b", "g"]
    # The abscissa is the separation s, so the legend reads xi(s).
    labels = [r"$\xi_{0}(s)$", r"$\xi_{2}(s)$", r"$\xi_{4}(s)$"]

    mocks = split[f"{stage} mocks"]
    ss = mocks[0]["s"]
    nbins = len(ss)
    # The covariance is ordered multipole by multipole, nbins entries each.
    sigma = np.sqrt(np.diag(split[f"{stage} cov"]))
    # Individual mocks get fainter as their number grows.
    faint = 1.0 / len(mocks) ** (3.0 / 4.0)

    for m, column in enumerate(["xi0", "xi2", "xi4"]):
        plt.errorbar(
            ss,
            ss ** 2 * np.mean([mock[column] for mock in mocks], axis=0),
            yerr=ss ** 2 * sigma[m * nbins : (m + 1) * nbins],
            marker="o",
            ls="None",
            c=colours[m],
            label=labels[m],
        )
        for mock in mocks:
            plt.errorbar(ss, ss ** 2 * mock[column], marker="None", ls="-", c='k', alpha=faint)

    plt.xlabel(r"$s$")
    plt.ylabel(r"$s^{2}\,\xi(s)$")
    plt.title(split["name"] + title_suffix)
    plt.legend(loc='upper right')
    plt.show()


# Plot the correlation function, for sanity checking
def plot_xi(split, pre=True, post=True):
    """Plot the mock-mean correlation-function multipoles with covariance error bars.

    Parameters
    ----------
    split : dict
        Dictionary with "pre-recon mocks"/"post-recon mocks" (lists of tables
        with columns "s", "xi0", "xi2", "xi4"), "pre-recon cov"/"post-recon cov"
        and "name".
    pre, post : bool
        Whether to draw the pre- and post-reconstruction figures.

    Each figure uses the separations and mock count of its own stage, so the
    post-reconstruction panel no longer depends on the pre-reconstruction mocks.
    """
    if pre:
        _plot_xi_panel(split, "pre-recon", " Prerecon")
    if post:
        _plot_xi_panel(split, "post-recon", " Postrecon")

# get file paths and pickle everything up 

In [None]:
# The catalogue version
version = 1.2
ffa = "ffa"               # Fibre-assignment flavour: "ffa" (fast fiber assign) or "complete"
rpcut = False             # Include the rpcut or not
imaging = "default_FKP"   # Imaging systematics: "default_FKP", "default_FKP_addSN" or "default_FKP_addRF"

# Every tracer we have, with its list of [zmin, zmax] redshift bins.
tracers = {
    'BGS_BRIGHT-21.5': [[0.1,0.4]],
    'LRG': [[0.4, 0.6], [0.6, 0.8], [0.8, 1.1]],
    'ELG_LOP': [[0.8, 1.1], [1.1, 1.6]],
    'QSO': [[0.8, 2.1]],
}

# [first, last) mock indices available per tracer, so missing mocks can be
# skipped while processing is still ongoing.
nmocks = {
    'BGS_BRIGHT-21.5': [0,25],
    'LRG': [0,25],
    'ELG_LOP': [0,25],
    'QSO': [0,25],
}

# Reconstruction settings per tracer: [smoothing scale, recon type].
# NOTE(review): an earlier comment said QSO has no recon and should be None;
# it currently has an entry like every other tracer.
recon = {
    'BGS_BRIGHT-21.5': [15, "IFTrecsym"],
    'LRG': [10, "IFTrecsym"],
    'ELG_LOP': [10, "IFTrecsym"],
    'QSO': [30, "IFTrecsym"],
}

# Sky areas
caps = ["NGC", "SGC", "GCcomb"]

basepath = "/global/cfs/cdirs/desi/survey/catalogs/Y1/mocks/SecondGenMocks/"
# GCcomb currently uses the rescaled covariance; NGC and SGC use the Gaussian
# (no-rescaling) one, as it was unclear which should be used.
pre_cov_files = f"/global/cfs/cdirs/desi/users/mrash/RascalC/Y1/unblinded/v{version}/"
post_cov_files = f"/global/cfs/cdirs/desi/users/mrash/RascalC/Y1/unblinded/v{version}/"


for t, zbins in tracers.items():
    for zs in zbins:
        for cap in caps:

            # BGS mocks live in their own directory tree with a different naming scheme.
            if t == "BGS_BRIGHT-21.5":
                pre_files = basepath + "AbacusSummitBGS/desipipe/v1/ffa/2pt/"
                pre_name = f"/xi/smu/allcounts_{t}_{cap}_z{zs[0]}-{zs[1]}_{imaging}_lin_nran1_njack0_split20"
                post_name = f"/recon_sm{recon[t][0]}_IFFT_recsym/xi/smu/allcounts_{t}_{cap}_z{zs[0]}-{zs[1]}_{imaging}_lin_nran1_njack0_split20"
            else:
                pre_files = basepath + "AbacusSummit/"
                pre_name = f"/xi/smu/allcounts_{t}_{ffa}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin_njack0_nran4_split20"
                post_name = f"/recon_sm{recon[t][0]}/xi/smu/allcounts_{t}_{ffa}_{recon[t][1]}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin_njack0_nran4_split20"
            post_files = pre_files

            # Covariance files use "ELG_LOPnotqso" for the ELG sample.
            tcov = "ELG_LOPnotqso" if "ELG" in t else t
            if cap in ['NGC', 'SGC']:
                pre_cov_name = f"xi024_{tcov}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin4_s20-200_cov_RascalC_Gaussian.txt"
            else:
                pre_cov_name = f"xi024_{tcov}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin4_s20-200_cov_RascalC_rescaled.txt"
            # The same covariance is used pre- and post-reconstruction.
            post_cov_name = pre_cov_name

            # Quick existence check on the first mock of each stage.
            for directory, stem in ((pre_files, pre_name), (post_files, post_name)):
                first_mock = directory + 'mock0' + stem
                print(os.path.exists(first_mock + '.npy'))
                print(first_mock)

            data = collect_xi_data(
                pre_files, post_files, pre_cov_files, post_cov_files,
                pre_name, post_name, pre_cov_name, post_cov_name,
                zs, recon[t][0], range(*nmocks[t]), rpcut, imaging,
            )
            plot_xi(data, post=post_name is not None) # Plot the data to check things
#
#allcounts_BGS_BRIGHT-21.5_NGC_z0.1-0.4_default_FKP_nran1_njack0_split20
#allcounts_BGS_BRIGHT-21.5_GCcomb_z0.1-0.4_default_FKP_lin_nran1_njack0_split20.npy

# Pickle AbacusSummit Pk mocks for Barry 

In [None]:
# Import the necessary packages, set up the fiducial cosmology and save the DESI template
import os
import pickle
import numpy as np
import scipy as sp
import pandas as pd
from astropy.io import ascii
import matplotlib.pyplot as plt
from scipy.interpolate import splrep, splev
from cosmoprimo import PowerSpectrumBAOFilter
from cosmoprimo.fiducial import DESI
from pypower import BaseMatrix, CatalogFFTPower, CatalogFFTCorr, PowerSpectrumMultipoles, PowerSpectrumSmoothWindow, PowerSpectrumSmoothWindowMatrix, PowerSpectrumOddWideAngleMatrix, setup_logging
from pycorr import TwoPointCorrelationFunction, project_to_multipoles

# Fiducial DESI cosmology used for every pickle produced by this notebook.
cosmo = DESI()

# Physical densities: omega_b, omega_cdm and (omega_m - omega_b).
h_sq = cosmo["h"] ** 2
omega_b, omega_cdm, omega_m = (cosmo[key] * h_sq for key in ("Omega_b", "Omega_cdm", "Omega_m"))
print(omega_b, omega_cdm, omega_m - omega_b)
print(*(cosmo[key] for key in ("A_s", "n_s", "tau_reio")))
print(np.sum(cosmo["m_ncdm"]))

# Build the default DESI template on a log-spaced k grid (nothing is written to disk here).
k_min, k_max, k_num = 1e-4, 5, 2000
ln_k_lo = np.log(k_min)
ln_k_hi = np.log(k_max)
kl = np.logspace(ln_k_lo, ln_k_hi, k_num, base=np.e)

pkz = cosmo.get_fourier().pk_interpolator()
pk = pkz.to_1d(z=0)
pknow = PowerSpectrumBAOFilter(pk, engine='wallish2018').smooth_pk_interpolator()
pkv = pk(kl)        # full linear P(k) at z = 0
pksmv = pknow(kl)   # BAO-filtered ("no-wiggle") P(k) at z = 0

In [None]:
# Useful utility function to collate some P(k) data
def collect_pk_data(pre_files, post_files, pre_cov_files, post_cov_files, pre_files_name, post_files_name, pre_cov_name, post_cov_name, zs, reconsmooth, mocks, rpcut, imaging):
    """Gather power-spectrum mocks, covariances and window matrices into one dictionary and pickle it.

    Parameters
    ----------
    pre_files, post_files : str
        Directory prefixes of the pre-/post-reconstruction mock measurements.
    pre_cov_files, post_cov_files : str or None
        Directory prefixes of the covariance files. ``post_cov_files`` is also
        the location from which the window-matrix path is derived.
    pre_files_name, post_files_name : str or None
        Per-mock measurement file stems; a stage is skipped (stored as None)
        when its name is None. ``pre_files_name`` must be a string because it
        is used to build the dataset and output file names.
    pre_cov_name, post_cov_name : str or None
        Covariance file stems; a covariance is loaded only when both its
        directory and its name are given.
    zs : sequence of two floats
        Redshift bin edges; their midpoint is stored as the effective redshift.
    reconsmooth
        Reconstruction smoothing scale, stored and used in the names.
    mocks : iterable
        Mock indices to load.
    rpcut, imaging
        Forwarded to the loaders and used in the names.

    Returns
    -------
    dict
        The dictionary that was pickled to the hard-coded output directory.
    """
    # Use the function's own arguments here. The original read the notebook
    # globals `pre_name`/`post_name`, which only worked because the calling
    # cell happened to define variables with those names.
    pre_mocks = get_pk(pre_files, pre_files_name, mocks, rpcut, imaging) if pre_files_name is not None else None
    post_mocks = get_pk(post_files, post_files_name, mocks, rpcut, imaging) if post_files_name is not None else None

    # A covariance needs both a directory and a file name.
    pre_cov = None
    if pre_cov_files is not None and pre_cov_name is not None:
        pre_cov = get_pk_cov(pre_cov_files, pre_cov_name, rpcut, imaging)
    post_cov = None
    if post_cov_files is not None and post_cov_name is not None:
        post_cov = get_pk_cov(post_cov_files, post_cov_name, rpcut, imaging)

    # Window matrix on the k grid of the first pre-recon mock. The window file
    # name is derived from the post-recon measurement name.
    k_out = pre_mocks[0]["k"].to_numpy()
    if pre_files is not None:
        winmat, wam_reshape = getwin(k_out, post_cov_files, post_files_name, rpcut, imaging)
    else:
        winmat, wam_reshape = getwin_dummy(k_out)

    # Human-readable suffix; the leading space becomes a leading "_" in the file name.
    rp = f" {imaging} rpcut2.5" if rpcut else f" {imaging}"
    sample_tag = "_".join(pre_files_name.split("_")[1:])

    split = {
        "n_data": 1,
        "pre-recon data": None,
        "pre-recon cov": pre_cov,
        "post-recon data": None,
        "post-recon cov": post_cov,
        "pre-recon mocks": pre_mocks,
        "post-recon mocks": post_mocks,
        "cosmology": {
            "om": cosmo["Omega_m"],
            "h0": cosmo["h"],
            "z": (zs[1]+zs[0])/2.0,
            "ob": cosmo["Omega_b"],
            "ns": cosmo["n_s"],
            "mnu": np.sum(cosmo["m_ncdm"]),
            "reconsmoothscale": reconsmooth,
        },
        "name": "DESI SecondGen " + f"sm{reconsmooth} " + sample_tag + rp,
        "winfit": winmat,
        "winpk": None,  # We can set this to None; Barry will set it to zeroes given the length of the data vector.
        "m_mat": wam_reshape,
    }

    outfile = (
        f"/global/u1/a/abbew25/barryrepo/Barry/cosmodesi_KP4ELG_examplecode_make_picklefiles/DESI_SecondGen_sm{reconsmooth}_"
        + sample_tag.lower()
        + ("_").join(rp.split(" "))
        + "_pk.pkl"
    )
    with open(outfile, "wb") as f:
        pickle.dump(split, f)

    return split

# Power Spectrum
def get_pk(loc, name, mocks, rpcut, imaging):
    """Load the power-spectrum multipoles of each mock as a DataFrame.

    For every entry of ``mocks`` the file ``loc + "mock" + str(mock) + name + ".npy"``
    is loaded and sliced with ``slice(0, 400, 5)``. The ell = 0, 2, 4 multipoles
    are taken (real part only) and the odd multipoles are filled with zeros.
    Mock 13 is skipped for names containing 'BGS_BRIGHT-21.5'.

    ``rpcut`` and ``imaging`` are accepted but not used here.

    Returns a list with one DataFrame (columns "k", "pk0", ..., "pk4") per mock.
    """
    # Replace the measured <k> by the bin centres, since a binning matrix is
    # now used to correct to <P(k)>.
    bin_centres = np.linspace(0.0, 0.4, 80, endpoint=False) + 0.0025
    output_columns = ["k", "pk0", "pk1", "pk2", "pk3", "pk4"]

    tables = []
    for mock in mocks:
        if mock == 13 and 'BGS_BRIGHT-21.5' in name:
            continue

        poles = PowerSpectrumMultipoles.load("".join([loc, "mock", str(mock), name, ".npy"]))
        poles.slice(slice(0,400,5))

        stacked = np.vstack(poles(ell=[0,2,4], return_k=True))
        table = pd.DataFrame(stacked.T.real, columns=["k", "pk0", "pk2", "pk4"])
        for odd in ("pk1", "pk3"):
            table[odd] = np.zeros(len(table["k"]))
        table["nk"] = poles.nmodes
        table["k"] = bin_centres

        tables.append(table[output_columns])

    return tables

# Power Spectrum covariance matrix.
def get_pk_cov(loc, name, rpcut, imaging):
    """Read a 3-multipole P(k) covariance and embed it in a 5-multipole matrix.

    The file ``loc + name + ".txt"`` is read as a whitespace-delimited matrix
    (lines starting with "#" are comments) made of 3 x 3 blocks of size
    ``nks = rows // 3``. Block (a, b) of the input is copied to block
    (2a, 2b) of a ``5*nks`` square matrix that starts as the identity, so the
    rows/columns of the two remaining (odd) slots keep a unit diagonal.

    Parameters
    ----------
    loc : str
        Directory prefix; concatenated directly with ``name``.
    name : str
        File stem, without the ".txt" extension.
    rpcut, imaging
        Accepted for signature compatibility with the other loaders; not used.

    Returns
    -------
    numpy.ndarray
        The padded ``(5*nks, 5*nks)`` covariance matrix.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the padded matrix cannot be inverted at all during the sanity check.
    """
    infile = loc + name + ".txt"

    # `delim_whitespace=True` is deprecated since pandas 2.2; the regex separator
    # below is the documented equivalent and works on older versions too.
    cov_input = pd.read_csv(infile, comment="#", sep=r"\s+", header=None).to_numpy()

    nks = cov_input.shape[0] // 3
    cov = np.eye(5 * nks)
    for a in range(3):
        rows_in = slice(a * nks, (a + 1) * nks)
        rows_out = slice(2 * a * nks, (2 * a + 1) * nks)
        for b in range(3):
            cols_in = slice(b * nks, (b + 1) * nks)
            cols_out = slice(2 * b * nks, (2 * b + 1) * nks)
            cov[rows_out, cols_out] = cov_input[rows_in, cols_in]

    # Sanity check: C @ C^-1 should have a unit diagonal if C is well conditioned.
    identity_diag = np.diag(cov @ np.linalg.inv(cov))
    if not np.all(np.isclose(identity_diag, 1)):
        print("ERROR, setting an inappropriate covariance matrix that is almost singular!!!!")

    return cov

# Reads in the window matrix and builds the accompanying multipole-conversion matrix
def getwin(ks, loc, name, rpcut, imaging):
    """Load a window-matrix file and repackage it as a padded window dictionary.

    Parameters
    ----------
    ks : array-like or None
        Output k values stored as "w_ks_output"; when None the (rebinned)
        output grid of the loaded matrix is used instead.
    loc : str
        Path string; its last two '/'-separated components are dropped before
        the window file name is appended (for a path ending in '/', that is
        the final directory).
    name : str
        Measurement file stem; the '_'-separated tokens of its basename are
        used to build the window-matrix file name.
    rpcut : bool
        If truthy, "_rpcut2.5" is appended to the window file name.
    imaging : str
        Imaging label inserted in the window file name.

    Returns
    -------
    winmat : dict
        ``{1: {"w_ks_input", "w_k0_scale", "w_transform", "w_ks_output"}}``.
    matrix : numpy.ndarray
        Array of shape ``(6*n_in, 3*n_in)`` with identity blocks placing the
        three input blocks at slots 0, 2 and 4 of six.
    """

    rp = "_rpcut2.5" if rpcut else "" 

    #wam_data = BaseMatrix.load("/global/cfs/cdirs/desi/survey/catalogs/Y1/LSS/iron/LSScats/v0.6/blinded/pk/wmatrix_smooth_" + ("_").join([name.split("/")[-1].split("_")[1]]+name.split("/")[-1].split("_")[4:]) + f"_{imaging}_lin{rp}.npy")
    # Drop the last two '/'-separated components of the supplied path.
    loc = '/'.join(loc.split('/')[:-2]) 
    # Keep the basename tokens after the first one, minus a fixed number of
    # trailing tokens (6 for BGS names, 3 otherwise).
    if 'BGS' in name:
        winname = "/wmatrix_smooth_" + '_'.join(name.split("/")[-1].split("_")[1:-6])
    else:
        winname = "/wmatrix_smooth_" + '_'.join(name.split("/")[-1].split("_")[1:-3])
        
    print(winname)
    # For reconstructed names, strip the 'IFTrecsym' and 'ffa' tokens and rename
    # 'LOP' to 'LOPnotqso'. list.remove raises ValueError if a token is absent.
    if 'recsym' in winname:
        winname = winname.split('_')# .remove('IFTrecsym')
        winname.remove('IFTrecsym')
        winname.remove('ffa')
        if 'LOP' in winname:
            winname[winname.index('LOP')] = 'LOPnotqso'
        winname = '_'.join(winname)
    # Turn a trailing "zA.B-C.D" into "A.B_C.D": drops the character 8 from the
    # end and replaces the '-' by '_'. Relies on both redshifts being exactly
    # three characters long.
    if winname[-4] == '-':
        winname = winname[:-8] + winname[-7:-4] + '_' + winname[-3:]
    infile = loc + winname + f"_{imaging}_lin{rp}.npy"
    wam = BaseMatrix.load(infile)
    
    # BGS_BRIGHT-21.5_NGC_z0.1-0.4_default_FKP_lin_nran18_cellsize6_boxsize4000_default_FKP_lin.npy
    #plt.imshow(np.log10(np.fabs(wam.value)), aspect='auto')
    #plt.show()
    
    # Truncate the output axis to a multiple of 5 so it can be rebinned by 5.
    wam = wam[:,:len(wam.xout[0])// 5 * 5]
    #print(wam.xout[0], np.shape(wam.value))
    # NOTE(review): old_wam is only referenced by the commented-out debug print below.
    old_wam = wam[:, :len(wam.xout[0]) // 5 * 5:5]
    #print(np.shape(wam[:, :len(wam.xout[0]) // 5 * 5:5]))
    # Presumably merges groups of 5 output bins in place; confirm against pypower.
    wam.rebin_x(factorout=5)
    #print(old_wam.value/wam.value)
    kout = wam.xout[0] if ks is None else ks
        
    # This window function only has even multipoles as outputs and includes wide angle effects, so let's pad it with 
    # some zeros where the output odd multipoles would be so Barry is happy and then create a dummy wide angle matrix.
    w_transform = np.zeros((5 * len(kout), 6 * len(wam.xin[0])))
    # Split the matrix columns into three equal parts (assumed to be the three
    # output multipoles; TODO confirm), then copy each (output j, input i)
    # block, transposed, into the even slots (2j, 2i) of the padded matrix.
    wam_reshape = np.hsplit(wam.value, 3)
    for j in range(3):
        for i in range(3):
            w_transform[2*j*len(kout): (2*j+1)*len(kout) , 2*i*len(wam.xin[0]) : (2*i+1)*len(wam.xin[0])] = wam_reshape[j][i*len(wam.xin[0]) : (i+1)*len(wam.xin[0])].T
    
    # Identity blocks mapping the three input blocks onto slots 0, 2 and 4 of six.
    matrix = np.zeros((6 * len(wam.xin[0]), 3 * len(wam.xin[0])))
    matrix[: len(wam.xin[0]), : len(wam.xin[0])] = np.diag(np.ones(len(wam.xin[0])))
    matrix[2 * len(wam.xin[0]) : 3 * len(wam.xin[0]), len(wam.xin[0]) : 2 * len(wam.xin[0])] = np.diag(np.ones(len(wam.xin[0])))
    matrix[4 * len(wam.xin[0]) : 5 * len(wam.xin[0]), 2 * len(wam.xin[0]) :] = np.diag(np.ones(len(wam.xin[0])))
            
    #plt.imshow(np.log10(np.fabs(w_transform)), aspect='auto')
    #plt.show()

    #plt.imshow((w_transform @ matrix).T, aspect='auto')
    #plt.show()
    
    # The conversion matrix M from Beutler 2019. Used to compute the odd multipole models given the even multipoles. In the absence of wide angle effects, or if we don't care about
    # the odd multipoles, we can set this to a block matrix with identity matrices in the appropriate places, as is done here.

    # The integral-constraint term "w_k0_scale" is set to zeros on the input k grid.
    res = {"w_ks_input": wam.xin[0], "w_k0_scale": np.zeros(len(wam.xin[0])), "w_transform": w_transform, "w_ks_output": kout}
    winmat = {1: res}   # Step size is one, but we could modify this to contain other stepsizes too.
    
    # Wide-angle effects are already included in the window matrix, so the second
    # return value is the identity-block matrix built above (not None).
    return winmat, matrix

# Window function matrix. The window functions are stored in a dictionary of 'step sizes' i.e., how many bins get stuck together relative to the 
# pk measurements so that we can rebin the P(k) at run time if required. Each step size is a dictionary with:
#    the input and output k binning (w_ks_input, w_ks_output), the window function matrix (w_transform) and integral constraint (w_k0_scale).
# The window function assumes 6 input and 5 output multipoles. For cubic sims, we can set the integral constraint to zero and window matrix to a binning matrix, as is done here.
def getwin_dummy(ks):
    """Build a window dictionary whose window matrix is a pure binning matrix.

    Each column of the binning matrix is the response of the output bins to a
    unit basis vector on a 500-point log-spaced input grid between 1e-3 and
    0.5: the basis vector is spline-interpolated and averaged over every
    output bin with a k^2 weight.

    Parameters
    ----------
    ks : numpy.ndarray
        Centres of the output k bins (float array); the bin width is taken
        from the first two entries, so at least two bins are required.

    Returns
    -------
    winmat : dict
        ``{1: {"w_ks_input", "w_k0_scale", "w_transform", "w_ks_output"}}``
        with ``w_transform`` of shape ``(5*len(ks), 6*500)`` holding the
        binning matrix on its first five diagonal blocks.
    matrix : numpy.ndarray
        Array of shape ``(6*500, 3*500)`` with identity blocks placing the
        three input blocks at slots 0, 2 and 4 of six.
    """
    # `np.trapz` is deprecated since NumPy 2.0 in favour of `np.trapezoid`;
    # use whichever this NumPy provides.
    trapezoid = getattr(np, "trapezoid", None)
    if trapezoid is None:
        trapezoid = np.trapz

    dk = ks[1] - ks[0]
    ks_input = np.logspace(-3.0, np.log10(0.5), 500)

    # The quadrature grid and shell volume of each output bin do not depend on
    # the basis vector, so build them once instead of once per input point.
    quadrature = []
    for kk in ks:
        k_lo = kk - dk / 2
        k_hi = kk + dk / 2
        kin = np.linspace(k_lo, k_hi, 100)
        quadrature.append((kin, kin**2, k_hi**3 - k_lo**3))

    binmat = np.zeros((len(ks), len(ks_input)))
    for ii in range(len(ks_input)):

        # Unit basis vector and its interpolating spline.
        pkvec = np.zeros_like(ks_input)
        pkvec[ii] = 1
        pkvec_spline = splrep(ks_input, pkvec)

        # k^2-weighted average of the spline over each output bin; ext=3
        # returns the boundary value outside the input grid.
        binmat[:, ii] = [
            trapezoid(kin_sq * splev(kin, pkvec_spline, ext=3), x=kin) * 3 / volume
            for kin, kin_sq, volume in quadrature
        ]

    w_transform = np.zeros((5 * ks.size, 6 * ks_input.size))
    for i in range(5):
        w_transform[i*ks.size: (i+1)*ks.size , i*ks_input.size : (i+1)*ks_input.size] = binmat

    # The conversion matrix M from Beutler 2019. Used to compute the odd multipole models given the even multipoles. In the absence of wide angle effects, or if we don't care about
    # the odd multipoles, we can set this to a block matrix with identity matrices in the appropriate places, as is done here.
    n_in = ks_input.size
    matrix = np.zeros((6 * n_in, 3 * n_in))
    for block in range(3):
        matrix[2 * block * n_in : (2 * block + 1) * n_in, block * n_in : (block + 1) * n_in] = np.eye(n_in)

    res = {"w_ks_input": ks_input, "w_k0_scale": np.zeros(ks.size), "w_transform": w_transform, "w_ks_output": ks}
    return {1: res}, matrix  # Step size is one

# Plot the power spectrum, for sanity checking
def _plot_pk_panel(split, stage, title_suffix):
    """Draw and show one k * P_ell(k) figure for ``stage`` ("pre-recon" or "post-recon")."""
    colours = ["r", "b", "g"]
    labels = [r"$P_{0}(k)$", r"$P_{2}(k)$", r"$P_{4}(k)$"]

    mocks = split[f"{stage} mocks"]
    k = mocks[0]["k"]
    nbins = len(k)
    # Only the first three nbins-sized chunks of the diagonal are used, one per
    # plotted multipole, mirroring the original indexing.
    sigma = np.sqrt(np.diag(split[f"{stage} cov"]))
    # Individual mocks get fainter as their number grows.
    faint = 1.0 / len(mocks) ** (3.0 / 4.0)

    for m, column in enumerate(["pk0", "pk2", "pk4"]):
        plt.errorbar(
            k,
            k * np.mean([mock[column] for mock in mocks], axis=0),
            yerr=k * sigma[m * nbins : (m + 1) * nbins],
            marker="o",
            ls="None",
            c=colours[m],
            label=labels[m],
        )
        for mock in mocks:
            plt.errorbar(k, k * mock[column], marker="None", ls="-", c='k', alpha=faint)

    plt.xlabel(r"$k$")
    plt.ylabel(r"$k\,\times pk(k)$")
    plt.title(split["name"] + title_suffix)
    plt.legend(loc='upper right')
    plt.show()


def plot_pk(split, pre=True, post=True):
    """Plot the mock-mean power-spectrum multipoles with covariance error bars.

    Parameters
    ----------
    split : dict
        Dictionary with "pre-recon mocks"/"post-recon mocks" (lists of tables
        with columns "k", "pk0", "pk2", "pk4"), "pre-recon cov"/"post-recon cov"
        and "name".
    pre, post : bool
        Whether to draw the pre- and post-reconstruction figures.

    Each figure uses the k grid and mock count of its own stage. The original
    always read k from the post-recon mocks, so ``post=False`` with no
    post-recon mocks failed. The post-recon figure now also gets an x-axis
    label, like the pre-recon one.
    """
    if pre:
        _plot_pk_panel(split, "pre-recon", " Prerecon")
    if post:
        _plot_pk_panel(split, "post-recon", " Postrecon")

In [None]:
# The catalogue version
import os 
version = 1.2
ffa = "ffa"               # Fibre-assignment flavour: "ffa" (fast fiber assign) or "complete"
rpcut = False             # Include the rpcut or not
imaging = "default_FKP"   # Imaging systematics: "default_FKP", "default_FKP_addSN" or "default_FKP_addRF"

# Every tracer we have, with its list of [zmin, zmax] redshift bins.
tracers = {
    'BGS_BRIGHT-21.5': [[0.1,0.4]],
    'LRG': [[0.4, 0.6], [0.6, 0.8], [0.8, 1.1]],
    'ELG_LOP': [[0.8, 1.1], [1.1, 1.6]],
    'QSO': [[0.8, 2.1]],
}

# [first, last) mock indices available per tracer, so missing mocks can be
# skipped while processing is still ongoing.
nmocks = {
    'BGS_BRIGHT-21.5': [0,25],
    'LRG': [0,25],
    'ELG_LOP': [0,25],
    'QSO': [0,25],
}

# Reconstruction settings per tracer: [smoothing scale, recon type].
# NOTE(review): an earlier comment said QSO has no recon and should be None;
# it currently has an entry like every other tracer.
recon = {
    'BGS_BRIGHT-21.5': [15, "IFTrecsym"],
    'LRG': [10, "IFTrecsym"],
    'ELG_LOP': [10, "IFTrecsym"],
    'QSO': [30, "IFTrecsym"],
}

# Sky areas
caps = ["NGC", "SGC", "GCcomb"]

basepath = "/global/cfs/cdirs/desi/survey/catalogs/Y1/mocks/SecondGenMocks/"
pre_cov_files = "/global/cfs/cdirs/desi/survey/catalogs/Y1/LSS/iron/LSScats/v0.6/blinded/pk/covariances/"
post_cov_files = "/global/cfs/cdirs/desi/survey/catalogs/Y1/LSS/iron/LSScats/v0.6/blinded/pk/covariances/"    # No post recon covariances yet?


for t, zbins in tracers.items():
    for zs in zbins:
        for cap in caps:

            # BGS mocks live in their own directory tree with a different naming scheme.
            # The names `pre_name`/`post_name` are kept as-is: they are also
            # read as globals elsewhere in this notebook.
            if t == "BGS_BRIGHT-21.5":
                pre_files = basepath + "AbacusSummitBGS/desipipe/v1/ffa/2pt/"
                pre_name = f"/pk/pkpoles_{t}_{cap}_z{zs[0]}-{zs[1]}_{imaging}_lin_nran18_cellsize6_boxsize4000"
                post_name = f"/recon_sm{recon[t][0]}_IFFT_recsym/pk/pkpoles_{t}_{cap}_z{zs[0]}-{zs[1]}_{imaging}_lin_nran18_cellsize6_boxsize4000"
            else:
                pre_files = basepath + "AbacusSummit/"
                pre_name = f"/pk/pkpoles_{t}_{ffa}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin"
                # NOTE(review): this path repeats "recon_sm.../pk/" twice; confirm
                # against the actual directory layout.
                post_name = f"/recon_sm{recon[t][0]}/pk/recon_sm{recon[t][0]}/pk/pkpoles_{t}_{ffa}_{recon[t][1]}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin"
            post_files = pre_files

            # Covariance files use "ELG_LOPnotqso" for the ELG sample; the same
            # (pre-recon, Gaussian) covariance is used for both stages.
            tcov = "ELG_LOPnotqso" if "ELG" in t else t
            pre_cov_name = f"cov_gaussian_pre_{tcov}_{cap}_{zs[0]}_{zs[1]}_{imaging}_lin"
            post_cov_name = pre_cov_name

            data = collect_pk_data(
                pre_files, post_files, pre_cov_files, post_cov_files,
                pre_name, post_name, pre_cov_name, post_cov_name,
                zs, recon[t][0], range(*nmocks[t]), rpcut, imaging,
            )
            plot_pk(data, post=post_name is not None) # Plot the data to check things
            #print(pre_name)
            # print(post_name)
            
            # print(os.path.exists(pre_files + 'mock0' + pre_name + '.npy'))
            # print(pre_files + 'mock0' + pre_name)
            # print(os.path.exists(post_files + 'mock0' + post_name + '.npy'))
            # print(post_files + 'mock0' + post_name)
            
            #print(os.path.exists(pre_cov_files + pre_cov_name))
            #print(pre_cov_files + pre_cov_name)
                    