In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import fitsio
from pycorr import TwoPointCorrelationFunction, TwoPointEstimator, project_to_multipoles, project_to_wp, utils, setup_logging
from scipy.optimize import curve_fit
from LSS.common_tools import mknz

from dataloc import *

# Use a larger default font for all plot text, with a slightly smaller legend font.
plt.rcParams.update({
    'font.size': 16,
    'legend.fontsize': 12,
})





















## No longer needed

In [None]:
# Bin edges for the w_p calculation: 16 geometrically spaced edges from 0.1 to 20.
edges = np.geomspace(0.1, 20, 16)

# Bin list for my own use: one "low high" pair per line.
with open(WP_RADIAL_BINS_DESI_FILE, 'w') as f:
    f.writelines(f'{lo:.8f} {hi:.8f}\n' for lo, hi in zip(edges[:-1], edges[1:]))

# Edge list for the xirunpc.py script: one edge per line.
with open(WP_RADIAL_EDGE_DESI_FILE, 'w') as f:
    f.writelines(f'{edge:.8f}\n' for edge in edges)

In [None]:
def check_cols_for_tbl(path, colname):
    """Report whether a column exists in HDU 1 of a FITS file.

    Prints whether ``colname`` is among the column names of extension 1 of the
    file at ``path``, followed by the full list of column names. If ``path``
    is not an existing file, a "not found" message is printed instead.

    Parameters
    ----------
    path : str
        Path of the FITS file to inspect.
    colname : str
        Column name to look for.

    Returns
    -------
    None
    """
    if not os.path.isfile(path):
        print(f'{path} not found')
        return
    # Open through a context manager so the file handle is released as soon
    # as the column names have been read.
    with fitsio.FITS(path) as fits:
        cols = fits[1].get_colnames()
    if colname not in cols:
        print(f'{colname} not in {path}')
    else:
        print(f'{colname} found in {path}')
    print(cols)
# Look for the QUIESCENT column in each of the three files below.
for tbl_path in (
    '/dvs_ro/cfs/cdirs/desi/survey/catalogs/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_full_HPmapcut.dat.fits',
    '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_NGC_clustering.dat.fits',
    '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_NGC_0_clustering.ran.fits',
):
    check_cols_for_tbl(tbl_path, 'QUIESCENT')

In [None]:
%pip install corrfunc

## Definitions

In [None]:
def get_wp_for(tracer, weights, survey, verspec, ver, bins, zmax, nran, njack, quiescent=None, zmin=0.001):
    """Load a saved w_p text file from the custom clustering results folder.

    The file is looked up in
    ``CUSTOM_CLUSTERING_RESULTS_FOLDER/<survey>/LSS/<verspec>/LSScats/<ver>/rppi``
    under the name
    ``wp_<tracer>_GCcomb_<zmin>_<zmax>_<weights>_<bins>_njack<njack>_nran<nran>_split20<addon>.txt``.

    Parameters
    ----------
    tracer, weights, survey, verspec, ver, bins, zmax, nran, njack
        Values interpolated verbatim into the directory and file name.
    quiescent : bool or None, optional
        ``None`` adds no suffix; a truthy value adds ``_QUIESCENT1`` and any
        other value adds ``_QUIESCENT0``.
    zmin : float or str, optional
        Lower redshift bound interpolated into the file name. The default
        ``0.001`` reproduces the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray or None
        The file contents as read by ``np.loadtxt``, or ``None`` (after
        printing a warning) when the directory or the file does not exist.
    """
    # Named `results_dir` rather than `dir` so the builtin is not shadowed.
    results_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER, survey, 'LSS', verspec, 'LSScats', ver, 'rppi')

    if not os.path.exists(results_dir):
        print("WARNING: Directory does not exist: ", results_dir)
        return None

    if quiescent is not None:
        addon = '_QUIESCENT' + ('1' if quiescent else '0')
    else:
        addon = ''

    wp_fn = os.path.join(results_dir, f'wp_{tracer}_GCcomb_{zmin}_{zmax}_{weights}_{bins}_njack{njack}_nran{nran}_split20{addon}.txt')

    if not os.path.exists(wp_fn):
        print("WARNING: File does not exist: ", wp_fn)
        return None

    return np.loadtxt(wp_fn)

def get_fn_for(weights, survey, verspec, ver, zmax, nran, njack, quiescent=None, tracer='BGS_BRIGHT', bins='custom', zmin=0.001):
    """Build the path of a saved ``allcounts`` .npy file in the custom results folder.

    The path is
    ``CUSTOM_CLUSTERING_RESULTS_FOLDER/<survey>/LSS/<verspec>/LSScats/<ver>/rppi/``
    ``allcounts_<tracer>_GCcomb_<zmin>_<zmax>_<weights>_<bins>_njack<njack>_nran<nran>_split20<addon>.npy``.

    Parameters
    ----------
    weights, survey, verspec, ver, zmax, nran, njack
        Values interpolated verbatim into the directory and file name.
    quiescent : bool or None, optional
        ``None`` adds no suffix; a truthy value adds ``_QUIESCENT1`` and any
        other value adds ``_QUIESCENT0``.
    tracer : str, optional
        Tracer name in the file name; the default matches the previously
        hard-coded ``BGS_BRIGHT``.
    bins : str, optional
        Binning label in the file name; the default matches the previously
        hard-coded ``custom``.
    zmin : float or str, optional
        Lower redshift bound in the file name; the default matches the
        previously hard-coded ``0.001``.

    Returns
    -------
    str or None
        The file path, or ``None`` (after printing a warning) when the results
        directory does not exist. The file itself is NOT checked for
        existence; callers must handle a missing file.
    """
    # Named `results_dir` rather than `dir` so the builtin is not shadowed.
    results_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER, survey, 'LSS', verspec, 'LSScats', ver, 'rppi')

    if not os.path.exists(results_dir):
        print("WARNING: Directory does not exist: ", results_dir)
        return None

    if quiescent is not None:
        addon = '_QUIESCENT' + ('1' if quiescent else '0')
    else:
        addon = ''

    return os.path.join(results_dir, f'allcounts_{tracer}_GCcomb_{zmin}_{zmax}_{weights}_{bins}_njack{njack}_nran{nran}_split20{addon}.npy')

## Jackknife Tests

In [None]:
# Load, for each jackknife-region count in `njack`, the saved w_p table, the
# pycorr estimator object and the w_p covariance matrix, then derive the
# correlation matrix from each covariance.
#quiescent = False
quiescent = None # False
#nran = 18
nran = 8
#zmax = 0.22620
zmax = 0.14977
tracer = "BGS_BRIGHT" 
# NOTE(review): 1028 breaks the 256, 512 sequence — possibly meant to be 1024;
# confirm against the file names on disk.
njack = [4, 8, 16, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 192, 256, 512, 1028]
jacktest_results = []
# NOTE(review): spelled "jacktset" (not "jacktest"); left unchanged in case later cells use this name.
jacktset_obj = []
jacktest_cov = []
jacktest_corr = []
for nj in njack:
    # get_wp_for returns None when the file is missing, so entries here may be None.
    jacktest_results.append(get_wp_for(tracer, 'pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', 'custom', zmax, nran, nj, quiescent=quiescent))
    # get_fn_for does not check that the file exists (and returns None if the
    # directory is missing), so a missing file surfaces as an error from load().
    tpc = TwoPointEstimator.load(get_fn_for('pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', zmax, nran, nj, quiescent=quiescent))
    # Only `cov` is used below; `s` and `xiell` are not used in this cell.
    s, xiell, cov = tpc.get_corr(return_sep=True, return_cov=True, mode='wp')
    jacktset_obj.append(tpc)
    jacktest_cov.append(cov)

# Normalise each covariance matrix by its diagonal to get a correlation matrix.
for cov in jacktest_cov:    
    # ~C_ij = C_ij / sqrt(C_ii C_jj)
    jacktest_corr.append(cov / np.sqrt(np.outer(np.diag(cov), np.diag(cov))))

In [None]:
# Test how the cov matrix is changing as the njack increases using subplots
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))
axes = axes.flatten()

for i in range(9):
    # Show every second entry starting at index 6 (ii = 6, 8, ..., 22).
    ii=(i+3)*2
    im = axes[i].imshow(jacktest_corr[ii], cmap='coolwarm', vmin=-1, vmax=1)
    axes[i].set_title(f"Corr Matrix (njack={njack[ii]})")
    
    # Remove bin-number labels: keep one tick per bin but blank the tick text
    axes[i].set_xticks(np.arange(len(jacktest_corr[ii])))
    axes[i].set_xticklabels([])
    axes[i].set_yticks(np.arange(len(jacktest_corr[ii])))
    axes[i].set_yticklabels([])

# Create a single colorbar for all subplots
# (`im` is the image from the last panel; all panels share vmin=-1, vmax=1.)
fig.subplots_adjust(right=0.85)
cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.7])
cbar = fig.colorbar(im, cax=cbar_ax)
cbar.set_label('Correlation Coefficient')

# NOTE(review): the original comment here read "Switch from bin numbers to rp
# values", but no such relabelling is done — the tick labels are blanked above.
# Lay out the panels inside the left 85% of the figure, leaving room for the colorbar.
plt.tight_layout(rect=[0, 0, 0.85, 1])
plt.show()

In [None]:
# PLOT THE SIGMA OF THE JACKKNIFE TESTS ONLY
# Plots column 3 of each loaded w_p table against column 0, for every second
# jackknife configuration (odd indices only).
plt.figure()
for i in range(len(jacktest_results)):
    if i%2 == 0:
        continue
    wp = jacktest_results[i]
    if wp is not None:
        # Highlight the njack == 64 run in red; all other runs get a green
        # shade whose green component decreases with the index i.
        color = 'red' if njack[i] == 64 else [0.0, 1-i/len(njack), 0.0]
        plt.plot(wp[:,0], wp[:,3], label=f'njack={njack[i]}', marker='o', color=color)
plt.xscale('log')
plt.yscale('log')
plt.xlabel(r'$r_p$ $[h^{-1} Mpc]$')
plt.ylabel(r'$\sigma_{\omega_p}$')
plt.legend()

In [None]:
# Let's fit the results to a power law
def power_law(x, a, b):
    """Evaluate the power law ``a * x**b`` (amplitude ``a``, exponent ``b``)."""
    x_to_the_b = x**b
    return a * x_to_the_b

# r_p values are taken from the first loaded table (this fails if that entry is None).
rps = jacktest_results[0][:,0] # They are all the same

# Fit only one run: entry 10 of jacktest_results, i.e. the run for njack[10].
# Column 2 is the fitted quantity and column 3 is used as its 1-sigma error.
to_fit = jacktest_results[10]
popt, pcov = curve_fit(power_law, rps, to_fit[:,2], sigma=to_fit[:,3], absolute_sigma=True)
print(popt)

# And plot the data and the fit
plt.figure()
for i in range(len(jacktest_results)):
    wp = jacktest_results[i]
    if wp is not None:
        plt.errorbar(wp[:,0], wp[:,2], yerr=wp[:,3], label=f'njack={njack[i]}', fmt='.', capsize=2)
plt.plot(rps, power_law(rps, *popt), label='Fit', linestyle='--')
plt.xscale('log')
plt.yscale('log')
plt.ylabel(r'$w_p(r_p)$')
plt.xlabel(r'$r_p$ [Mpc/h]') 

def chi2(y, yfit, cov):
    """Return the chi-squared ``(y - yfit)^T cov^{-1} (y - yfit)``.

    The inverse is applied through ``np.linalg.solve`` rather than by forming
    ``cov^{-1}`` explicitly.
    """
    residual = y - yfit
    weighted = np.linalg.solve(cov, y - yfit)
    return np.dot(residual, weighted)

# chi^2 of the single fitted power law against every loaded run, each using
# that run's own covariance matrix; None marks runs whose table was not loaded.
chi2s = []
for i, wp in enumerate(jacktest_results):
    if wp is None:
        chi2s.append(None)
        continue
    chi2s.append(chi2(wp[:,2], power_law(wp[:,0], *popt), jacktest_cov[i]))

# Plot the chi2 values as a function of njack (the first three runs are skipped)
plt.figure()
plt.plot(njack[3:], chi2s[3:],  'o', label='Full Covariance')
plt.xlabel('njack')
plt.ylabel(r'$\chi^2$')
plt.xscale('log')
plt.legend()
plt.ylim(100,250)


In [None]:
# Now let's fit it to a broken power law
def broken_power_law(x, a1, b1, a2, b2, x0):
    """Evaluate a two-piece power law split at ``x0``.

    Returns ``a1 * x**b1`` where ``x < x0`` and ``a2 * x**b2`` elsewhere.
    """
    below_break = a1 * x**b1
    above_break = a2 * x**b2
    return np.where(x < x0, below_break, above_break)

# r_p values are taken from the first loaded table (this fails if that entry is None).
rps = jacktest_results[0][:,0] # They are all the same

# Fit only one run: entry 10 of jacktest_results, i.e. the run for njack[10].
# Column 2 is the fitted quantity and column 3 is used as its 1-sigma error.
# NOTE(review): no p0 is passed, so curve_fit starts from its default initial
# guess, and the break position x0 enters the model only through np.where —
# check that the fitted x0 and the two branches look sensible.
to_fit = jacktest_results[10]
popt, pcov = curve_fit(broken_power_law, rps, to_fit[:,2], sigma=to_fit[:,3], absolute_sigma=True)
print(popt)

# And plot the data and the fit
plt.figure()
for i in range(len(jacktest_results)):
    wp = jacktest_results[i]
    if wp is not None:
        plt.errorbar(wp[:,0], wp[:,2], yerr=wp[:,3], label=f'njack={njack[i]}', fmt='.', capsize=2)
plt.plot(rps, broken_power_law(rps, *popt), label='Fit', linestyle='--')
plt.xscale('log')
plt.yscale('log')
plt.ylabel(r'$w_p(r_p)$')
plt.xlabel(r'$r_p$ [Mpc/h]')

# Calculate chi squared for the fit, using the covariance matrix from each njack test
def chi2(y, yfit, cov):
    """Return the chi-squared ``(y - yfit)^T cov^{-1} (y - yfit)``.

    The inverse is applied through ``np.linalg.solve`` rather than by forming
    ``cov^{-1}`` explicitly.
    """
    residual = y - yfit
    weighted = np.linalg.solve(cov, y - yfit)
    return np.dot(residual, weighted)

# chi^2 of the single fitted broken power law against every loaded run, each
# using that run's own covariance matrix; None marks runs whose table was not loaded.
chi2s = []
for i, wp in enumerate(jacktest_results):
    if wp is None:
        chi2s.append(None)
        continue
    chi2s.append(chi2(wp[:,2], broken_power_law(wp[:,0], *popt), jacktest_cov[i]))

# Plot the chi2 values as a function of njack (the first three runs are skipped)
plt.figure()
plt.plot(njack[3:], chi2s[3:],  'o', label='Full Covariance')
plt.xlabel('njack')
plt.ylabel(r'$\chi^2$')
plt.xscale('log')
plt.legend()
plt.ylim(100,250)


## Main Results

In [None]:
# Upper redshift limits and magnitude bin edges for the main samples.
zmaxes = [0.06336, 0.09792, 0.14977, 0.22620, 0.33694, 0.49523]
magbins = [-17, -18, -19, -20, -21, -22, -23]
tracer = "BGS_BRIGHT"
jack_official = 64

# For each zmax load the quiescent=True table into red_results and the
# quiescent=False table into blue_results (red first, then blue).
red_results = []
blue_results = []
for z in zmaxes:
    for is_quiescent, bucket in ((True, red_results), (False, blue_results)):
        bucket.append(get_wp_for(tracer, 'pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', 'custom', z, 18, jack_official, is_quiescent))


In [None]:
# TODO

## SDSS Comparison

In [None]:
maglim = -22.4
zmax = 0.245169
zmin = 0.02

# Make a luminosity threshold sample like SDSS to see if n(z) is same
def make_sdsslike_cuts(fcd, maglim, zmax, zmin):
    """Write a cut copy of a catalogue next to the original.

    Rows are kept when ``zmin < Z < zmax`` and ``ABSMAG_R < maglim``. The
    result is written to the input path with ``.dat.fits`` replaced by
    ``_testcut.dat.fits``, overwriting any existing file of that name.

    Parameters
    ----------
    fcd : str
        Path of the input catalogue; must contain ``.dat.fits``.
    maglim : float
        Exclusive upper bound on the ``ABSMAG_R`` column.
    zmax, zmin : float
        Exclusive upper and lower bounds on the ``Z`` column.

    Raises
    ------
    ValueError
        If ``fcd`` does not contain ``.dat.fits``. Without this check the
        output path would equal the input path and the source catalogue would
        be overwritten.
    """
    writename = fcd.replace('.dat.fits', '_testcut.dat.fits')
    if writename == fcd:
        raise ValueError(f"Refusing to overwrite input catalogue; expected '.dat.fits' in path: {fcd}")
    arr = fitsio.read(fcd)
    # Apply all three cuts with a single mask instead of three successive copies.
    keep = (arr['Z'] < zmax) & (arr['Z'] > zmin) & (arr['ABSMAG_R'] < maglim)
    fitsio.write(writename, arr[keep], clobber=True)

make_sdsslike_cuts('/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_NGC_clustering.dat.fits', maglim, zmax, zmin)
make_sdsslike_cuts('/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_SGC_clustering.dat.fits', maglim, zmax, zmin)

In [None]:
# Run LSS.common_tools.mknz on the NGC "_testcut" catalogue and the first NGC
# random file, writing its output to `outpath` (loaded later with np.loadtxt).
# NOTE(review): presumably this computes n(z); zmax=0.3 here is a literal and
# is independent of the notebook-level `zmax` variable — confirm that is intended.
cat = '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_NGC_clustering_testcut.dat.fits'
ran = '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_NGC_0_clustering.ran.fits'
outpath = '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_NGC_FOOTPRINT.txt'
mknz(cat, ran, outpath, zmax=0.3)

In [None]:
# Run LSS.common_tools.mknz on the SGC "_testcut" catalogue and the first SGC
# random file, writing its output to `outpath2` (loaded later with np.loadtxt).
# Note that `cat` and `ran` are reused here and now point at the SGC files.
# NOTE(review): presumably this computes n(z); zmax=0.3 here is a literal and
# is independent of the notebook-level `zmax` variable — confirm that is intended.
cat = '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_SGC_clustering_testcut.dat.fits'
ran = '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_SGC_0_clustering.ran.fits'
outpath2 = '/global/cfs/cdirs/desi/users/ianw89/clustering/Y1/LSS/iron/LSScats/v1.5pip/BGS_BRIGHT_SGC_FOOTPRINT.txt'
mknz(cat, ran, outpath2, zmax=0.3)

In [None]:
# Load the NGC and SGC tables written by the two mknz cells.
# Columns of each table: zmid zlow zhigh n(z) Nbin Vol_bin
NGC_nz = np.loadtxt(outpath)
SGC_nz = np.loadtxt(outpath2)

# Function to combine the two to get an overall density, appropriately weighting each by volume
def combine_nz(nz1, nz2):
    """Merge two n(z) tables that share the same redshift bins.

    Columns 0-2 (zmid, zlow, zhigh) are copied from ``nz1``. Columns 4 and 5
    are the element-wise sums of the two inputs, and column 3 is recomputed as
    column 4 divided by column 5.

    Returns a new float array of shape ``(len(nz1), 6)``.
    """
    merged = np.empty((nz1.shape[0], 6))
    merged[:, :3] = nz1[:, :3]
    merged[:, 4:6] = nz1[:, 4:6] + nz2[:, 4:6]
    merged[:, 3] = merged[:, 4] / merged[:, 5]
    return merged

# Combined NGC + SGC table; column 4 holds the summed per-bin counts.
total_nz = combine_nz(NGC_nz, SGC_nz)

print(f"BGS BRIGHT Y1 number of galaxies with M_r < {maglim} and z < {zmax}: {total_nz[:,4].sum():,}")
# NOTE(review): the SDSS line below is a fixed string (nothing is interpolated)
# and quotes M_r < -22, whereas the BGS line prints `maglim` — confirm the two
# cuts are the intended comparison.
print(f"SDSS Zehavi 2011 number of galaxies with M_r < -22 and z < 0.245169 in : 11,385 ")

# Function to integrate the two across a z range
def galdensity(zmin, zmax):
    """Sum ``n(z) * (zhigh - zlow)`` over a redshift range, NGC plus SGC.

    For each of the module-level ``NGC_nz`` and ``SGC_nz`` tables, the bins
    whose ``zmid`` is closest to ``zmin`` and ``zmax`` define a half-open index
    range (the bin nearest ``zmax`` is excluded). The per-bin products inside
    that range are summed, and the two sums are added.
    """
    def _summed_over_range(nz):
        # Index of the bin whose midpoint is nearest each limit.
        start = np.argmin(np.abs(nz[:,0] - zmin))
        stop = np.argmin(np.abs(nz[:,0] - zmax))
        bin_width = nz[:,2] - nz[:,1]
        per_bin = nz[:,3] * bin_width
        return per_bin[start:stop].sum()

    return _summed_over_range(NGC_nz) + _summed_over_range(SGC_nz)

#galdensity(0.02, 0.4)

In [None]:
# Plot the combined (NGC + SGC) n(z) against bin-centre redshift on a log y-axis.
plt.figure()
#plt.plot(NGC_nz[:,0], NGC_nz[:,3], label='BGS BRIGHT NGC')
#plt.plot(SGC_nz[:,0], SGC_nz[:,3], '--', label='BGS BRIGHT SGC')
plt.plot(total_nz[:,0], total_nz[:,3], label='BGS BRIGHT')
plt.xlabel('$z$')
plt.ylabel('$n(z)~[(h/$Mpc$)^3]$')
plt.xlim(0.01, 0.26)
plt.yscale("log")
plt.legend()
# NOTE(review): the title text is hard-coded and does not read `maglim` or
# `zmax`; confirm it still matches the cuts used to build the sample.
plt.title("BGS BRIGHT Y1 Density; $M_r<-22$, $z<0.245$")


In [None]:
# Blue/Red Lum Bins Comparison
# Folder holding the SDSS comparison files; file names are appended to it
# directly when the SDSS w_p files are read further down in this cell.
datafolder = PARAMS_SDSS_FOLDER
def read_sdss_wp_file(fname):
    """Read a whitespace-delimited SDSS w_p table.

    The file must have at least three columns: radius, w_p and the w_p error,
    in that order.

    Parameters
    ----------
    fname : str
        Path of the file to read.

    Returns
    -------
    tuple
        ``(wp, wp_err, radius)`` as 1-D arrays, or ``(None, None, None)``
        (after printing a message) when the file does not exist.
    """
    if not os.path.exists(fname):
        print(f'File {fname} not found')
        return None, None, None
    # ndmin=2 keeps the result two-dimensional even for a single-row file,
    # which would otherwise load as 1-D and break the column indexing below.
    data = np.loadtxt(fname, dtype='float', ndmin=2)
    radius = data[:,0]
    wp = data[:,1]
    wp_err = data[:,2]
    return wp, wp_err, radius

# BGS BRIGHT with zlims like what Jeremy did for SDSS
# Note: this reassigns `zmaxes` and `magbins`, replacing the lists set in the
# "Main Results" cell. Each zmaxes[i] goes with the bin magbins[i+1] < M_r < magbins[i].
zmaxes = [0.02586, 0.0406, 0.06336, 0.0981, 0.1504]
magbins = [-17, -18, -19, -20, -21, -22]
tracer = "BGS_BRIGHT" 
red_likesdss_results = []
blue_likesdss_results = []
for z in zmaxes:
    # nran=18 and njack=0 here; quiescent=True goes to the "red" list, False to the "blue" list.
    red_likesdss_results.append(get_wp_for(tracer, 'pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', 'custom', z, 18, 0, True)) 
    blue_likesdss_results.append(get_wp_for(tracer, 'pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', 'custom', z, 18, 0, False))

# One figure per magnitude bin: BGS red/blue w_p overlaid with the SDSS red/blue w_p.
for i in range(len(zmaxes)):
    magmin = magbins[i]
    magmax = magbins[i+1]

    # BGS results for this bin, paired with their plot colour (red first, then blue).
    bgs_by_colour = (
        (red_likesdss_results[i], 'r'),
        (blue_likesdss_results[i], 'b'),
    )
    # Nothing to show for this bin if neither BGS table was loaded.
    if all(result is None for result, _ in bgs_by_colour):
        continue

    plt.figure()
    for result, colour in bgs_by_colour:
        if result is None:
            continue
        if np.shape(result)[1] == 3:
            # Three-column tables are plotted as bare points.
            plt.plot(result[:,0], result[:,2], '.', color=colour)
        else:
            # Otherwise column 3 is used as the error bar.
            plt.errorbar(result[:,0], result[:,2], yerr=result[:,3], fmt='.', color=colour, capsize=2)

    # SDSS Data
    # TODO think about error bars I'm plotting differently now
    for sdss_name, sdss_colour in (('red', 'darkred'), ('blue', 'darkblue')):
        # os.path.join gives the same path whether or not `datafolder` ends
        # with a path separator.
        fname = os.path.join(datafolder, f'wp_{sdss_name}_M{np.abs(magmin):d}.dat')
        wp, wp_err, radius = read_sdss_wp_file(fname)
        if wp is not None:
            plt.errorbar(radius, wp, yerr=wp_err, fmt='.', color=sdss_colour, capsize=2, ecolor=sdss_colour)

    plt.xscale('log')
    plt.yscale('log')
    plt.ylabel(r'$w_p(r_p)$')
    plt.xlabel(r'$r_p$ [Mpc/h]')
    plt.title(f'{tracer}: ${magmax}<M_r<{magmin}$  ; $z<{zmaxes[i]}$')




In [None]:
# Now do similar but for the luminosity threshold version which is not color split
def get_threshold_wp(tracer, weights, survey, verspec, ver, bins, zmax, nran, njack, quiescent=None):
    """Load a saved w_p text file whose name uses a lower redshift bound of 0.02.

    The file is looked up in
    ``CUSTOM_CLUSTERING_RESULTS_FOLDER/<survey>/LSS/<verspec>/LSScats/<ver>/rppi``.
    ``quiescent=None`` adds no suffix to the file name; a truthy value adds
    ``_QUIESCENT1`` and any other value adds ``_QUIESCENT0``.

    Returns the array read by ``np.loadtxt``, or ``None`` (after printing a
    warning) when the directory or the file does not exist.
    """
    rppi_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER, survey, 'LSS', verspec, 'LSScats', ver, 'rppi')
    if not os.path.exists(rppi_dir):
        print("WARNING: Directory does not exist: ", rppi_dir)
        return None

    suffix = '' if quiescent is None else '_QUIESCENT' + ('1' if quiescent else '0')
    wp_fn = os.path.join(rppi_dir, f'wp_{tracer}_GCcomb_0.02_{zmax}_{weights}_{bins}_njack{njack}_nran{nran}_split20{suffix}.txt')

    if os.path.exists(wp_fn):
        return np.loadtxt(wp_fn)
    print("WARNING: File does not exist: ", wp_fn)
    return None

# BGS BRIGHT with zlims like Zehavi et al 2011 Luminosity threshold samples
# Note: this reassigns `zmaxes` and `magbins` again; the two lists are
# index-aligned (9 entries each), and `tracer` comes from an earlier cell.
zmaxes = [0.041695, 0.052536, 0.064211, 0.084892, 0.106407, 0.132425, 0.15894, 0.198804, 0.245169]
magbins = [-18, -18.5, -19, -19.5, -20, -20.5, -21, -21.5, -22.4]
thresholds_likesdss = []
for z in zmaxes:
    # nran=18, njack=0, no quiescent split; entries are None when the file is missing.
    thresholds_likesdss.append(get_threshold_wp(tracer, 'pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', 'custom', z, 18, 0))

In [None]:
# Plot them all
plt.figure(figsize=(6, 6))
# One colour per threshold sample, indexed like magbins / thresholds_likesdss.
colors = ['k', 'k', 'cyan', 'blue', 'darkgreen', 'limegreen', 'k', 'r', 'magenta']
for i in [8,7,6,5,3]:
    result = thresholds_likesdss[i]
    if result is None:
        continue

    label = f'$M_r<{magbins[i]}$'
    if np.shape(result)[1] == 3:
        # Three-column tables are plotted as bare points.
        plt.plot(result[:,0], result[:,2], 'o', color=colors[i], label=label)
    else:
        # Otherwise column 3 is used as the error bar; labelled so this
        # branch also gets a legend entry.
        plt.errorbar(result[:,0], result[:,2], yerr=result[:,3], fmt='.', color=colors[i], capsize=2, label=label)


# SDSS Data
sdss_magbins = [19.5, 20.5, 21, 21.5, 22]
colors = ['blue', 'limegreen', 'k', 'r', 'magenta']
sdss_magbins.reverse()
colors.reverse()
# Pair each magnitude with its colour up front, so a missing file cannot
# shift the colours of the curves that follow it.
for m, sdss_color in zip(sdss_magbins, colors):
    # TODO
    fn = os.path.join(PARAMS_SDSS_FOLDER, f'sdss-thresh-{m:.1f}.csv')
    if not os.path.exists(fn):
        print(f'File {fn} not found')
        continue
    data = np.loadtxt(fn, skiprows=1, dtype='float', delimiter=',')
    plt.plot(data[:,0], data[:,1], '-', label=f'SDSS $M_r<-{m}$', color=sdss_color)


plt.xscale('log')
plt.yscale('log')
plt.ylabel(r'$w_p(r_p)$')
plt.xlabel(r'$r_p$ [Mpc/h]')
plt.legend()
plt.ylim(2, 1500)
plt.xlim(0.1, 40)
# Add ticks on top and bottom
plt.gca().yaxis.set_ticks_position('both')
plt.gca().xaxis.set_ticks_position('both')
plt.gca().yaxis.set_tick_params(which='both', direction='in')
plt.gca().xaxis.set_tick_params(which='both', direction='in')
plt.tight_layout()

## Other tests