# Measuring the Galaxy-LyaForest Cross-Correlation from CLAMATO DR2

Here, we carry out the first cross-correlation measurement between the Ly-alpha forest measured by CLAMATO DR2 and coeval galaxies from MOSDEF, 3D-HST and zCOSMOS-Deep (separately for each sample). We use the simple estimator from the Font-Ribera+ (2012) DLA-forest cross-correlation paper:

\begin{equation*}
\xi_A = \frac{\sum_{i\in A} w_i \delta_{Fi}}{\sum_{i\in A} w_i},
\end{equation*}

where 

\begin{equation*}
w_i =  \left[\sigma^2_F(z_i) + \frac{\sigma_{N,i}^2}{C_i^2 \bar{F}^2(z_i)}\right]^{-1}
\end{equation*}

and $\sigma_F^2(z_i) = 0.065 [(1+z_i)/3.25]^{3.8}$.

This uses the np.histogram2d function twice, to compute the numerator and denominator of the estimator around each galaxy.

The pixel data needs to be generated using the IDL script GEN_CROSSCORR_INPUT.PRO, and the mean flux should first be calculated with CALC_MEANFLUX.IPYNB.

### Read in Ly-a forest pixels

In [None]:
import numpy as np
import time as time
import os

import lyafxcorr_kg as xcorr
import constants

# Set up matplotlib and use a nicer set of plot parameters
%config InlineBackend.rc = {}
import matplotlib as mpl
mpl.rc('mathtext',fontset='stixsans')
mpl.rc('figure', facecolor="white")
#matplotlib.rc_file("../../templates/matplotlibrc")
import matplotlib.pyplot as plt
#import matplotlib.colors as colors
%matplotlib inline

import astropy.table
from astropy.cosmology import FlatLambdaCDM
from astropy.io import fits
from astropy.io import ascii
from astropy.table import Table
from astropy import units as u
from astropy.coordinates import SkyCoord

def taueff_evo(z):
    """Return the power-law effective optical depth at redshift ``z``.

    Evaluates ``0.001845 * (1 + z)**3.924``.  ``z`` may be a scalar or any
    object supporting elementwise arithmetic (e.g. a NumPy array), in which
    case the result has the same shape.
    """
    amplitude = 0.001845
    slope = 3.924
    return amplitude * (1.0 + z) ** slope

# Cosmology shared by every distance computation in this notebook
cosmo = constants.COSMOLOGY

# Load the Ly-a forest pixels for the configured data version
pixel_path = os.path.join(
    constants.CLAMATO_DIR_BASE,
    f"pixel_radecz_cl2020_{constants.DATA_VERSION}.bin",
)
lyapix = xcorr.lyapix(pixel_path, cosmo=cosmo)

npix = lyapix.npix
print("Read in %i Ly-a forest pixels" % npix)

# Quick-look histogram of the pixel distances, in fixed-width bins that span
# the full range of the data
binwidth = 50
histdata = lyapix.coord.distance.value
hist_edges = np.arange(min(histdata), max(histdata) + binwidth, binwidth)

fig, ax = plt.subplots()
ax.hist(histdata, bins=hist_edges)
plt.show()

# Read the measured mean flux as a function of redshift
fmean_path = os.path.join(
    constants.CLAMATO_DIR_BASE,
    f'fmean_measured_{constants.DATA_VERSION}.dat',
)
fmean_str = ascii.read(fmean_path)
zmid, F_mean = fmean_str['zmid'], fmean_str['F_mean']

# NOTE: the mean-flux correction itself is currently disabled, so zmid and
# F_mean are loaded but not applied to lyapix.delta.  The two candidate
# corrections are kept below for reference.
#Fcorr = np.interp(lyapix.z, zmid, F_mean) / np.exp(-taueff_evo(lyapix.z))
#lyapix.delta = ((1.+lyapix.delta)/np.exp(-taueff_evo(lyapix.z)))-1.


### Read in galaxies and generate randoms
We use the catalog created with GRAB_COEVAL_GAL.IPYNB

At the same time, we also generate a mock catalog.

In [None]:
# Load the coeval-galaxy catalog (IPAC-format table) for this data version.
galfil = os.path.join(constants.GAL_DIR_BASE, f'cat_galxcorr_cl2020_uniq_{constants.DATA_VERSION}.dat')
gal = ascii.read(galfil, format='ipac')

# Keep only galaxies whose 'source' is one of these surveys; anything else
# (e.g. ZFIRE) is dropped.
included_surveys = ['3DHST', 'CLAMATO', 'MOSDEF', 'VUDS', 'zDeep']

# Iterate the 'source' column directly instead of indexing the table row by
# row: same membership test, without a row lookup per galaxy.
indices_to_drop = [
    i for i, source in enumerate(gal['source'])
    if source not in included_surveys
]
gal.remove_rows(indices_to_drop)

print(f'Stacked catalog has {len(gal)} galaxies')

# Spectroscopic-redshift catalog; the 'Ms_best' stellar masses used below are
# read from the joined table.
specz_cat = ascii.read(os.path.join(constants.GAL_DIR_BASE, 'all_specz_v3_comb_COSMOS2020_v3.dat'))

# Apply the same survey selection as for the galaxy catalog.  Iterating the
# 'source' column avoids a row lookup per entry.
indices_to_drop = [
    i for i, source in enumerate(specz_cat['source'])
    if source not in included_surveys
]
specz_cat.remove_rows(indices_to_drop)

# Drop the columns that would clash with those already in `gal`, then expose
# the spec-z identifier under the name 'id' (as integers) so it can serve as
# the join key.
del specz_cat['id']
del specz_cat['zspec']
specz_cat.rename_column('ID_specz', 'id')
specz_cat['id'] = specz_cat['id'].astype(int)

# NOTE(review): astropy's join defaults to an inner join, so galaxies with no
# matching 'id' in specz_cat are dropped here; the count printed below shows
# how many survive.
gal = astropy.table.join(gal, specz_cat, keys='id')

print(f'After stellar mass join, galaxy catalog has {len(gal)} galaxies.')

# Stellar masses: take the best-fit (minimum chi2) values.  KG recommends
# best-fit, and the choice probably makes little difference anyway.
log_smass_obs = gal['Ms_best']

# Report how many entries are about to be removed.  Only galaxies with a
# strictly positive log mass are kept; NaN entries fail the `> 0` comparison
# and are therefore removed by the same mask.
n_bad_mass = np.sum(log_smass_obs <= 0)
n_nan_mass = np.sum(np.isnan(log_smass_obs))
print(f'{n_bad_mass} galaxies have negative log stellar masses; dropping.')
print(f'{n_nan_mass} galaxies have NaN stellar masses; also dropping these.')

# Apply one mask to both the catalog and the mass column so they stay aligned
has_valid_mass = log_smass_obs > 0
gal = gal[has_valid_mass]
log_smass_obs = log_smass_obs[has_valid_mass]

print(f'After drops, final stacked catalog has {len(gal)} galaxies.')

In [None]:
# Distribution of best-fit log stellar masses, with vertical lines marking
# the 33.3 and 66.6 percentiles.  (A 25/50/75 percentile split is an
# alternative set of markers that was tried previously.)
lower_cut, upper_cut = (np.percentile(log_smass_obs, pct) for pct in (33.3, 66.6))

plt.hist(log_smass_obs, bins=50)
plt.xlabel('Best fit log stellar mass [M_sun]')
plt.ylabel('# galaxies')

# Only the second line carries a label, so the legend shows a single entry
plt.axvline(lower_cut, color='black')
plt.axvline(upper_cut, color='black', label='33.3/66.6 percentiles')

plt.legend()

In [None]:
# Split the sample into three stellar-mass bins at the 1/3 and 2/3 quantiles;
# the outer boundaries are infinite so every galaxy lands in exactly one bin.
lower_tercile = np.quantile(log_smass_obs, 1/3)
upper_tercile = np.quantile(log_smass_obs, 2/3)
smass_bin_boundaries = [-np.inf, lower_tercile, upper_tercile, np.inf]

# One entry per bin: (lower bound, upper bound, mean log mass, SkyCoord)
binned_Coord = []

for lb, ub in zip(smass_bin_boundaries[:-1], smass_bin_boundaries[1:]):
    # Half-open interval [lb, ub)
    in_bin = (log_smass_obs >= lb) & (log_smass_obs < ub)
    bin_gal = gal[in_bin]
    avg_smass = np.mean(log_smass_obs[in_bin])
    print(f'Log smass {lb} - {ub} | # gal {np.sum(in_bin)} | Average log smass {avg_smass}')

    # 3D positions of the bin's galaxies, using comoving distance at zspec
    bin_coord = SkyCoord(
        ra=bin_gal['ra'],
        dec=bin_gal['dec'],
        distance=cosmo.comoving_distance(bin_gal['zspec']),
    )
    binned_Coord.append((lb, ub, avg_smass, bin_coord))

### Read in bin edges 

In [None]:
# Files holding the line-of-sight (pi) and transverse (sigma) bin edges
PiBin_fil = os.path.join(constants.XCORR_DIR_BASE, 'bins23_pi_0-30hMpc.txt')
SigBin_fil = os.path.join(constants.XCORR_DIR_BASE, 'bins10_sigma_0-30hMpc.txt')

PiBins0 = ascii.read(PiBin_fil)
SigBins0 = ascii.read(SigBin_fil)

# Convert bin boundaries from Mpc/h to Mpc.  Dividing the arrays by the
# scalar h broadcasts elementwise, so there is no need to build a list of
# repeated h values first.
# NOTE(review): assumes cosmo.h is a plain scalar, as the original
# list-repetition form also did.
PiEdges = PiBins0['pi_edges'].data / cosmo.h
SigEdges = SigBins0['sigma_edges'].data / cosmo.h

print('Pi bin edges in Mpc:')
print(PiEdges)
print('Sigma bin edges in Mpc:')
print(SigEdges)


# Full line-of-sight extent covered by the pi bins
PiBound = (min(PiEdges), max(PiEdges))

### Compute Cross-Correlation For Stellar Mass Bins


In [None]:
base_dir = os.path.join(constants.XCORR_DIR_BASE, 'stacked')
# np.save does not create missing directories, so make sure the output
# location exists before the first measurement is written.
os.makedirs(base_dir, exist_ok=True)

# Plot geometry is identical for every mass bin, so set it up once
X, Y = np.meshgrid(SigEdges, PiEdges)
SigMax = 25.
PiMin = -30.
PiMax = 30.

for smass_lb, smass_ub, avg_smass, Coord in binned_Coord:
    # Cross-correlate this bin's galaxies with the forest pixels and store
    # the bare array, tagged by the bin's mean log stellar mass.
    XCorr, _ = xcorr.xcorr_gal_lya(Coord, lyapix, SigEdges, PiEdges, cosmo=cosmo)
    np.save(os.path.join(base_dir, f"xcorr_stacked_{avg_smass}_globalf_{constants.DATA_VERSION}.npy"), XCorr.value)

    # Plotting code
    # The mesh built above has sigma along columns and pi along rows, so the
    # measurement is transposed to match (a rot90 followed by a flipud is
    # exactly a transpose).
    XCorrArr = XCorr.T

    # Fresh figure per bin so plots never pile up on a shared axes
    fig, ax1 = plt.subplots()

    pcm = ax1.pcolormesh(X, Y, XCorrArr, cmap='jet_r', vmin=-0.2, vmax=0.15)
    ax1.set_aspect('equal')
    ax1.set_xlim(np.min(X), SigMax)
    ax1.set_ylim(PiMin, PiMax)
    ax1.set_xlabel(r'$\sigma\; (\mathrm{cMpc})$')
    ax1.set_ylabel(r'$\pi\; (\mathrm{cMpc})$')
    ax1.set_title(f'Stacked {smass_lb} - {smass_ub}')

    fig.colorbar(pcm, ax=ax1)
    fig.subplots_adjust(wspace=-0.2)

    plt.show()