In [None]:
import os
import re

import nbodykit
from nbodykit.lab import *
import numpy as np
import astropy.cosmology
import pandas as pd
import scipy.ndimage
%pylab inline

import constants

In [None]:
# Redshift at which the linear matter power spectrum and the RSD shift are evaluated.
EVAL_Z = 2.3

# Lyman-alpha forest parameters; BIAS_LYA is derived from the other three.
BIAS_ETA_LYA = -0.2014
BETA_LYA = 1.669
GROWTH_RATE = 0.970386193694752
BIAS_LYA = (BIAS_ETA_LYA * GROWTH_RATE) / BETA_LYA
# RSD parameter of a tracer with the given linear bias.
BETA_GAL_FUNC = lambda bias: GROWTH_RATE / bias

# From https://www.cosmosim.org/cms/simulations/bolshoip/
COSMO = astropy.cosmology.Planck13

LINEAR_MATTER_PS = nbodykit.cosmology.power.linear.LinearPower(COSMO, EVAL_Z)

# Simulation density grid and the side length of its cubic box.
SIM_DENS_PATH = '/global/homes/b/bzh/projectdir/bzh/BolshoiP/z2.5/BolshoiP_rsd_cic_n512_0094.npy'
SIM_LEN_MPCH = 250

SIM_DENS = np.load(SIM_DENS_PATH)
# Map density to flux as exp(-0.226 * density^1.5).
SIM_FLUX = np.exp(-0.226 * SIM_DENS ** 1.5)
# Smooth with sigma = (cells along axis 0) / SIM_LEN_MPCH grid cells, i.e. one
# unit of box length.
# NOTE(review): no `mode` is passed, so scipy's default boundary handling is
# used; if the box is periodic, mode='wrap' may be intended - confirm.
SIM_FLUX = scipy.ndimage.gaussian_filter(SIM_FLUX, sigma=SIM_FLUX.shape[0] / SIM_LEN_MPCH)
SIM_FLUX_MESH = nbodykit.source.mesh.ArrayMesh(SIM_FLUX, BoxSize=(SIM_LEN_MPCH,) * 3)

# One selected-halo CSV per survey lives under ALL_HALOS_DIR_BASE.
SURVEY_NAMES = ['3dhst', 'clamato', 'mosdef', 'vuds', 'zDeep']
ALL_HALOS_DIR_BASE = os.path.join(constants.XCORR_DIR_BASE, 'mock', 'selected_halos')

In [None]:
# Source of this is Font-Ribera 2012 DLA paper, right above equation 1.2.
def calculate_bias(halo_cat_source):
    """Estimate a single linear bias value for a catalog.

    The 1D auto power spectrum of ``halo_cat_source`` is measured on a 512^3
    mesh over the ``SIM_LEN_MPCH`` box, in k-bins of width 0.01 up to
    k = 0.2. Each bin is divided by ``LINEAR_MATTER_PS`` evaluated at that
    bin's k, and the square root of the NaN-ignoring median ratio is returned.

    Parameters
    ----------
    halo_cat_source
        Catalog accepted by nbodykit's ``FFTPower`` as its first argument.

    Returns
    -------
    float
        sqrt(median(P_catalog(k) / P_linear(k))).

    Notes
    -----
    NOTE(review): the measured power is used as-is; nothing here subtracts
    shot noise or undoes redshift-space distortions - confirm this is intended.
    """
    # Cross-correlation variant, kept for reference:
    # def bias(xcorr_k_mu, k, mu):
    #     return (xcorr_k_mu / (1 + BETA_LYA * mu**2) / BIAS_LYA / LINEAR_MATTER_PS(k)) - (GROWTH_RATE * mu**2)

    # FFTPower measures the spectrum when it is constructed and stores it on
    # `.power`; calling `.run()` afterwards would repeat the whole computation.
    gal_ps = nbodykit.algorithms.fftpower.FFTPower(
        halo_cat_source, '1d',
        Nmesh=512,
        BoxSize=(SIM_LEN_MPCH, SIM_LEN_MPCH, SIM_LEN_MPCH),
        kmax=0.2,
        dk=0.01,
    ).power

    # nbodykit stores the power as a complex array; only the real part is
    # meaningful for an auto spectrum, and dropping the imaginary part keeps
    # the returned bias a plain float.
    gal_bias = gal_ps['power'].real / LINEAR_MATTER_PS(gal_ps['k'])

    # Empty k-bins yield NaN, hence the NaN-aware median.
    return np.nanmedian(gal_bias) ** 0.5

In [None]:
def df_to_catalogsource(df):
    """Turn a halo DataFrame into an nbodykit ``ArrayCatalog``.

    The input repeats along the z-direction, so only rows with
    ``z < SIM_LEN_MPCH`` are kept. The catalog's 'Position' column is built
    from the ``x``, ``y`` and ``z`` columns and its 'Weight' column from
    ``smass``.
    """
    inside_box = df['z'] < SIM_LEN_MPCH
    kept = df[inside_box]
    xyz = np.array([kept['x'], kept['y'], kept['z']]).T
    columns = {'Position': xyz, 'Weight': kept['smass']}
    return nbodykit.source.catalog.ArrayCatalog(data=columns, BoxSize=(SIM_LEN_MPCH,) * 3)

# One catalog per survey, in SURVEY_NAMES order.
halo_cats = [
    df_to_catalogsource(
        pd.read_csv(os.path.join(ALL_HALOS_DIR_BASE, f'all_halos_{survey}.csv'))
    )
    for survey in SURVEY_NAMES
]

In [None]:
# Visual sanity check: paint the first survey's catalog (SURVEY_NAMES[0]) onto a
# 128^3 mesh and display the slice at index 64 along the last axis.
plt.imshow(halo_cats[0].to_mesh(Nmesh=128).preview()[:, :, 64])

In [None]:
# for t, c in zip(SURVEY_NAMES, halo_cats):
#     plt.hist(c['smass'], bins=20, range=(8, 12))
#     plt.title(t)
#     plt.show()

In [None]:
# Bias estimate for every survey catalog, in SURVEY_NAMES order.
survey_biases = list(map(calculate_bias, halo_cats))
print('Warning: this is weighted by stellar mass, not halo mass (and so is incorrect!)')
for survey_name, survey_bias in zip(SURVEY_NAMES, survey_biases):
    print(f'{survey_name}: {survey_bias}')

# Halo mass-Analytical Bias Curve

In [None]:
def _read_little_h(path, max_header_lines=202):
    """Return the number on the first ``#h = <value>`` line near the top of ``path``.

    Only the first ``max_header_lines`` lines are scanned.

    Raises
    ------
    RuntimeError
        If no such line is found within the scanned lines (including when the
        file is shorter than that).
    """
    pattern = re.compile(r'#h = ([0-9]+\.[0-9]*)')
    with open(path, 'r') as f:
        # Iterate lazily: only the header is needed, not the whole catalog.
        for i, line in enumerate(f):
            if i >= max_header_lines:
                break
            match = pattern.search(line)
            if match is not None:
                return float(match.group(1))
    raise RuntimeError(
        f'No "#h = <value>" line found in the first {max_header_lines} lines of {path}'
    )


def read_fofp_file(path):
    """
    Read a halo properties catalog from the given path.

    The table is parsed as whitespace-delimited text after skipping the first
    30 lines, and little h is read from the ``#h = ...`` header line. The Z
    coordinate is shifted into redshift space using the VZ column before the
    positions are assembled. A histogram of the applied shifts is drawn on the
    current matplotlib axes as a diagnostic.

    Parameters
    ----------
    path : str
        Path of the catalog text file.

    Returns
    -------
    tuple
        ``(halo_masses, stellar_masses_true, stellar_masses_obs, positions)``:
        the ``M``, ``SM`` and ``obs_SM`` columns as arrays, and an ``(N, 3)``
        array of X, Y and redshift-space Z. Positions are presumably in Mpc/h
        (an earlier note on this function said "box size units") - TODO confirm.

    Raises
    ------
    RuntimeError
        If the ``#h = ...`` header line cannot be found.
    """
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
    halo_cat = pd.read_csv(path, header=0, skiprows=30, sep=r'\s+')
    halo_cat = halo_cat.rename(columns={'#ID': 'ID'})
    print(halo_cat.columns)
    print("{} halos read".format(len(halo_cat)))

    littleh = _read_little_h(path)
    print(f'Found little h to be {littleh}')

    # The mass is initially in Msun (no h). We DO NOT convert to Msun/h by dividing by the h provided in the file.
    # Halo virial mass
    halo_masses = halo_cat.M.to_numpy()
    # Stellar mass (truth, including intrinsic spread?)
    stellar_masses_true = halo_cat.SM.to_numpy()
    # Stellar mass (including observational + systematic errors)
    stellar_masses_obs = halo_cat.obs_SM.to_numpy()

    # Apply RSD: shift Z by VZ * (1 + z) / H(z) * h, with the astropy unit
    # stripped via `.value`.
    # NOTE(review): presumably VZ is in km/s so the shift comes out in Mpc/h - confirm.
    z_rsd_offset = halo_cat.VZ * ((1 + EVAL_Z) / COSMO.H(EVAL_Z) * littleh).value
    plt.hist(z_rsd_offset, bins=100)
    z_redshift_space = halo_cat.Z + z_rsd_offset

    # In Mpc/h
    positions = np.array([halo_cat.X, halo_cat.Y, z_redshift_space]).T

    return (halo_masses, stellar_masses_true, stellar_masses_obs, positions)

In [None]:
# Load the full simulation halo catalog. read_fofp_file returns
# (halo masses, true stellar masses, observed stellar masses, positions);
# the true stellar masses are discarded here.
# NOTE(review): the 0.288498 in the file name is presumably the snapshot scale
# factor, which would not correspond exactly to EVAL_Z - confirm.
fofp_path = os.path.join(constants.SIM_DIR_BASE, 'sfr_catalog_0.288498.txt')
masses, _, smass_obs, positions = read_fofp_file(fofp_path)

In [None]:
# Distribution of halo masses, with counts on a log scale.
plt.hist(np.log10(masses), bins=20)
plt.yscale('log')
# The histogram is taken over log10(mass), so the axis label has to say so.
plt.xlabel(r'Halo mass [$\log_{10}(M_\odot)$]')
plt.ylabel('# halos')

In [None]:
curve_mass = masses

def plot_mass_bias_curve(log_width, log_step=0.1, max_log_lb=13.4):
    """Compute the bias of halos selected in sliding log-mass windows.

    Despite the name, nothing is drawn: the curve's coordinates are returned
    for the caller to plot. The notebook-level ``curve_mass`` and ``positions``
    arrays are read directly.

    Windows start at the 1st percentile of ``curve_mass`` and advance by
    ``log_step``; a window is only used if it lies entirely below the largest
    mass and contains at least one halo.

    Parameters
    ----------
    log_width : float
        Width of each mass window, in dex.
    log_step : float, optional
        Spacing in dex between the lower edges of consecutive windows.
    max_log_lb : float, optional
        Exclusive upper limit on a window's lower edge (log10 of mass). The
        default is the value that used to be hard-coded.

    Returns
    -------
    curve_x : list
        log10 of the mean mass inside each window.
    curve_y : list
        ``calculate_bias`` result for the halos inside each window.
    """
    # Loop invariants, computed once instead of on every iteration.
    log_mass_start = np.log10(np.percentile(curve_mass, 1))
    log_mass_max = np.log10(np.max(curve_mass))
    box = (SIM_LEN_MPCH, SIM_LEN_MPCH, SIM_LEN_MPCH)

    curve_x = []
    curve_y = []

    for log_lb in np.arange(log_mass_start, max_log_lb, log_step):
        log_ub = log_lb + log_width
        if log_ub > log_mass_max:
            # Edges only increase, so every later window would overshoot too.
            break
        mask = np.logical_and(curve_mass >= 10**log_lb, curve_mass < 10**log_ub)
        if not np.any(mask):
            # An empty window has no power spectrum and no mean mass; skip it.
            continue
        masked_ac = nbodykit.source.catalog.ArrayCatalog(
            data={'Position': positions[mask], 'Weight': curve_mass[mask]},
            BoxSize=box,
        )
        curve_y.append(calculate_bias(masked_ac))
        curve_x.append(np.log10(np.mean(curve_mass[mask])))
    return curve_x, curve_y

In [None]:
# One bias curve per window width, drawn widest first.
for window_dex, line_color, line_label in (
    (1, 'red', '1 dex'),
    (0.5, 'green', '0.5 dex'),
    (0.25, 'blue', '0.25 dex'),
):
    plt.plot(*plot_mass_bias_curve(window_dex), color=line_color, label=line_label)

plt.xlabel(r'Average DM halo mass [$log_{10}(M_\odot)$]')
plt.ylabel(r'Analytical galaxy bias $b_g$')
plt.legend(title='Window size')

figure_path = os.path.join(constants.FIG_DIR_BASE, 'sim-mass-bias-curve.png')
plt.savefig(figure_path)

In [None]:
# Histogram of log10 halo mass using matplotlib's default binning and a linear
# count axis.
plt.hist(np.log10(masses))