# Mass Completeness of LRG selection  

(updated for final selection)
Two kinds of mass supersets are considered: one where the SV sample is the superset, and another where a magnitude-limited ($z<21$) sample is the superset.

In [None]:
from pathlib import Path
from importlib import reload
import joblib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.lines as lines
from astropy.table import Table, vstack

# import mpl_scatter_density
from astropy.cosmology import FlatLambdaCDM
cosmo = FlatLambdaCDM(H0=70, Om0=0.3)

from desitarget.sv1.sv1_targetmask import desi_mask, bgs_mask
from desitarget.sv3.sv3_targetmask import desi_mask, bgs_mask
import utils
reload(utils)
from utils import fluxToMag

In [None]:
# Survey regions to load; every entry is read and the results are stacked
# in the catalogue-loading cell.
survey = ["north", "south"] # each entry is either `north` or `south`
# now we will concatenate both surveys

In [None]:
# Read the LRG and magnitude-limited catalogues of every requested survey
# region and stack each set into a single table.
my_path = Path("/global/cscratch1/sd/bid13/LRG")
lrg_cat = vstack([Table.read(my_path / f"LRG_{region}.fits") for region in survey])
mag_lim_cat = vstack([Table.read(my_path / f"mag_lim_{region}.fits") for region in survey])

# Distribution of Masses

### Mass Completeness of Baseline with respect to a magnitude-limited sample

In [None]:
# Binning configuration shared by the completeness plots.
z_min = 0.3      # lower edge of the redshift range
z_max = 1.1      # upper edge of the redshift range
z_bins = 8       # number of redshift bins
mass_bins = 50   # number of mass thresholds / histogram bins

# z_bins equal-width bins are delimited by z_bins + 1 edges
z_edges = np.linspace(z_min, z_max, num=z_bins + 1)

In [None]:
# Keep only rows whose photo-z based stellar mass satisfies
# 0 < logmass_photoz < 12, first in the magnitude-limited catalogue and then
# in the LRG catalogue.
mag_lim_logmass = mag_lim_cat["logmass_photoz"]
good_mass_mask = (mag_lim_logmass > 0) & (mag_lim_logmass < 12)
mag_lim_cat = mag_lim_cat[good_mass_mask]

lrg_logmass = lrg_cat["logmass_photoz"]
good_mass_mask = (lrg_logmass > 0) & (lrg_logmass < 12)
lrg_cat = lrg_cat[good_mass_mask]

In [None]:
# Mass completeness of the LRG sample relative to the magnitude-limited
# sample, one panel per photo-z bin:
#   * dashed line      - ratio of LRG to magnitude-limited counts at or above
#                        each mass threshold
#   * filled histogram - mass distribution of the magnitude-limited sample
#   * step histogram   - mass distribution of the LRG sample
# Both histograms are divided by the peak of the magnitude-limited histogram.
fig, axs = plt.subplots(2,4, figsize=(17, 8), facecolor='w', edgecolor='k', sharex="col", sharey='row')
axs = axs.ravel()

for i in range(z_bins):
    label = str(np.round(z_edges[i],3))+r"$\leq \mathrm{z}_{phot} <$"+str(np.round(z_edges[i+1],3))

    #select objects in the redshift bin
    mass_z = lrg_cat[(lrg_cat["zphot"]>=z_edges[i]) & (lrg_cat["zphot"]<z_edges[i+1])]["logmass_photoz"]
    mass_z_mag_lim = mag_lim_cat[(mag_lim_cat["zphot"]>=z_edges[i]) & (mag_lim_cat["zphot"]<z_edges[i+1])]["logmass_photoz"]

    if len(mass_z_mag_lim) == 0:
        # No reference objects in this bin: min()/max() below would raise on
        # an empty column, so leave the panel empty but still titled.
        axs[i].set_title(label, size=20)
        continue

    #generate mass thresholds spanning the magnitude-limited sample in this bin
    mass_ticks = np.linspace(mass_z_mag_lim.min(), mass_z_mag_lim.max(), mass_bins)
    completeness=[]
    mass_plot_ticks=[]

    #Find the ratio of LRG to magnitude-limited counts at or above each threshold
    for tick in mass_ticks:
        n_selected = (mass_z >= tick).sum()
        # A point is plotted only if more than 50 LRGs lie at or above the threshold
        if n_selected > 50:
            completeness.append(n_selected / (mass_z_mag_lim >= tick).sum())
            mass_plot_ticks.append(tick)

    # Histogram both samples on the bin edges derived from the magnitude-limited sample
    tot_count, mass_bin_edges = np.histogram(mass_z_mag_lim, bins=mass_bins)
    sel_count, mass_bin_edges = np.histogram(mass_z, bins=mass_bin_edges)

    axs[i].plot(mass_plot_ticks,completeness, ls="--", color="k")

    utils.hist_on_binned_array(tot_count/tot_count.max(), mass_bin_edges, ax=axs[i], alpha=0.3, color="C0")
    utils.hist_on_binned_array(sel_count/tot_count.max(), mass_bin_edges, ax=axs[i], histtype="step", color="k")

    axs[i].set_title(label, size=20)
    axs[i].set_ylim(0,1.05)
    axs[i].set_xlim(10.5,12)
    axs[i].tick_params(axis='x', labelsize=15)
    axs[i].tick_params(axis='y', labelsize=15)
    axs[i].grid(alpha=0.5)

# The label previously ended with an unbalanced ")" after the math expression.
fig.text(0.5, 0.03, r"$\log_{10}(\mathrm{M}_{\star}[\mathrm{M}_{\odot}])$", ha='center',size=30) #Common x label
fig.text(0.06, 0.5, "Completeness", va='center', rotation='vertical',size=30) #Common y label

# plt.tight_layout()
fig.savefig("mass_completeness.pdf", dpi=300, bbox_inches="tight")


### Mass Completeness of Baseline wrt union of SV samples
(i.e. Fraction of objects included that are above a given stellar mass threshold)

In [None]:
# fig, axs = plt.subplots(2,5, figsize=(20, 8), facecolor='w', edgecolor='k', sharex="col", sharey='row')
# axs = axs.ravel()

# for i in range(z_bins):
#     #select objects in the redshift bin
#     mass_z_opt = lrg_opt_cat[ (lrg_opt_cat["zphot"]>=z_edges[i]) & (lrg_opt_cat["zphot"]<z_edges[i+1])] ["logmass"]    
#     mass_z_ir = lrg_ir_cat[ (lrg_ir_cat["zphot"]>=z_edges[i]) & (lrg_ir_cat["zphot"]<z_edges[i+1])] ["logmass"]
    
#     mass_sv_z = lrg_sv_cat[ (lrg_sv_cat["zphot"]>=z_edges[i]) & (lrg_sv_cat["zphot"]<z_edges[i+1])] ["logmass"]
    
#     #generate mass points
#     mass_ticks = np.linspace( mass_sv_z.min() ,mass_sv_z.max(),mass_bins)
#     completeness_opt=[]
#     mass_plot_ticks_opt=[]
#     completeness_ir=[]
#     mass_plot_ticks_ir=[]
    
#     #Find fraction of objects greater than the given mass
#     for j in range(mass_bins):
#         #OPT
#         if ((mass_z_opt>=mass_ticks[j]).sum())>50: # Points will be plotted only if there are more than 50 data points in a bin
#             completeness_opt.append( (mass_z_opt>=mass_ticks[j]).sum()/(mass_sv_z>=mass_ticks[j]).sum() )
#             mass_plot_ticks_opt.append(mass_ticks[j])
#         #IR
#         if ((mass_z_ir>=mass_ticks[j]).sum())>50: # Points will be plotted only if there are more than 50 data points in a bin
#             completeness_ir.append( (mass_z_ir>=mass_ticks[j]).sum()/(mass_sv_z>=mass_ticks[j]).sum() )
#             mass_plot_ticks_ir.append(mass_ticks[j])
    
#     label = str(np.round(z_edges[i],3))+r"$\leq z <$"+str(np.round(z_edges[i+1],3))
#     tot_count, mass_bin_edges = np.histogram(mass_sv_z, bins=mass_bins)
#     sel_count_opt, mass_bin_edges = np.histogram(mass_z_opt, bins=mass_bin_edges)
#     sel_count_ir, mass_bin_edges = np.histogram(mass_z_ir, bins=mass_bin_edges)
    
#     axs[i].plot(mass_plot_ticks_opt,completeness_opt, ls="--", color="k")
#     axs[i].plot(mass_plot_ticks_ir,completeness_ir, ls="--", color="red")
#     utils.hist_on_binned_array(tot_count/tot_count.max(), mass_bin_edges, ax=axs[i], alpha=0.3, color="C0")
#     utils.hist_on_binned_array(sel_count_opt/tot_count.max(), mass_bin_edges, ax=axs[i], histtype="step", color="k")
#     utils.hist_on_binned_array(sel_count_ir/tot_count.max(), mass_bin_edges, ax=axs[i], histtype="step", color="red")
#     axs[i].set_title(label)
#     axs[i].set_ylim(0,1)
#     axs[i].set_xlim(10,12)
#     axs[i].grid(alpha=0.5)
# fig.text(0.5, 0.04, r"Stellar Mass ($\log(M_{\odot})$)", ha='center',size=20) #Common x label
# fig.text(0.06, 0.5, "Fraction Selected", va='center', rotation='vertical',size=20) #Common y label


# handle1 = lines.Line2D([], [], c='k', ls ="--", lw=2)
# handle2 = lines.Line2D([], [], c='red', ls ="--", lw=2)
# fig.legend( (handle1,handle2), ("OPT Selection", "IR Selection"), loc="center right", bbox_to_anchor=(0.5,1))

### Location of massive objects

In [None]:
# Select SV IR targets that the IR baseline selection does not include,
# restricted to 11 < logmass < 11.5 and 0.35 < zphot < 1.
# NOTE(review): `survey` is assigned a list in the setup cell, so
# `survey.upper()` below raises AttributeError as written; this cell appears
# to predate the switch to concatenating both surveys - confirm and update.
# NOTE(review): `lrg_sv_cat` is not defined in any of the visible cells, and
# `baseline_mask_ir` is only assigned in a later cell (from `z_cat`), so this
# cell presumably relies on out-of-order execution - verify.
# NOTE(review): the import cell imports `desi_mask` from both the sv1 and the
# sv3 target masks, so the sv3 one is in effect here while the column read is
# `SV1_DESI_TARGET` - confirm which mask is intended.
# high_mass = lrg_sv_cat[~(baseline_mask_opt | baseline_mask_ir)]
# high_mass = high_mass[(high_mass["logmass"]>11)&(high_mass["logmass"]<11.5)]
# high_mass = high_mass[(high_mass["zphot"]>0.35) & (high_mass["zphot"]<1.)]
# high_mass = high_mass[(high_mass["zfibermag"]<21.5)]
sv_mask_opt = ((lrg_sv_cat["SV1_DESI_TARGET"] & desi_mask.mask("LRG_SV_OPT_"+survey.upper()))>0)
sv_mask_ir = ((lrg_sv_cat["SV1_DESI_TARGET"] & desi_mask.mask("LRG_SV_IR_"+survey.upper()))>0)

# high_mass = lrg_sv_cat[((~baseline_mask_opt) & sv_mask_opt)]
high_mass = lrg_sv_cat[((~baseline_mask_ir) & sv_mask_ir)]
high_mass = high_mass[(high_mass["logmass"]>11)&(high_mass["logmass"]<11.5)]
high_mass = high_mass[(high_mass["zphot"]>0.35) & (high_mass["zphot"]<1.)]
# high_mass = high_mass[(high_mass["zfibermag"]<21.5)]

In [None]:
# NOTE(review): `lrg_ir_cat` is only assigned in a later cell, so this call
# depends on execution order. The `high_mass` table built in the previous
# cell is not used anywhere visible - possibly it was meant to be plotted
# here; confirm.
utils.ts_plot(lrg_ir_cat)

### Mass completeness with spectroscopic redshifts

In [None]:
# Load the SV1 LRG redshift catalogue and reduce it to a two-column
# (TARGETID, Z) pandas table of spectroscopic redshifts.
z_cat = Table.read("/global/cfs/cdirs/desi/survey/catalogs/SV1/redshift_comps/cascades/3/LRG/alltiles_LRGzinfo.fits")

# Keep rows with FIBERSTATUS == 0 and ZWARN == 0
z_cat = z_cat[(z_cat["FIBERSTATUS"]==0) & (z_cat["ZWARN"]==0)]

# `survey` may be a single region name or a list of them; PHOTSYS is matched
# against the upper-cased first letter of each requested region. Calling
# `survey.upper()` directly fails when `survey` is a list.
surveys = [survey] if isinstance(survey, str) else list(survey)
photsys_mask = np.logical_or.reduce([z_cat["PHOTSYS"] == s.upper()[0] for s in surveys])
z_cat = z_cat[photsys_mask]

z_cat = z_cat[z_cat["SPECTYPE"]=="GALAXY"]
z_cat = z_cat[z_cat["TARGETS"] == "QSO+LRG"]

# Only columns with at most one dimension are kept for the pandas conversion
names = [name for name in z_cat.colnames if len(z_cat[name].shape) <= 1]
z_cat = z_cat[names].to_pandas()
# z_cat = z_cat[z_cat["TILEID"].isin([80605, 80607, 80609, 80620, 80622, 68000,
#                                     68001, 68002, 70000, 70001, 70002, 70003,
#                                     80668, 80670, 80672, 80674, 80676, 80678,
#                                     80680, 80682, 80684, 80686, 80688, 80690,
#                                     80692, 80694, 80696, 80698, 80700, 80702,
#                                     80704, 80706, 80708, 80710, 80712])]
# NOTE(review): `duplicated` is True for every repeat of a TARGETID after its
# first occurrence, so this KEEPS only the repeated rows (and drops targets
# that appear once). If the intent was one row per target, this should be
# `~z_cat.duplicated(...)` / `drop_duplicates` - confirm before changing.
z_cat = z_cat[z_cat.duplicated(subset="TARGETID")][["TARGETID", "Z"]]

In [None]:
# Attach the `lrg_sv_cat` columns to each spectroscopic redshift; the inner
# join keeps only TARGETIDs present in both tables.
# NOTE(review): `lrg_sv_cat` is not defined in any of the visible cells -
# confirm where it is loaded.
z_cat = z_cat.merge(lrg_sv_cat.to_pandas(), how="inner", on= "TARGETID")

In [None]:
# Get masses based on spec-z instead of photoz
# NOTE(review): `survey` is assigned a list in the setup cell, so the f-string
# below embeds the list's repr in the file name instead of a single region
# name; a per-region model is presumably needed - confirm and update.
# joblib.load unpickles the file, so it should only be pointed at trusted files.
regrf = joblib.load(f'/global/cfs/cdirs/desi/users/rongpu/ls_dr9.0_photoz/stellar_mass/rf_20210206_{survey}-mgc_redshift.pkl')
# Regressor inputs, one row per object: the magnitude differences g-r, r-z,
# z-W1, W1-W2 and the spectroscopic redshift Z.
data = np.column_stack((z_cat['gmag']-z_cat['rmag'], z_cat['rmag']-z_cat['zmag'], z_cat['zmag']-z_cat['w1mag'], z_cat['w1mag']-z_cat['w2mag'], z_cat['Z']))
distmod = cosmo.distmod(z_cat['Z'])
m_to_l_predict = regrf.predict(data)
# logmass_z = prediction + 0.4 * (4.45 - (zmag - distance modulus)).
# NOTE(review): presumably the regressor predicts a log mass-to-light ratio
# and 4.45 is the solar absolute magnitude in the z band - confirm.
z_cat["logmass_z"] = m_to_l_predict + 0.4*(4.45-(z_cat['zmag'] - distmod.to_value() ))

In [None]:
# Restrict `z_cat` to SV (OPT or IR) targets with 0 < logmass < 12, then
# split out the objects passing the OPT and the IR baseline selections.
# NOTE(review): `survey` is assigned a list in the setup cell, so every
# `survey.upper()` below raises AttributeError as written - confirm how the
# bit names should be built now that both surveys are concatenated.
# NOTE(review): the import cell imports `desi_mask` from both the sv1 and the
# sv3 target masks, so the sv3 one is in effect here while the column read is
# `SV1_DESI_TARGET` - confirm which mask is intended.
sv_mask_opt = ((z_cat["SV1_DESI_TARGET"] & desi_mask.mask("LRG_SV_OPT_"+survey.upper()))>0)
sv_mask_ir = ((z_cat["SV1_DESI_TARGET"] & desi_mask.mask("LRG_SV_IR_"+survey.upper()))>0)

sv_mask = sv_mask_opt | sv_mask_ir
# NOTE(review): this cut uses the `logmass` column brought in by the merge,
# not the `logmass_z` column computed from the spectroscopic redshift that
# the plot below uses - confirm this is intended.
good_mass_mask = (z_cat["logmass"]>0) & (z_cat["logmass"]<12) # remove points with no mass values
z_cat = z_cat[sv_mask & good_mass_mask]


# Baseline masks are evaluated on the already-filtered `z_cat`
baseline_mask_opt = ((z_cat["SV1_DESI_TARGET"] & desi_mask.mask("LRG_OPT_"+survey.upper()))>0)
lrg_opt_cat = z_cat[baseline_mask_opt]

baseline_mask_ir = ((z_cat["SV1_DESI_TARGET"] & desi_mask.mask("LRG_IR_"+survey.upper()))>0)
lrg_ir_cat = z_cat[baseline_mask_ir]

In [None]:
# Spec-z version of the completeness figure: one panel per redshift bin, with
# the OPT (black) and IR (red) baseline selections measured against all of
# `z_cat`.
fig, axs = plt.subplots(2, 4, figsize=(15, 8), facecolor='w', edgecolor='k', sharex="col", sharey='row')
axs = axs.ravel()


def _masses_in_z_bin(cat, z_lo, z_hi):
    """Return the `logmass_z` values of the rows of `cat` with z_lo <= Z < z_hi."""
    in_bin = (cat["Z"] >= z_lo) & (cat["Z"] < z_hi)
    return cat[in_bin]["logmass_z"]


def _cumulative_fraction(selected, superset, thresholds):
    """Return the kept thresholds and the selected/superset count ratios.

    Counts are of objects at or above each threshold; a threshold is kept
    only if more than 50 selected objects lie at or above it.
    """
    kept, fractions = [], []
    for threshold in thresholds:
        n_selected = (selected >= threshold).sum()
        if n_selected > 50:
            fractions.append(n_selected / (superset >= threshold).sum())
            kept.append(threshold)
    return kept, fractions


for i in range(z_bins):
    ax = axs[i]
    z_lo, z_hi = z_edges[i], z_edges[i + 1]

    # masses of the objects falling in this redshift bin
    mass_z_opt = _masses_in_z_bin(lrg_opt_cat, z_lo, z_hi)
    mass_z_ir = _masses_in_z_bin(lrg_ir_cat, z_lo, z_hi)
    mass_sv_z = _masses_in_z_bin(z_cat, z_lo, z_hi)

    # mass thresholds spanning the superset in this bin
    mass_ticks = np.linspace(mass_sv_z.min(), mass_sv_z.max(), mass_bins)
    mass_plot_ticks_opt, completeness_opt = _cumulative_fraction(mass_z_opt, mass_sv_z, mass_ticks)
    mass_plot_ticks_ir, completeness_ir = _cumulative_fraction(mass_z_ir, mass_sv_z, mass_ticks)

    label = str(np.round(z_lo, 3)) + r"$\leq z <$" + str(np.round(z_hi, 3))

    # histogram every sample on the bin edges derived from the superset
    tot_count, mass_bin_edges = np.histogram(mass_sv_z, bins=mass_bins)
    sel_count_opt, mass_bin_edges = np.histogram(mass_z_opt, bins=mass_bin_edges)
    sel_count_ir, mass_bin_edges = np.histogram(mass_z_ir, bins=mass_bin_edges)
    peak = tot_count.max()

    ax.plot(mass_plot_ticks_opt, completeness_opt, ls="--", color="k")
    ax.plot(mass_plot_ticks_ir, completeness_ir, ls="--", color="red")
    utils.hist_on_binned_array(tot_count / peak, mass_bin_edges, ax=ax, alpha=0.3, color="C0")
    utils.hist_on_binned_array(sel_count_opt / peak, mass_bin_edges, ax=ax, histtype="step", color="k")
    utils.hist_on_binned_array(sel_count_ir / peak, mass_bin_edges, ax=ax, histtype="step", color="red")

    ax.set_title(label)
    ax.set_ylim(0, 1)
    ax.set_xlim(10, 12)
    ax.grid(alpha=0.5)

# Axis labels shared by all panels
fig.text(0.5, 0.04, r"Stellar Mass ($\log(M_{\odot})$)", ha='center', size=20)
fig.text(0.06, 0.5, "Fraction Selected", va='center', rotation='vertical', size=20)

# Proxy line handles so the legend describes the two dashed curves
handle1 = lines.Line2D([], [], c='k', ls="--", lw=2)
handle2 = lines.Line2D([], [], c='red', ls="--", lw=2)
fig.legend((handle1, handle2), ("OPT Selection", "IR Selection"), loc="center right", bbox_to_anchor=(0.5, 1))