In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the full table from parquet. `df` keeps every row (used for the
# colour-cut selections); `z_cat` keeps only rows whose z_best is finite
# (used for the colour-colour scatter panels).
df = pd.read_parquet("../data/dp1_processed_full.pq")
has_redshift = np.isfinite(df["z_best"])
z_cat = df.loc[has_redshift]

In [None]:
# 3x3 grid: row 0 holds the colour-colour panels, rows 1 and 2 hold the
# redshift histograms filled in further down this cell.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(7.5, 6), dpi=200)

# Colour-colour panels share the same square [-1, 3] window.
colour_ticks = np.arange(-1, 4)
for panel in axes[0]:
    panel.set_aspect("equal")
    panel.set_xlim(-1, 3)
    panel.set_ylim(-1, 3)
    panel.set_xticks(colour_ticks)

# Histogram panels: square boxes spanning redshift 0-6.
for panel in axes[1:].flatten():
    panel.set_box_aspect(1)
    panel.set_xlim(0, 6)
    panel.set_xlabel("Redshift")
    panel.set_ylabel("Number of objects")

# One column per dropout band.
for panel, band in zip(axes[0], "ugr"):
    panel.set_title(f"${band}\\,$-dropouts")

# Marker style for the scatter calls that opt in via **settings.
settings = {"s": 5, "marker": ".", "edgecolor": "none"}

# ---- u-dropouts (left column) ----
ax_colour, ax_snr5, ax_snr10 = axes[:, 0]
ax_colour.set(xlabel="$g-r$", ylabel="$u-g$")

# Colour-colour points with r-band SNR > 5, split at z_best = 2.
# The low-redshift subset is drawn first so it takes the first cycle colour.
r_detected = z_cat[z_cat.r_snr > 5]
for subset in (r_detected[r_detected.z_best < 2], r_detected[r_detected.z_best > 2]):
    ax_colour.scatter(subset["g-r"], subset["u-g"], s=0.1)

# Outline of the selection region: horizontal, vertical and sloped segments.
cut_style = dict(c="k", lw=0.5)
ax_colour.plot([-1, 0.11], [0.88, 0.88], **cut_style)
ax_colour.plot([1.2, 1.2], [2.86, 3], **cut_style)
gr_grid = np.linspace(0.11, 1.2, 100)
ax_colour.plot(gr_grid, 1.8 * gr_grid + 0.68, **cut_style)

# Dropout samples are selected from the full table (no finite-redshift
# requirement); the SNR > 10 sample is a subset of the SNR > 5 one.
ug, gr = df["u-g"], df["g-r"]
u_drop = df[(df.r_snr > 5) & (ug > 0.88) & (gr < 1.2) & (ug > 1.8 * gr + 0.68)]
u_drop_10 = u_drop[u_drop.r_snr > 10]

# Redshift histogram plus annotations for each SNR threshold.
for panel, sample, snr_label, snr_x in (
    (ax_snr5, u_drop, "SNR $> 5$", 0.96),
    (ax_snr10, u_drop_10, "SNR $> 10$", 0.98),
):
    panel.hist(sample.z_best, bins="auto", range=(0, 6))
    panel.text(snr_x, 0.85, snr_label, transform=panel.transAxes, ha="right")
    # Count of z_best < 2 divided by count of z_best > 0 (rows without a
    # redshift fail both comparisons and are therefore not counted).
    f_int = sum(sample.z_best < 2) / sum(sample.z_best > 0)
    panel.text(
        0.96,
        0.72,
        "$f_\\text{int}=$" + f"{f_int:.2f}",
        transform=panel.transAxes,
        ha="right",
    )

# Put both histograms on the same vertical scale.
ax_snr10.set_ylim(ax_snr5.get_ylim())


# ---- g-dropouts (middle column) ----
ax_colour, ax_snr5, ax_snr10 = axes[:, 1]
ax_colour.set(xlabel="$r-i$", ylabel="$g-r$")

# Colour-colour points with i-band SNR > 5, split at z_best = 3.2.
# Both masks are built from z_cat, the same frame whose columns they index,
# so the selection never depends on implicit index re-alignment between
# two different frames.
low_z = (z_cat.z_best < 3.2) & (z_cat.i_snr > 5)
high_z = (z_cat.z_best > 3.2) & (z_cat.i_snr > 5)
# Low-redshift subset first so it takes the first colour of the cycle.
for mask in (low_z, high_z):
    ax_colour.scatter(z_cat["r-i"][mask], z_cat["g-r"][mask], **settings)

# Outline of the selection region: horizontal, vertical and sloped segments.
cut_style = dict(c="k", lw=0.5)
ax_colour.plot([-1, 0.11], [1.2, 1.2], **cut_style)
ax_colour.plot([1, 1], [2.52, 3], **cut_style)
x = np.linspace(0.13, 1, 100)
ax_colour.plot(x, 1.5 * x + 1.0, **cut_style)

# Dropout samples are selected from the full table (no finite-redshift
# requirement); the SNR > 10 sample is a subset of the SNR > 5 one.
g_drop = df[
    (df.i_snr > 5)
    & (df["g-r"] > 1.2)
    & (df["r-i"] < 1)
    & (df["g-r"] > 1.5 * df["r-i"] + 1.0)
]
g_drop_10 = g_drop.query("i_snr > 10")

# Redshift histogram plus annotations for each SNR threshold.
for panel, sample, snr_label, snr_x in (
    (ax_snr5, g_drop, "SNR $> 5$", 0.08),
    (ax_snr10, g_drop_10, "SNR $> 10$", 0.06),
):
    panel.hist(sample.z_best, bins="auto", range=(0, 6))
    panel.text(snr_x, 0.85, snr_label, transform=panel.transAxes, ha="left")
    # Count of z_best < 2 divided by count of z_best > 0 (rows without a
    # redshift fail both comparisons and are therefore not counted).
    # NOTE(review): the threshold here is 2, the same as for the u-dropouts,
    # while the scatter split above is 3.2 -- confirm this is intended and
    # not a copy-paste leftover.
    f_int = sum(sample.z_best < 2) / sum(sample.z_best > 0)
    panel.text(
        0.05,
        0.72,
        "$f_\\text{int}=$" + f"{f_int:.2f}",
        transform=panel.transAxes,
        ha="left",
    )

# Put both histograms on the same vertical scale.
ax_snr10.set(ylim=ax_snr5.get_ylim())

# ---- r-dropouts (right column) ----
ax_colour, ax_snr5, ax_snr10 = axes[:, 2]
ax_colour.set(xlabel="$i-z$", ylabel="$r-i$")

# Colour-colour points with z-band SNR > 5, split at z_best = 4.
# The low-redshift subset is drawn first so it takes the first cycle colour.
z_detected = z_cat[z_cat.z_snr > 5]
for subset in (z_detected[z_detected.z_best < 4], z_detected[z_detected.z_best > 4]):
    ax_colour.scatter(subset["i-z"], subset["r-i"], s=1)

# Outline of the selection region: horizontal, vertical and sloped segments.
cut_style = dict(c="k", lw=0.5)
ax_colour.plot([-1, 0.05], [1.2, 1.2], **cut_style)
ax_colour.plot([0.5, 0.5], [1.87, 3], **cut_style)
iz_grid = np.linspace(0.07, 0.5, 100)
ax_colour.plot(iz_grid, 1.5 * iz_grid + 1.1, **cut_style)

# Dropout samples are selected from the full table (no finite-redshift
# requirement); the SNR > 10 sample is a subset of the SNR > 5 one.
ri, iz = df["r-i"], df["i-z"]
r_drop = df[(df.z_snr > 5) & (ri > 1.2) & (iz < 0.5) & (ri > 1.5 * iz + 1.1)]
r_drop_10 = r_drop[r_drop.z_snr > 10]

# Redshift histogram plus annotations for each SNR threshold.
for panel, sample, snr_label, snr_x in (
    (ax_snr5, r_drop, "SNR $> 5$", 0.08),
    (ax_snr10, r_drop_10, "SNR $> 10$", 0.06),
):
    panel.hist(sample.z_best, bins="auto", range=(0, 6))
    panel.text(snr_x, 0.85, snr_label, transform=panel.transAxes, ha="left")
    # Count of z_best < 3 divided by count of z_best > 0 (rows without a
    # redshift fail both comparisons and are therefore not counted).
    f_int = sum(sample.z_best < 3) / sum(sample.z_best > 0)
    panel.text(
        0.05,
        0.72,
        "$f_\\text{int}=$" + f"{f_int:.2f}",
        transform=panel.transAxes,
        ha="left",
    )

# The SNR > 5 panel uses a fixed vertical range, which the SNR > 10 panel
# then copies so both histograms share the same scale.
ax_snr5.set_ylim(0, 16)
ax_snr10.set_ylim(ax_snr5.get_ylim())


# Widen the vertical gap between rows, then write the figure to disk with
# a tight bounding box.
fig.subplots_adjust(hspace=0.45)
figure_path = "../figures/lbg_color_cuts.pdf"
fig.savefig(figure_path, bbox_inches="tight")

In [None]:
# Store the objectID arrays of every dropout sample in one .npz archive,
# keyed by sample name.
dropout_samples = {
    "u_drop": u_drop,
    "g_drop": g_drop,
    "r_drop": r_drop,
    "u_drop_10": u_drop_10,
    "g_drop_10": g_drop_10,
    "r_drop_10": r_drop_10,
}
np.savez(
    "../data/lbg_color_cut_samples.npz",
    **{name: sample.objectID.values for name, sample in dropout_samples.items()},
)