# Part 1: APOGEE Selection Function Calculation

This notebook is the first of three parts for reproducing [Imig et al. (2023)](https://astrojimig.github.io/pdfs/Imig_MW_density.pdf). In this notebook, we calculate the APOGEE raw and effective selection functions.

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import dill as pickle
import astropy.io.fits as fits
import time
import pandas as pd
import mwdust
import multiprocessing as mp
from tqdm import tqdm
from numba import jit

from sample_selection import set_env_variables, setup_maap_bins, distmod_bins

set_env_variables()

import apogee.select as apsel
import apogee.tools.read as apread


In [None]:
# Matplotlib defaults shared by every figure in this notebook
params = dict(
    [
        ("axes.labelsize", 36),
        ("xtick.labelsize", 36),
        ("ytick.labelsize", 36),
        ("text.usetex", False),
        ("lines.linewidth", 1),
        ("axes.titlesize", 22),
        ("font.family", "serif"),
        ("font.size", 36),
    ]
)
# Apply the overrides globally
plt.rcParams.update(params)

# Config Settings

In [None]:
# parallelize: if True, the effective selection function is evaluated for all
# APOGEE locations with a multiprocessing pool (recommended for speed);
# if False, locations are processed one at a time in a plain loop.
parallelize = False

# savename: file-name stem for the per-bin effective selection function files,
# which are written as 'selfuncs/effsel_bins/{savename}_{bin number}.npz'
savename = "apogee_effsel"

# clobber: if True, the existing per-bin .npz files in 'selfuncs/effsel_bins'
# are deleted so that every bin is recomputed from scratch
clobber = False

# dmap: dust map passed to the effective selection function (as `dmap3d`)
dmap = mwdust.Combined19()


# 1A: Calculate the Raw Selection Function

This cell calculates the APOGEE Raw Selection Function (if the file doesn't already exist) using the `apogee` module.

This step usually takes several hours.

In [None]:
# Load the raw selection function from the on-disk cache if it exists;
# otherwise compute it (slow) and write the cache for next time.
raw_sf_path = "selfuncs/apogeeCombinedSF.dat"

if os.path.exists(raw_sf_path):
    # NOTE: unpickling can execute arbitrary code -- only load a cache file
    # that was produced by this notebook on a machine you trust.
    with open(raw_sf_path, "rb") as f:
        apogee_selfunc = pickle.load(f)
else:
    # Create the output directory *before* the long computation so that a
    # missing/unwritable directory fails immediately instead of after hours.
    os.makedirs(os.path.dirname(raw_sf_path), exist_ok=True)

    # NOTE(review): `allstar` is not used again in this notebook; presumably
    # it is read here so the allStar catalogue is available locally before
    # the selection function is built -- confirm before removing.
    allstar = apread.allStar(main=True, rmdups=True)
    apogee_selfunc = apsel.apogeeCombinedSelect(year=10)
    with open(raw_sf_path, "wb") as f:
        pickle.dump(apogee_selfunc, f)

### Plot and Verify the Raw Selection Function

In [None]:
# Galactic coordinates of every field (a single glonGlat lookup per location)
field_coords = [apogee_selfunc.glonGlat(loc) for loc in apogee_selfunc._locations]
aposel_glons = np.array([coords[0][0] for coords in field_coords])
aposel_glats = np.array([coords[1][0] for coords in field_coords])

# Short-cohort selection fraction in percent: spectroscopic / photometric
# counts, taking the first column after transposing
short_ratio = apogee_selfunc._nspec_short / apogee_selfunc._nphot_short
sel_fractions = short_ratio.T[0] * 100

# Draw fields in order of increasing selection fraction so the highest
# fractions are plotted last (on top)
idx = np.argsort(sel_fractions)

fig, ax = plt.subplots(figsize=(20, 10))
points = ax.scatter(
    aposel_glons[idx],
    aposel_glats[idx],
    c=sel_fractions[idx],
    vmin=0,
    vmax=100,
    cmap="viridis",
    s=50,
)
fig.colorbar(points, ax=ax, label="Selection Fraction (%)")

ax.set_title("Short Cohort")
ax.grid()
ax.set_xlim(0, 360)
ax.set_ylim(-90, 90)
ax.set_xlabel("Galactic Longitude (deg)")
ax.set_ylabel("Galactic Latitude (deg)")

plt.show()

## 1B: Calculate the Effective Selection Function

Calculate the Effective Selection Function on the grid of ages and metallicities defined by the data sample. This step requires a choice of isochrones and dust map.

In [None]:
# Read the whitespace-delimited isochrone table; lines starting with '#' are
# skipped. The separator is a raw string so that `\s` is passed to the regex
# engine verbatim instead of being an invalid Python escape sequence.
JHK_iso = pd.read_csv("isochrones/parsec_v3.dat", sep=r"\s+", comment="#")

# Summarise the grid of metallicities and ages available in the table
print("Isochrones cover:")
print(f"[M/H]: {np.unique(JHK_iso['MH'])}")
print(f"log(age): {np.unique(JHK_iso['logAge'])}")

In [None]:
def add_deltaM_column(iso):
    """Compute the per-row difference of ``int_IMF`` within each isochrone.

    Consecutive rows that share the same ``MH`` and ``logAge`` are treated as
    belonging to one isochrone.

    * For a row whose predecessor is in the same isochrone, the value is the
      backward difference ``int_IMF[i] - int_IMF[i - 1]``.
    * For the first row of an isochrone, the value is the forward difference
      ``int_IMF[i + 1] - int_IMF[i]``, provided the next row is in the same
      isochrone.
    * A row with no neighbour in its own isochrone (a single-row isochrone)
      gets ``NaN``; such rows fail a ``deltaM > 0`` cut.

    Parameters
    ----------
    iso : mapping of column name -> array-like (e.g. ``pandas.DataFrame``)
        Must provide the columns ``"MH"``, ``"logAge"`` and ``"int_IMF"``,
        with the rows of each isochrone stored contiguously.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per input row.
    """
    # Convert each column once (doing this inside a per-row loop is quadratic)
    mh = np.asarray(iso["MH"])
    log_age = np.asarray(iso["logAge"])
    int_imf = np.asarray(iso["int_IMF"], dtype=float)
    n_rows = len(int_imf)

    # same_as_prev[i]: row i-1 belongs to the same isochrone as row i.
    # Row 0 has no predecessor; it must not wrap around to the last row.
    same_as_prev = np.zeros(n_rows, dtype=bool)
    same_as_prev[1:] = (mh[1:] == mh[:-1]) & (log_age[1:] == log_age[:-1])

    # same_as_next[i]: row i+1 belongs to the same isochrone as row i
    same_as_next = np.zeros(n_rows, dtype=bool)
    same_as_next[:-1] = same_as_prev[1:]

    # step[k] = int_imf[k + 1] - int_imf[k]
    step = np.diff(int_imf)
    backward = np.full(n_rows, np.nan)
    backward[1:] = step
    forward = np.full(n_rows, np.nan)
    forward[:-1] = step

    # Backward difference inside an isochrone, forward difference at its
    # first row, NaN when neither neighbour is in the same isochrone
    return np.where(
        same_as_prev, backward, np.where(same_as_next, forward, np.nan)
    )


# deltaM for every row of the full isochrone table; index-aligned with
# JHK_iso so it can be sliced with the same row indices later on
all_deltaMs = add_deltaM_column(JHK_iso)

In [None]:
def effsel_per_loc(i):
    """
    Computes the effective selection function along the line of sight of one
    APOGEE location.

    Relies on module-level state set up by the calling cell: ``pbar``,
    ``all_locations``, ``apogee_selfunc``, ``apof``, ``ds``, ``H``, ``J``,
    ``K`` and ``weights``.

    Parameters
    ----------
    i : int
        Index into ``all_locations`` (and into the per-location
        ``_nspec_*`` arrays of ``apogee_selfunc``).

    Returns
    -------
    numpy.ndarray
        Array of length ``len(ds)``:
        all zeros if the location has no spectroscopic observations in any
        cohort; all NaN if the location's minimum (J-K) colour cut is not
        finite; otherwise the value returned by ``apof`` for this location.
    """
    pbar.update(1)
    loc = all_locations[i]

    # No observations in any cohort: nothing can be selected here.
    # Returning early keeps this result from being overwritten by the
    # colour-bin branches below.
    n_spec_total = np.sum(
        [
            np.nansum(apogee_selfunc._nspec_short[i]),
            np.nansum(apogee_selfunc._nspec_medium[i]),
            np.nansum(apogee_selfunc._nspec_long[i]),
        ]
    )
    if n_spec_total < 1.0:
        return np.zeros(len(ds))

    # Colour bin is NaN/inf: flag the whole line of sight as undefined
    jkmin = apogee_selfunc.JKmin(loc)
    if not np.isfinite(jkmin):
        return np.full(len(ds), np.nan)

    # Valid colour bin: keep only isochrone points at or redward of the cut
    # and renormalise their weights to sum to one
    isom = np.where((J - K) >= jkmin)[0]
    norm_weights = weights[isom] / np.nansum(weights[isom])

    # NOTE(review): the result is not multiplied by the field area (the
    # original carried a commented-out `* apo.area(loc)` factor here).
    return apof(loc, ds, MH=H[isom], JK0=(J - K)[isom], weights=norm_weights)

In [None]:
if clobber:
    # Delete previously computed per-bin files so every bin is recomputed.
    # Done with `os` calls rather than a shell `rm` so it is portable and
    # silent when the directory is missing or already empty.
    effsel_dir = "selfuncs/effsel_bins"
    if os.path.isdir(effsel_dir):
        for entry in os.listdir(effsel_dir):
            if entry.endswith(".npz"):
                os.remove(os.path.join(effsel_dir, entry))

In [None]:
# Set up some parameters
# Number of worker processes used when `parallelize` is True
Nthreads = mp.cpu_count()

# Running bin index used to name the per-bin output files
bini = 0
all_locations = apogee_selfunc._locations
# Bin indices to recompute even if their output file already exists
redo_bins = []

# MAAP bin definition
age_bins, mh_bins = setup_maap_bins()
N_bins = len(age_bins["center"]) * len(mh_bins["center"])

# logg limits of the data sample, used to trim the isochrones.
# fits.getdata reads extension 1 and closes the file afterwards, instead of
# leaving an open HDUList behind.
apogee_sample = fits.getdata("apogee_sample.fits", ext=1)
logg_min_lim = np.min(apogee_sample["LOGG"])
logg_max_lim = np.max(apogee_sample["LOGG"])

# Distance mod bins
ndistmods, ds, distmods, minmax_distmods = distmod_bins()


In [None]:
# Display the loaded sample table as a quick visual check
apogee_sample

In [None]:
# Compute and save the effective selection function for every
# ([M/H], age) bin that contains enough stars.

# Restart the running bin index so that re-running this cell on its own
# still maps each bin to the correct output file.
bini = 0

# np.savez does not create missing directories
os.makedirs("selfuncs/effsel_bins", exist_ok=True)

for i_f, f in enumerate(mh_bins["center"]):
    f = round(f, 2)
    for i_a, a in enumerate(age_bins["center"]):
        # Bin-centre age converted from Gyr to log10(yr) -- assumes
        # age_bins["center"] is in Gyr, as implied by the 1e9 factor
        log_a = round(np.log10(a * 1e9), 2)
        print("=" * 50)
        print(f"Bin {bini + 1}/{N_bins}")
        print(f"[M/H] = {f} , logAge = {log_a}")
        print("=" * 50)

        # Number of sample stars in this bin, split by alpha group
        bin_data_mask = (apogee_sample["METAL_BIN_I"] == i_f) & (
            apogee_sample["AGE_BIN_I"] == i_a
        )
        starcount1 = np.count_nonzero(
            bin_data_mask & (apogee_sample["ALPHA_BIN"] == "LOW")
        )
        starcount2 = np.count_nonzero(
            bin_data_mask & (apogee_sample["ALPHA_BIN"] == "HIGH")
        )

        # Fewer than 100 stars in both alpha groups: no need for SF
        if starcount1 < 100 and starcount2 < 100:
            print(f"{starcount1} + {starcount2} stars. Skipping bin.")
        else:
            fname = f"selfuncs/effsel_bins/{savename}_{bini}.npz"
            # Compute when the file is missing, or when the bin is
            # explicitly listed in redo_bins (forces a recompute).
            if not os.path.exists(fname) or bini in redo_bins:
                # Isochrone rows for this bin, restricted to the logg range
                # of the data sample.
                # NOTE(review): the [M/H] / logAge match is an exact float
                # comparison against values rounded to 2 decimals; it
                # selects nothing if the isochrone grid does not contain
                # exactly these values.
                iso_mask = (
                    (JHK_iso["MH"] == f)
                    & (JHK_iso["logAge"] == log_a)
                    & (JHK_iso["logg"] <= logg_max_lim)
                    & (JHK_iso["logg"] >= logg_min_lim)
                )
                iso_idx = np.where(iso_mask)[0]
                bin_iso = JHK_iso.iloc[iso_idx]
                deltaMs = all_deltaMs[iso_idx]

                # Keep only isochrone points with a positive deltaM
                m = deltaMs > 0
                # H, J, K, weights, apof and pbar are read as globals by
                # effsel_per_loc, so these names must stay as they are.
                H = bin_iso["Hmag"].to_numpy()[m]
                J = bin_iso["Jmag"].to_numpy()[m]
                K = bin_iso["Ksmag"].to_numpy()[m]
                weights = deltaMs[m] / np.sum(deltaMs[m])
                effsel_array = np.zeros((len(all_locations), ndistmods))

                # Initiate Selection Function
                apof = apsel.apogeeEffectiveSelect(
                    apogee_selfunc,
                    dmap3d=dmap,
                    MH=H,
                    JK0=(J - K),
                    weights=weights,
                )

                pbar = tqdm(total=len(all_locations), position=0, leave=True)

                if parallelize:
                    with mp.Pool(Nthreads) as p:
                        effsel_array = p.map(
                            effsel_per_loc, range(len(all_locations))
                        )
                else:
                    for i in range(len(all_locations)):
                        effsel_array[i] = effsel_per_loc(i)

                pbar.close()

                print(f"Saving {fname}")
                np.savez(fname, np.array(effsel_array))
            else:
                print("Already done.")

        bini += 1

### Concatenate the selection function for each bin into one total file

In [None]:
# Concatenate into one total file
all_effels = []

# Shape of one per-bin array: (number of locations, number of distance bins)
bin_shape = (len(all_locations), ndistmods)

bini = 0
# Iterate in the same ([M/H], age) order used when the per-bin files were
# written, so that `bini` refers to the same bin.
for i_f, f in enumerate(mh_bins["center"]):
    for i_a, a in enumerate(age_bins["center"]):
        fname = f"selfuncs/effsel_bins/{savename}_{bini}.npz"
        if os.path.exists(fname):
            with np.load(fname) as npz:
                effsel_bini = npz["arr_0"]
        else:
            # Bins skipped during the calculation have no file on disk.
            # Insert a NaN placeholder so the bin index stays aligned.
            # NOTE(review): confirm that the downstream notebooks expect
            # NaN for bins that were not computed.
            print(f"No file for bin {bini} ({fname}); filling with NaN.")
            effsel_bini = np.full(bin_shape, np.nan)
        all_effels.append(effsel_bini)
        bini += 1

np.savez("selfuncs/effsel_allbins.npz", np.array(all_effels))