In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd

from astropy.table import Table
import astropy.units as u


In [None]:
import arya
import surp
from surp.gce_math import is_high_alpha
from surp._globals import DATA_DIR


The APOGEE DR17 allStar catalogue can be downloaded from https://data.sdss.org/sas/dr17/apogee/spectro/aspcap/dr17/synspec_rev1/allStar-dr17-synspec_rev1.fits, and the astroNN catalogue from https://data.sdss.org/sas/dr17/env/APOGEE_ASTRO_NN/apogee_astroNN-DR17.fits.

In [None]:
# Turn off external LaTeX text rendering for every figure in this notebook.
plt.rcParams["text.usetex"] = False

# Basic cleaning

In [None]:
# Location of the APOGEE DR17 allStar FITS file under the project's data directory.
filename = surp.DATA_DIR + "/surveys/allStar-dr17-synspec_rev1.fits"

In [None]:
# Location of the astroNN DR17 FITS file under the project's data directory.
filename_ann = surp.DATA_DIR + "/surveys/apogee_astroNN-DR17.fits"


## Double check sha (optional)

In [None]:
# Switch for the optional SHA-1 verification of the two catalogue files.
check_sha = True

In [None]:
if check_sha:
    import hashlib

    def _sha1_hex(path, chunk_size=1 << 20):
        """
        Return the hexadecimal SHA-1 digest of the file at `path`.

        The file is read in pieces of `chunk_size` bytes so that it never has
        to fit in memory. If the file cannot be opened or read, the problem is
        printed and None is returned, so the comparisons below report a
        mismatch instead of stopping the notebook.
        """
        digest = hashlib.sha1()
        try:
            with open(path, "rb") as stream:
                for chunk in iter(lambda: stream.read(chunk_size), b""):
                    digest.update(chunk)
        except OSError as err:
            print("could not read", path, "-", err)
            return None
        return digest.hexdigest()

    # hashlib is used instead of the external `sha1sum` program so the check
    # also works on systems where that program is not installed.
    print("apogee matches ", _sha1_hex(filename) == "7aa2f381de0e8e246f9833cc7da540ef45096702") # shasum from file
    print("ANN matches ", _sha1_hex(filename_ann) == "c422b9adba840b3415af2fe6dec6500219f1b68f") # shasum from file

## Loading and basic cleaning

In [None]:
# Read HDU 1 of the allStar FITS file into an astropy Table, memory-mapped.
allStar = Table.read(filename, format="fits", hdu=1, memmap=True)

In [None]:
def bracket(df, ele, ele2="H"):
    """
    Build the abundance ratio [ele/ele2] from APOGEE-style columns.

    Parameters
    ----------
    df : mapping of column name -> values
        Must provide "FE_H" and/or the "<X>_FE" columns needed for the
        requested ratio (anything indexable by column name works).
    ele : str
        Upper-case symbol of the numerator element, e.g. "C" or "MG".
    ele2 : str, optional
        Upper-case symbol of the denominator element. Defaults to "H".

    Returns
    -------
    The values of [ele/ele2], combined from the stored columns as
      [X/H]  = [X/Fe] + [Fe/H]
      [X/Fe] = stored column
      [Fe/Y] = -[Y/Fe]
      [X/Y]  = [X/Fe] - [Y/Fe]

    Raises
    ------
    KeyError
        If a required column is missing from `df`.
    """
    if ele2 == "H":
        if ele == "FE":
            return df["FE_H"]
        return df["%s_FE" % ele] + df["FE_H"]

    if ele2 == "FE":
        return df["%s_FE" % ele]

    if ele == "FE":
        # There is no "FE_FE" column; iron over another element is just the
        # negated [Y/Fe].
        return -df["%s_FE" % ele2]

    return df["%s_FE" % ele] - df["%s_FE" % ele2]


In [None]:
# Abundance ratios built with bracket(): (new column, numerator, denominator).
# Columns are appended to allStar in the order listed here.
derived_ratios = [
    ("O_H", "O", "H"),
    ("MG_H", "MG", "H"),
    ("C_O", "C", "O"),
    ("C_MG", "C", "MG"),
    ("C_H", "C", "H"),
    ("C_N", "C", "N"),
    ("N_H", "N", "H"),
    ("N_O", "N", "O"),
    ("N_MG", "N", "MG"),
]
for new_column, numerator, denominator in derived_ratios:
    allStar[new_column] = bracket(allStar, numerator, denominator)

# Ratios with iron on top are the negated [X/Fe] columns.
allStar["FE_O"] = -allStar["O_FE"]
allStar["FE_MG"] = -allStar["MG_FE"]

# Uncertainties of the combined ratios: the two input uncertainties are
# summed linearly. (new column, first term, second term)
summed_errors = [
    ("C_MG_ERR", "C_FE_ERR", "MG_FE_ERR"),
    ("N_MG_ERR", "N_FE_ERR", "MG_FE_ERR"),
    ("C_N_ERR", "N_FE_ERR", "C_FE_ERR"),
    ("MG_H_ERR", "MG_FE_ERR", "FE_H_ERR"),
    ("C_H_ERR", "C_FE_ERR", "FE_H_ERR"),
]
for new_column, first_term, second_term in summed_errors:
    allStar[new_column] = allStar[first_term] + allStar[second_term]

# Flag each star as high- or low-alpha from its [Mg/Fe] and [Fe/H].
allStar["high_alpha"] = is_high_alpha(allStar["MG_FE"], allStar["FE_H"])

### Bitmasks

See bitmasks in https://www.sdss4.org/dr17/irspec/apogee-bitmasks/#APOGEE_PARAMFLAG

Stars with any of the following targeting bits set are excluded.

APOGEE_TARGET2: 'ancillary young embedded cluster member', 'ancillary emission line star', 'MIR-detected candidate cluster member (ancillary)'

APOGEE2_TARGET3: 'Selected as part of the EB program', 'Selected as part of the young cluster study (IN-SYNC)', 'Ancillary W3/4/5 star forming complex'

              

Finally, I restrict to the following $\log g$–$T_{\rm eff}$ domain:

- $\log g \geq 3.5$
- $\log g \leq 0.004\,T_{\rm eff} - 15.7$
- $\log g \leq 0.00070588\,T_{\rm eff} + 0.358836$
- $\log g \leq -0.0015\,T_{\rm eff} + 12.05$
- $\log g \geq 0.0012\,T_{\rm eff} - 2.8$

In [None]:
# Bits of ASPCAPFLAG that reject a star. With the value 0 no star is rejected
# on this flag.
apogee_aspcapflag = 0

# Currently disabled:
# apogee_aspcapflag ^= 1<<23 # STAR_BAD (TEFF, LOGG, CHI2, COLORTE, ROTATION, SN, GRIDEDGE_BAD)


In [None]:
# Bits of STARFLAG that reject a star. With the value 0 no star is rejected
# on this flag.
# Original note: starflag is a bitwise and for each visit
# (NOTE(review): confirm against the SDSS bitmask documentation).

apogee_starflag = 0

# Tried and left disabled because they don't help:
# apogee_starflag ^= 1<<18 # bad_rv_companion
# apogee_starflag ^= 1<<22 # RV_FAIL

In [None]:
# Bits of APOGEE_TARGET2 that reject a star.
# Bits are switched on with OR: XOR toggles, so it would silently clear a bit
# that happened to be listed twice.
apogee_target2 = 0

apogee_target2 |= 1<<17 # APOGEE_MIRCLUSTER_STAR
apogee_target2 |= 1<<15 # APOGEE_EMISSION_STAR  emission line stars
apogee_target2 |= 1<<13 # APOGEE_EMBEDDEDCLUSTER_STAR embedded cluster

In [None]:
# Bits of APOGEE2_TARGET3 that reject a star.
# Bits are switched on with OR: XOR toggles, so it would silently clear a bit
# that happened to be listed twice.
apogee2_target3 = 0

apogee2_target3 |= 1<<1 # APOGEE2_EB eclipsing binary program
apogee2_target3 |= 1<<5 # APOGEE2_YOUNG_CLUSTER # young cluster (IN-SYNC)
apogee2_target3 |= 1<<18 # APOGEE2_W345 W3/4/5 star forming complex


In [None]:
# Bit of EXTRATARG that rejects a star.
extratarg = 1<<4 # DUPLICATE (keeps highest S/N observation)

In [None]:
# Bits of the per-element *_FLAG columns that reject a star. With the value 0
# no star is rejected on these flags.
elem_flag = 0
# Available bits, all currently disabled:
# elem_flag ^= 1<<0 # GRIDEDGE_BAD	0	Parameter within 1/8 grid spacing of grid edge : true value may be off grid
# elem_flag ^= 1<<1 # CALRANGE_BAD	1	Parameter outside valid range of calibration determination
# elem_flag ^= 1<<2 # OTHER_BAD	2	Other error condition
# elem_flag ^= 1<<3 # FERRE_FAIL	3	Failed solution in FERRE
# elem_flag ^= 1<<4


In [None]:
# Show every rejection bitmask in binary, one per line, as a visual check of
# which bits are set.
rejection_masks = (
    apogee2_target3,
    apogee_target2,
    apogee_starflag,
    apogee_aspcapflag,
    extratarg,
    elem_flag,
)
for bitmask in rejection_masks:
    print(bin(bitmask))

In [None]:
# Build the boolean row mask of stars that survive every bitmask cut.
# Each entry is (flag column in allStar, bits that reject a star); a star is
# kept by a cut when none of those bits are set in that column.
flag_cuts = [
    ("ASPCAPFLAG", apogee_aspcapflag),
    ("STARFLAG", apogee_starflag),
    ("APOGEE2_TARGET3", apogee2_target3),
    ("APOGEE_TARGET2", apogee_target2),
    ("EXTRATARG", extratarg),
    ("FE_H_FLAG", elem_flag),
    ("MG_FE_FLAG", elem_flag),
    ("C_FE_FLAG", elem_flag),
]

mask = np.full(len(allStar), True)
for flag_column, reject_bits in flag_cuts:
    mask &= (allStar[flag_column] & reject_bits) == 0
    # Cumulative number of stars removed after this cut. np.count_nonzero
    # counts in compiled code instead of looping over the array in Python.
    print(flag_column, np.count_nonzero(~mask))

print("remaining ", np.count_nonzero(mask))

In [None]:
# Number of stars passing the flag cuts whose FE_H value is not finite.
# The column is selected before the rows so that only this one column is
# masked, rather than copying every column of the table.
np.sum(~np.isfinite(allStar["FE_H"][mask]))

In [None]:
# Additionally require finite values and finite uncertainties for each of the
# quantities used later; print the cumulative number of rejected stars.
for col in ["FE_H", "MG_FE", "C_FE", "LOGG", "TEFF"]:
    print(col)
    value_is_finite = np.isfinite(allStar[col])
    error_is_finite = np.isfinite(allStar[col + "_ERR"])
    mask &= value_is_finite
    mask &= error_is_finite
    print(sum(~mask))
    print()

In [None]:
# Number of stars that pass all cuts so far.
print(sum(mask))

In [None]:
# Keep the rows that passed every cut, then keep only the columns that hold
# at most one-dimensional data (multi-dimensional columns are dropped).
allStarClean = allStar[mask]
names = list(
    filter(lambda name: len(allStarClean[name].shape) <= 1, allStarClean.colnames)
)
allStarClean = allStarClean[names]

In [None]:
# Row count of the cleaned table.
len(allStarClean)

## Reading in astroNN

In [None]:
# Read HDU 1 of the astroNN FITS file, memory-mapped.
astroNN = Table.read(filename_ann, format="fits", hdu=1, memmap=True)

# Keep only the columns holding at most one-dimensional data, then convert
# the table to a pandas DataFrame.
names = list(filter(lambda name: len(astroNN[name].shape) <= 1, astroNN.colnames))
astroNN = astroNN[names].to_pandas()

In [None]:
# When an APOGEE_ID occurs more than once, keep only its last row.
astroNN = astroNN.drop_duplicates(subset="APOGEE_ID", keep="last").copy()

In [None]:
# Index the table by APOGEE_ID and tag every remaining column with an
# "_ANN" suffix so the names stay distinguishable after joining.
astroNN = astroNN.set_index("APOGEE_ID").add_suffix("_ANN")

In [None]:
# Display the de-duplicated astroNN table, indexed by APOGEE_ID.
astroNN

In [None]:
# True when no APOGEE_ID appears more than once in the index.
astroNN.index.is_unique

In [None]:
# 2D histogram of the astroNN TEFF_ANN (x) and LOGG_ANN (y) values, log-scaled
# in colour; bins with fewer than 3 stars are left blank.
plt.hist2d(astroNN["TEFF_ANN"], astroNN["LOGG_ANN"], range=((3000, 7000), (-0.5, 5)), bins=100, norm="log", cmin=3, cmap=arya.style.get_cmap(to_white=True, reverse=True));

# Both axes are flipped so that values decrease along each axis.
plt.gca().invert_yaxis()
plt.gca().invert_xaxis()
# TEFF_ANN is on the x axis and LOGG_ANN on the y axis.
plt.xlabel("$T_{\\rm eff}$ (K)")
plt.ylabel("$\\log$ g")

# APOGEE à la Jack

Jack's coordinates, given as ($\log g$, $T_{\rm eff}$) pairs, are

- (3.5, 4800)
- (3.8, 4875)
- (4.1, 5300)
- (3.8, 5500)
- (3.5, 5250)


In [None]:
# Select the stars of the cleaned table that lie inside a polygon in the
# (TEFF, LOGG) plane. `mask` is re-created here for the cleaned table.
logg = allStarClean["LOGG"]
teff = allStarClean["TEFF"]

# One inequality per edge of the polygon.
# NOTE(review): the third edge uses the rounded coefficients 0.0007 and 0.36,
# while the notes earlier in this notebook quote 0.00070588 and 0.358836 —
# confirm which values are intended.
mask = logg >= 3.5
mask &= logg <= 0.004*teff - 15.7
mask &= logg <= 0.0007*teff + 0.36
mask &= logg <= -0.0015 * teff + 12.05
mask &= logg >= 0.0012*teff - 2.8

In [None]:
# Number of stars inside the selection polygon.
np.sum(mask)

In [None]:
# Selected stars as a pandas DataFrame.
subgiants = allStarClean[mask].to_pandas()

In [None]:
# Number of rows whose APOGEE_ID already appeared in an earlier row.
subgiants.APOGEE_ID.duplicated().sum()

In [None]:
# Vertices of the selection polygon; the first vertex is repeated at the end
# so that a line drawn through them closes the outline.
coords = [
    (3.5, 4800),
    (3.8, 4875),
    (4.1, 5300),
    (3.8, 5500),
    (3.5, 5250),
    (3.5, 4800)
]
# Split the pairs: first entries go to y, second entries go to x.
y, x = map(list, zip(*coords))

In [None]:
# Figure: TEFF-LOGG density of the full allStar table with the selection
# polygon drawn on top, saved as a PDF.
plt.figure(figsize=(10/3, 10/3), rasterized=True, dpi=350)
density_cmap = arya.style.get_cmap(to_white=True, reverse=True)
*_, img = plt.hist2d(
    allStar["TEFF"],
    allStar["LOGG"],
    range=((3000, 7000), (-0.5, 5)),
    bins=100,
    norm="log",
    cmin=3,
    cmap=density_cmap,
)
img.set_edgecolor("none")

# Outline of the selection polygon.
plt.plot(x, y, lw=2, color="k")

# Flip both axes so that values decrease along each axis.
current_axes = plt.gca()
current_axes.invert_yaxis()
current_axes.invert_xaxis()
plt.xlabel("$T_{\\rm eff}$ (K)")
plt.ylabel("$\\log$ g")
plt.savefig("logg_jack.pdf")

In [None]:
# Visual check: the selected stars drawn together with the polygon outline.
plt.plot(x, y, lw=2, color="k")
plt.scatter(subgiants.TEFF, subgiants.LOGG)

# Flip both axes so that values decrease along each axis.
current_axes = plt.gca()
current_axes.invert_yaxis()
current_axes.invert_xaxis()
plt.xlabel("$T_{\\rm eff}$ (K)")
plt.ylabel("$\\log$ g")

In [None]:
# Attach the astroNN columns to the selected stars, matching on APOGEE_ID.
# Every selected star is kept, whether or not it has an astroNN row.
subgiants_by_id = subgiants.set_index("APOGEE_ID")
subgiants_pann = subgiants_by_id.join(astroNN, how="left").copy()


In [None]:

# Convenience copies of the astroNN galr/galz columns under shorter names,
# plus the absolute value of z.
subgiants_pann["R_gal"] = subgiants_pann["galr_ANN"]
subgiants_pann["z_gal"] = subgiants_pann["galz_ANN"]
subgiants_pann["abs_z"] = subgiants_pann["z_gal"].abs()

In [None]:
import astropy.coordinates as ac
import astropy.units as u

In [None]:
# Distance taken as the inverse of the GAIAEDR3_PARALLAX column.
# NOTE(review): labelling the result as kpc assumes the parallax is in mas — confirm.
d =  1 * u.kpc/subgiants_pann.GAIAEDR3_PARALLAX.values

In [None]:
# Negative distances (from negative parallaxes) are replaced by NaN.
d[d<0] = np.nan

In [None]:
# Sky coordinates built from the RA/DEC columns (taken as degrees) and the
# inverse-parallax distance.
sc = ac.SkyCoord(ra=subgiants_pann.RA.values * u.deg, dec=subgiants_pann.DEC.values * u.deg, distance =d)

# Transform into astropy's Galactocentric frame, using its default parameters.
gc = sc.transform_to(ac.Galactocentric())

In [None]:
# Cylindrical galactocentric radius sqrt(x^2 + y^2), as a unitless array in kpc.
R = (np.sqrt(gc.x**2 + gc.y**2) / u.kpc).decompose().value

In [None]:
# Galactocentric z coordinate, as a unitless array in kpc.
z = (gc.z / u.kpc).decompose().value

In [None]:
# Store the parallax-based positions next to the astroNN-based ones.
subgiants_pann["R_gal_gaia"] = R
subgiants_pann["z_gal_gaia"] = z

In [None]:
# Row count of the joined table.
len(subgiants_pann)

In [None]:
import os

# Write the joined table as subgiants.csv inside the data directory.
# os.path.join supplies the path separator when DATA_DIR does not end in one;
# plain string concatenation would otherwise write "<DATA_DIR>subgiants.csv"
# next to the data directory instead of inside it.
subgiants_pann.to_csv(os.path.join(DATA_DIR, "subgiants.csv"))

In [None]:
# 2D histogram of FE_H (x) against MG_FE (y) for the selected stars; empty bins are left blank.
plt.hist2d(subgiants.FE_H, subgiants.MG_FE, cmin=1, bins=80, range=((-1, 0.5),(-0.15, 0.45)) );


In [None]:
import seaborn as sns

In [None]:
# Distribution of R_gal for the selected stars: a normalised step histogram
# with a kernel density estimate drawn over it.
plt.hist(subgiants_pann.R_gal, histtype="step", bins=60, density=True)
sns.kdeplot(subgiants_pann.R_gal)


# Validation

In [None]:
import surp
import matplotlib.pyplot as plt
import seaborn as sns
import arya

In [None]:
# From here on `subgiants` refers to the table exposed by the surp package
# (presumably the packaged subgiant sample — confirm), replacing the one built
# above; `subgiants_pann` still holds the table built in this notebook.
subgiants = surp.subgiants

In [None]:
# Display the table taken from the surp package.
subgiants

In [None]:
# Rows of the packaged table whose R_gal is not close to the value computed in this notebook.
# NOTE(review): the comparison is positional, so it assumes both tables have
# the same length and row order — confirm.
subgiants[~np.isclose(subgiants_pann.R_gal, subgiants.R_gal.values)]

In [None]:
# Overlay the R_gal density of the packaged table (solid) and of the table
# built in this notebook (dotted).
sns.kdeplot(subgiants.R_gal)
sns.kdeplot(subgiants_pann.R_gal, ls=":")

# Axis label spelled out in full (it previously read "R ga").
plt.xlabel("R gal")


In [None]:
# Parallax-based radius (x) against the packaged table's R_gal (y), with a
# one-to-one line for reference.
axis_range = (0, 13)
plt.scatter(R, subgiants.R_gal, s=1, alpha=0.3)
plt.xlim(*axis_range)
plt.ylim(*axis_range)
plt.plot(list(axis_range), list(axis_range), "k-")

In [None]:
# Density of the parallax-based radius against the astroNN-based R_gal.
sns.kdeplot(R, clip=(0, 15), label="Gaia")
sns.kdeplot(subgiants_pann.R_gal, label="ANN")
plt.xlim(0, 15)
# Axis label spelled out in full (it previously read "R ga").
plt.xlabel("R gal")
plt.legend()

In [None]:
# Fraction of stars in the joined table with abs_z above 3.
n_above_threshold = sum(subgiants_pann["abs_z"] > 3)
n_above_threshold / len(subgiants_pann)

In [None]:
# Density of the parallax-based |z| against the astroNN-based abs_z.
sns.kdeplot(np.abs(z), label="gaia", clip=(0, 10))

sns.kdeplot(subgiants_pann.abs_z, label="ANN")
plt.xlim(0, 15)
# This panel shows |z|, not the radius, so it is labelled accordingly.
plt.xlabel("|z| gal")
plt.legend()