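We need to download all aspcapStar files for both the northern and southern telescopes (N/S). Run the command with `--dry-run` first to check what will be transferred. Note that the example below only covers `apo25m`; the `lco25m` files need an analogous command: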

    rsync -aLvz --include "*/" \
    --include "aspcapStar*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    rsync://dtn.sdss.org/dr16/apogee/spectro/aspcap/r12/l33/apo25m/ apo25m/
    
And the same for the apStarLSF (`apo25m`) and asStarLSF (`lco25m`) files:

    rsync -aLvz --include "*/" \
    --include "apStarLSF*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    rsync://dtn.sdss.org/dr16/apogee/spectro/redux/r12/stars/apo25m/ apo25m/
    
    rsync -aLvz --include "*/" \
    --include "asStarLSF*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    rsync://dtn.sdss.org/dr16/apogee/spectro/redux/r12/stars/lco25m/ lco25m/

In [None]:
import os
# Point the APOGEE cache at a specific directory.
# NOTE(review): this is set ahead of the remaining imports, presumably because
# a downstream package reads the variable at import time -- confirm.
os.environ['APOGEE_CACHE_PATH'] = "/mnt/ceph/users/apricewhelan/apogee-test/"

import re
import sys
import pathlib
# Make the parent directory importable. The membership check keeps sys.path
# free of duplicate entries when this cell is re-run.
_path = str(pathlib.Path('../').resolve())
if _path not in sys.path:
    sys.path.append(_path)

import corner
from astropy.io import fits
import astropy.coordinates as coord
import astropy.table as at
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from tqdm.auto import tqdm
from scipy.stats import binned_statistic
from astropy.stats import median_absolute_deviation as MAD

from gala.mpl_style import hesperia, laguna

from joaquin.config import all_phot_names, dr
from joaquin.plot import simple_corner, phot_to_label
from joaquin.data import JoaquinData

In [None]:
# Cache directory for this data release, as an absolute path; it is created
# (including any missing parents) if it does not exist yet.
cache_path = pathlib.Path(f'../cache/{dr}').resolve()
cache_path.mkdir(parents=True, exist_ok=True)

In [None]:
# binaries = at.Table.read(
#     '/mnt/home/apricewhelan/projects/apogee-dr16-vac/catalogs/lnK-0.2_logL3.0_metadata.fits')

In [None]:
# Alternative (DR17) catalog, kept for reference:
# allstar = at.Table.read('/mnt/home/apricewhelan/data/APOGEE_DR17/allStar-dr17-turbo20-beta.fits',
#                         hdu=1)

# Read the first HDU of the DR16 allStar file.
allstar = at.Table.read(
    '/mnt/home/apricewhelan/data/APOGEE_DR16/allStar-r12-gaiaedr3.fits',
    hdu=1)

# Expose the Gaia columns under upper-case, "GAIAEDR3_"-prefixed names as well
# (the original lower-case columns are kept).
gaia_colnames = (
    'phot_g_mean_mag',
    'phot_bp_mean_mag',
    'phot_rp_mean_mag',
    'parallax',
    'parallax_error',
)
for colname in gaia_colnames:
    allstar[f'GAIAEDR3_{colname.upper()}'] = allstar[colname]

In [None]:
# Load the WISE table and give its ID column the same name as in allstar so
# that the two tables can be joined on it.
wise = at.Table.read('/mnt/home/apricewhelan/data/APOGEE_DR17/APOGEE-DR17-wise-result.fits.gz')
wise.rename_column('apogee_id', 'APOGEE_ID')

# Convert both key columns to str; the WISE IDs additionally have surrounding
# whitespace stripped.
wise_ids = wise['APOGEE_ID'].astype(str)
wise['APOGEE_ID'] = [apogee_id.strip() for apogee_id in wise_ids]
allstar['APOGEE_ID'] = allstar['APOGEE_ID'].astype(str)

# Left join keeps every allstar row; afterwards keep a single row per
# APOGEE_ID.
joined = at.join(allstar, wise, keys='APOGEE_ID', join_type='left')
allstar = at.unique(joined, keys='APOGEE_ID')

In [None]:
# Strip whitespace from the telescope names so they can be compared exactly.
telescope_names = [name.strip() for name in allstar['TELESCOPE']]
allstar['TELESCOPE'] = np.array(telescope_names)

# Keep only rows taken with one of the two 2.5m telescopes.
is_25m = np.isin(allstar['TELESCOPE'], ['apo25m', 'lco25m'])
stars = allstar[is_25m]
len(stars)

In [None]:
# Start from "everything passes" and AND in each photometric quality cut.
phot_mask = np.ones(len(stars), dtype=bool)

# Every photometric band must be finite and inside (0, 22).
for name in all_phot_names:
    mag = stars[name]
    phot_mask &= np.isfinite(mag) & (mag > 0) & (mag < 22)  # MAGIC NUMBER

# 2MASS photometry: uncertainties must be inside (0, 0.1).
for band in ['J', 'H', 'K']:
    mag_err = stars[f'{band}_ERR']
    phot_mask &= (mag_err > 0) & (mag_err < 0.1)  # MAGIC NUMBER

# WISE photometry: the first two characters of ph_qual must each be A or B.
# Masked entries are filled with '--' and therefore fail the match.
# (Stricter alternative, disabled:)
# phot_mask &= np.char.startswith(stars['ph_qual'].astype(str), 'AA')
pattr = re.compile("^([AB][AB])")
ph_qual = stars['ph_qual'].filled('--').astype(str)
phot_mask &= np.array([pattr.search(flag) is not None
                       for flag in ph_qual])

In [None]:
# Print the 1st and 99.9th percentiles of each 2MASS uncertainty for the
# stars that pass the photometric cuts.
for band in 'JHK':
    band_err = stars[f'{band}_ERR'][phot_mask]
    print(np.percentile(band_err, [1, 99.9]))

In [None]:
# binaries_mask = ~np.isin(stars['APOGEE_ID'], binaries['APOGEE_ID'])

In [None]:
# STARFLAG bit positions that disqualify a star.
bits = np.array([
    0,  # BAD_PIXELS
    1,  # COMMISSIONING
    3,  # VERY_BRIGHT_NEIGHBOR
    4,  # LOW_SNR
])

# Combine the bit positions into one integer bitmask; a star passes only if
# none of those bits are set in its STARFLAG.
starflag_bitmask = np.sum(2**bits)
starflag_mask = (stars['STARFLAG'] & starflag_bitmask) == 0

In [None]:
# Apply the photometric and STARFLAG cuts together.
# TODO: mask binaries? (would additionally AND in binaries_mask)
quality_mask = phot_mask & starflag_mask
clean_stars = stars[quality_mask]
len(stars), len(clean_stars)

### Color-color plot to prune outliers

In [None]:
# Two colors, a straight-line fit of J-K against G-J, and the residual of
# each star from that line.
G_J = clean_stars['GAIAEDR3_PHOT_G_MEAN_MAG'] - clean_stars['J']
J_K = clean_stars['J'] - clean_stars['K']
poly = np.poly1d(np.polyfit(G_J, J_K, deg=1))
dcolor = J_K - poly(G_J)

# Grid on which the fitted line is drawn.
xx = np.linspace(0, 10, 25)

# Scatter-point styling and the axis label shared between panels.
point_style = dict(marker='o', alpha=0.4, lw=0, s=4)
residual_label = r'$(J-K) - (J-K)_{\rm pred}$'

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Left panel: color-color diagram, colored by J magnitude, with the fit.
ax = axes[0]
ax.plot(xx, poly(xx), color='tab:blue', zorder=10, marker='')
ax.scatter(G_J, J_K,
           c=clean_stars['J'],
           cmap='cividis_r',
           **point_style)
ax.set_xlabel('$G-J$')
ax.set_ylabel('$J-K$')

# Right panel: residual from the fit against J magnitude.
ax = axes[2]
ax.scatter(clean_stars['J'], dcolor, **point_style)
ax.set_xlabel('$J$')
ax.set_ylabel(residual_label)

# Median and scaled-MAD (1.5 x MAD) of the residual in bins of J.
# These are computed here but not drawn in this cell.
bins = np.linspace(5, 15, 25)
binc = 0.5 * (bins[:-1] + bins[1:])
stat = binned_statistic(clean_stars['J'],
                        dcolor,
                        statistic='median',
                        bins=bins)
stat_std = binned_statistic(clean_stars['J'],
                            dcolor,
                            statistic=lambda resid: 1.5 * MAD(resid),
                            bins=bins)

# Middle panel: residual against G-J, colored by log g.
ax = axes[1]
ax.scatter(G_J, dcolor,
           c=clean_stars['LOGG'],
           vmin=0.5, vmax=5.5,
           cmap='cividis_r',
           **point_style)
ax.set_xlabel('$G - J$')
ax.set_ylabel(residual_label)

# MAGIC NUMBERS: horizontal lines at median +/- 6 standard deviations.
dcolor_median = np.median(dcolor)
dcolor_halfwidth = 6 * np.std(dcolor)
ax.axhline(dcolor_median - dcolor_halfwidth)
ax.axhline(dcolor_median + dcolor_halfwidth)
# ax.axhline(np.median(dcolor) - 8 * 1.5 * MAD(dcolor), color='tab:green')
# ax.axhline(np.median(dcolor) + 8 * 1.5 * MAD(dcolor), color='tab:green')

fig.tight_layout()
fig.set_facecolor('w')

In [None]:
# MAGIC NUMBER: keep stars whose color residual lies within 6 standard
# deviations of the median residual.
dcolor_offset = np.abs(dcolor - np.median(dcolor))
dcolor_mask = dcolor_offset < 6 * np.std(dcolor)

# Additional lower limits on two infrared colors.
H_W2 = clean_stars['H'] - clean_stars['w2mpro']
W1_W2 = clean_stars['w1mpro'] - clean_stars['w2mpro']
dcolor_mask &= H_W2 > -0.5
dcolor_mask &= W1_W2 > -1
dcolor_mask.sum()

In [None]:
# The parent sample: quality-cut stars that also pass the color-outlier cuts.
parent = clean_stars[dcolor_mask]

In [None]:
# Photometry / colors:
# Each entry is a (band 1, band 2) pair of column names; the plotted color is
# band 1 minus band 2.
colors = [
    ('GAIAEDR3_PHOT_BP_MEAN_MAG', 'GAIAEDR3_PHOT_RP_MEAN_MAG'),
    ('J', 'K'),
    ('w1mpro', 'w2mpro'),
    ('GAIAEDR3_PHOT_G_MEAN_MAG', 'J'),
    ('H', 'w2mpro'),
]

plot_X = []
labels = []
for p1, p2 in colors:
    plot_X.append(parent[p1] - parent[p2])

    # Use the pretty label for a band when one is defined, otherwise fall
    # back to the raw column name.
    lbl1, lbl2 = [phot_to_label[p] if p in phot_to_label else p
                  for p in (p1, p2)]
    labels.append(f"{lbl1} $-$ {lbl2}")

# Stack the colors and transpose so that each row is a star and each column
# is one color.
plot_X = np.array(plot_X).T

In [None]:
# Corner plot of all colors for the parent sample.
# NOTE(review): the other simple_corner calls in this notebook that pass
# colorbar=True unpack a trailing colorbar object. The starred target absorbs
# any such extra return value so this call cannot fail on unpacking -- confirm
# against simple_corner's return signature (and whether colorbar=True is
# meaningful here, since no color_by is given).
fig, axes, *_ = simple_corner(
    plot_X,
    colorbar=True,
    labels=labels,
    alpha=0.2, ms=1.)

fig.set_facecolor('w')

In [None]:
# Corner plot of all colors, with points colored by log g.
fig, *_, cb = simple_corner(
    plot_X,
    labels=labels,
    color_by=parent['LOGG'],
    cmap='turbo', vmin=0.5, vmax=5.,
    alpha=0.2,
    colorbar=True)

# Colorbar cosmetics, then a white figure background.
cb.set_label(r'$\log g$')
cb.ax.set_aspect(40)
fig.set_facecolor('w')

In [None]:
# Corner plot of all colors, with points colored by log10 of SFD_EBV.
fig, *_, cb = simple_corner(
    plot_X,
    labels=labels,
    color_by=np.log10(parent['SFD_EBV']),
    cmap='turbo', vmin=-1, vmax=0,
    alpha=0.2,
    colorbar=True)

# Colorbar cosmetics, then a white figure background.
cb.set_label(r'$\log_{10} ({\rm SFD EBV})$')
cb.ax.set_aspect(40)
fig.set_facecolor('w')

In [None]:
# Row counts at each stage: joined catalog, quality-cut stars, parent sample.
len(allstar), len(clean_stars), len(parent)

In [None]:
# Save the parent sample into the cache directory, replacing any existing file.
parent.write(cache_path / 'parent-sample.fits', overwrite=True)

### Build the design matrix data for the full parent sample

In [None]:
# Build the JoaquinData object for the whole parent sample.
# NOTE(review): cache_file presumably names an on-disk cache that from_stars
# reads or writes -- confirm against joaquin.data.
parent_data = JoaquinData.from_stars(
    parent, cache_file='parent-sample')

### Define and save a global spectral mask based on the fraction of pixels over the full parent sample that are masked:

In [None]:
# Flag a pixel as globally bad when it is masked in more than 25% of the
# parent-sample spectra (MAGIC NUMBER).
# The previous version compared the per-pixel *count* of masked spectra
# (sum over axis 0) with 0.25, which flags any pixel masked in even a single
# star; taking the mean gives the intended masked fraction.
# NOTE(review): assumes spec_bad_masks is a boolean (or 0/1) array with one
# row per star and one column per pixel -- confirm.
bad_pixel_fraction = parent_data.spec_bad_masks.mean(axis=0)
global_spec_mask = bad_pixel_fraction > 0.25

# Save the mask next to the other cached products and report how many pixels
# it flags.
np.save(cache_path / 'global_spec_bad_mask.npy',
        global_spec_mask)
global_spec_mask.sum()