## DR17:

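Download all aspcapStar files for both telescopes, north (`apo25m`) and south (`lco25m`). The commands below transfer files as written; add `--dry-run` first to preview what would be copied: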

    rsync -aLvz --include "*/" \
    --include "aspcapStar*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    rsync://sdss@dtn01.sdss.utah.edu/sas/apogeework/apogee/spectro/aspcap/dr17/synspec/apo25m/ apo25m/
    
    rsync -aLvz --include "*/" \
    --include "aspcapStar*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    rsync://sdss@dtn01.sdss.utah.edu/sas/apogeework/apogee/spectro/aspcap/dr17/synspec/lco25m/ lco25m/
    
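Do the same for the LSF files (`apStarLSF*` for `apo25m`, `asStarLSF*` for `lco25m`). These commands include `--dry-run`; remove it to actually download: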

    rsync -aLvz --include "*/" \
    --include "apStarLSF*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    --dry-run \
    rsync://sdss@dtn01.sdss.utah.edu/sas/apogeework/apogee/spectro/redux/dr17/stars/apo25m/ apo25m/
    
    rsync -aLvz --include "*/" \
    --include "asStarLSF*.fits" --exclude "*" \
    --prune-empty-dirs --progress \
    --dry-run \
    rsync://sdss@dtn01.sdss.utah.edu/sas/apogeework/apogee/spectro/redux/dr17/stars/lco25m/ lco25m/

In [None]:
import pathlib
import re

from astropy.io import fits
import astropy.coordinates as coord
from astropy.stats import median_absolute_deviation as MAD
import astropy.table as at
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from scipy.stats import binned_statistic
from tqdm.auto import tqdm

from joaquin.config import Config
from joaquin.plot import phot_to_label, simple_corner
from joaquin.apogee_data import make_apogee_X
from joaquin.data import JoaquinData

In [None]:
# Build the run configuration from the YAML file one directory up. Later cells
# read photometry column names, color pairs, and output file paths from `conf`.
conf = Config('../config.yml')

In [None]:
# Read the DR17 allStar catalog and its Gaia EDR3 cross-match table; both are
# taken from HDU 1 of their FITS files.
allstar = at.Table.read(
    '/mnt/home/apricewhelan/data/APOGEE_DR17/allStar-dr17-synspec.fits',
    hdu=1,
)
gaia_xm = at.Table.read(
    '/mnt/home/apricewhelan/data/APOGEE_DR17/allStar-dr17-synspec-gaiaedr3-xm.fits',
    hdu=1,
)

# Left join keeps every allStar row. No keys are given, so the join uses the
# column names common to both tables. Afterwards keep one row per APSTAR_ID.
allstar = at.unique(
    at.join(allstar, gaia_xm, join_type='left'),
    keys='APSTAR_ID',
)

In [None]:
# WISE photometry table for the APOGEE targets.
wise = at.Table.read(
    '/mnt/home/apricewhelan/data/APOGEE_DR17/APOGEE-DR17-wise-result.fits'
)

# Use plain-string IDs on both sides of the join; the WISE IDs are also
# stripped of surrounding whitespace.
allstar['APOGEE_ID'] = allstar['APOGEE_ID'].astype(str)
wise['APOGEE_ID'] = [apogee_id.strip()
                     for apogee_id in wise['APOGEE_ID'].astype(str)]

# Attach the WISE columns by APOGEE_ID (left join keeps every allStar row),
# then keep one row per APSTAR_ID.
allstar = at.unique(
    at.join(allstar, wise, keys='APOGEE_ID', join_type='left'),
    keys='APSTAR_ID',
)

In [None]:
# Strip whitespace from the telescope names so they compare cleanly.
allstar['TELESCOPE'] = np.array([tel.strip() for tel in allstar['TELESCOPE']])

# Restrict to spectra taken with the apo25m or lco25m telescopes.
keep_telescope = np.isin(allstar['TELESCOPE'], ['apo25m', 'lco25m'])
stars = allstar[keep_telescope]
len(stars)

# Filter out bad photometry

This is currently very aggressive (mag errors < 0.1, flux S/N > 10).

In [None]:
# Display the photometry column names from the config; the photometry mask
# below loops over these.
conf.phot_names

In [None]:
# Boolean mask over `stars` (True = keep) selecting sources with usable
# photometry. Every cut is AND-ed into the mask in place.
phot_mask = np.ones(len(stars), dtype=bool)

# Each configured photometric column must be finite and lie in (0, 22).
for phot_name in conf.phot_names:
    phot_vals = stars[phot_name]
    phot_mask &= (np.isfinite(phot_vals) &
                  (phot_vals > 0) &
                  (phot_vals < 22))  # MAGIC NUMBER

# Gaia photometry: the inverse flux S/N stands in for the magnitude error.
for gaia_band in ('g', 'bp', 'rp'):
    inv_snr = 1 / stars[f'phot_{gaia_band}_mean_flux_over_error']
    phot_mask &= (inv_snr < 0.1)  # MAGIC NUMBER

# 2MASS photometry: errors must be positive and below 0.1.
for tmass_band in 'JHK':
    band_err = stars[f'{tmass_band}_ERR']
    phot_mask &= ((band_err > 0) &
                  (band_err < 0.1))  # MAGIC NUMBER

# WISE photometry: only the w1mpro error is cut here.
phot_mask &= stars['w1mpro_error'] < 0.1

# The first two characters of ph_qual must each be 'A' or 'B'.
# (A stricter variant, requiring the string to start with 'AA', is disabled:
# phot_mask &= np.char.startswith(stars['ph_qual'].astype(str), 'AA'))
pattr = re.compile("^([AB][AB])")
ph_qual = stars['ph_qual'].astype(str)
phot_mask &= np.array([pattr.search(flags) is not None
                       for flags in ph_qual])

# Number of stars passing / total number of stars.
phot_mask.sum(), len(phot_mask)

In [None]:
# Print the 1st and 99.9th percentiles of each band's error estimate among the
# stars that pass the photometry mask.
pctiles = [1, 99.9]

for gaia_band in ('g', 'bp', 'rp'):
    inv_snr = 1 / stars[f'phot_{gaia_band}_mean_flux_over_error']
    print(gaia_band, np.percentile(inv_snr[phot_mask], pctiles))

for tmass_band in ('J', 'H', 'K'):
    tmass_err = stars[f'{tmass_band}_ERR']
    print(tmass_band, np.percentile(tmass_err[phot_mask], pctiles))

for wise_band in ('w1mpro', 'w2mpro'):
    wise_err = stars[f'{wise_band}_error']
    print(wise_band, np.percentile(wise_err[phot_mask], pctiles))

### TODO: filter out SB1 binaries from The Joker VAC?

# Filter on starflag bitmasks for bad spectra:

In [None]:
# STARFLAG bit positions that disqualify a spectrum.
bits = np.array([
    0,  # BAD_PIXELS,
    1,  # COMMISSIONING
    3,  # VERY_BRIGHT_NEIGHBOR
    4,  # LOW_SNR
])

# Combine the bits into one integer mask; a star is kept only when none of
# them is set in its STARFLAG.
bad_bitmask = (1 << bits).sum()
starflag_mask = (stars['STARFLAG'] & bad_bitmask) == 0
starflag_mask.sum()

In [None]:
# Keep only stars whose N_COMPONENTS value is exactly 1, then show how many pass.
# NOTE(review): presumably this rejects spectra with more than one detected
# velocity component — confirm against the allStar data model.
ncomponents_mask = stars['N_COMPONENTS'] == 1
ncomponents_mask.sum()

# Full set of cleaned stars:

In [None]:
# A star is kept only if it passes the photometry, STARFLAG, and N_COMPONENTS
# cuts together.
clean_mask = phot_mask & starflag_mask & ncomponents_mask  # & binaries_mask
clean_stars = stars[clean_mask]

# Sizes before / after the cuts, and the number of distinct APOGEE_IDs left.
len(stars), len(clean_stars), len(np.unique(clean_stars['APOGEE_ID']))

### Color-color plot to prune outliers

In [None]:
# Colors used for the outlier diagnostics.
G_J = clean_stars['phot_g_mean_mag'] - clean_stars['J']
J_K = clean_stars['J'] - clean_stars['K']

# Straight-line fit of J-K against G-J; `dcolor` is each star's residual from
# that line.
poly = np.poly1d(np.polyfit(G_J, J_K, deg=1))
dcolor = J_K - poly(G_J)
xx = np.linspace(0, 10, 25)

# Median and 1.5 * MAD of the residual in bins of J magnitude.
# NOTE: these binned statistics are computed but not drawn on the figure.
bins = np.linspace(5, 15, 25)
binc = 0.5 * (bins[:-1] + bins[1:])
stat = binned_statistic(clean_stars['J'],
                        dcolor,
                        statistic='median',
                        bins=bins)
stat_std = binned_statistic(clean_stars['J'],
                            dcolor,
                            statistic=lambda x: 1.5 * MAD(x),
                            bins=bins)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
ax_cc, ax_resid, ax_mag = axes

# Marker styling shared by all three scatter panels.
scatter_style = dict(marker='o', alpha=0.4, lw=0, s=4)

# Left panel: color-color diagram colored by J, with the fitted line on top.
ax_cc.plot(xx, poly(xx), color='tab:blue', zorder=10, marker='')
ax_cc.scatter(G_J, J_K,
              c=clean_stars['J'],
              cmap='cividis_r',
              **scatter_style)
ax_cc.set_xlabel('$G-J$')
ax_cc.set_ylabel('$J-K$')

# Right panel: residual from the fit as a function of J magnitude.
ax_mag.scatter(clean_stars['J'], dcolor, **scatter_style)
ax_mag.set_xlabel('$J$')
ax_mag.set_ylabel(r'$(J-K) - (J-K)_{\rm pred}$')

# Middle panel: residual against G-J, colored by LOGG.
ax_resid.scatter(G_J, dcolor,
                 c=clean_stars['LOGG'],
                 vmin=0.5, vmax=5.5,
                 cmap='cividis_r',
                 **scatter_style)
ax_resid.set_xlabel('$G - J$')
ax_resid.set_ylabel(r'$(J-K) - (J-K)_{\rm pred}$')

# MAGIC NUMBERS: horizontal lines at median -/+ 8 standard deviations of the
# residual. (A robust variant using 8 * 1.5 * MAD(dcolor), drawn in
# 'tab:green', is not enabled.)
dcolor_mid = np.median(dcolor)
dcolor_halfwidth = 8 * np.std(dcolor)
for edge in (dcolor_mid - dcolor_halfwidth, dcolor_mid + dcolor_halfwidth):
    ax_resid.axhline(edge)

fig.tight_layout()
fig.set_facecolor('w')

In [None]:
# MAGIC NUMBER: Good ol' standard 8 sigma cut on the color residual, measured
# from its median.
resid_offset = np.abs(dcolor - np.median(dcolor))
dcolor_mask = resid_offset < 8 * np.std(dcolor)

# Two further color cuts, AND-ed in place: H - W2 > -0.5 and W1 - W2 > -1.
H_W2 = clean_stars['H'] - clean_stars['w2mpro']
W1_W2 = clean_stars['w1mpro'] - clean_stars['w2mpro']
dcolor_mask &= H_W2 > -0.5
dcolor_mask &= W1_W2 > -1
dcolor_mask.sum()

In [None]:
# The parent sample: cleaned stars that also pass the color outlier cuts.
parent = clean_stars[dcolor_mask]

In [None]:
# Sky positions (RA, DEC) of the parent sample, drawn as small translucent
# points with no connecting line.
plt.figure(figsize=(10, 6))
plt.plot(
    parent['RA'],
    parent['DEC'],
    marker='o',
    markeredgewidth=0,
    linestyle='none',
    alpha=0.1,
    markersize=1,
)
plt.tight_layout()

In [None]:
# NOTE(review): `a` is not defined anywhere in the visible notebook, so this
# cell raises NameError. Presumably an intentional stop so that "Run All" halts
# here before the cells below — confirm, or replace with an explicit marker.
a

In [None]:
# Photometry / colors:
# Build one color (first band minus second band) per configured pair, along
# with a display label for each.
colors = conf.neighborhood_color_names
# Example of the kind of value expected here (pairs of column names):
# [
#     ('phot_bp_mean_mag', 'phot_rp_mean_mag'),
#     ('J', 'K'),
#     ('w1mpro', 'w2mpro'),
#     ('phot_g_mean_mag', 'J'),
#     ('H', 'w2mpro')
# ]

color_columns = []
labels = []
for first, second in colors:
    color_columns.append(parent[first] - parent[second])

    # Use the display label when one is registered; otherwise fall back to the
    # raw column name.
    pretty_first = phot_to_label[first] if first in phot_to_label else first
    pretty_second = phot_to_label[second] if second in phot_to_label else second
    labels.append(f"{pretty_first} $-$ {pretty_second}")

# Transpose so that rows are stars and columns are colors.
plot_X = np.array(color_columns).T

In [None]:
# Plot the color matrix built above with the project's `simple_corner` helper,
# one label per color.
# NOTE(review): presumably draws pairwise scatter panels of the columns of
# plot_X — confirm against joaquin.plot.simple_corner.
fig, axes = simple_corner(
    plot_X, 
    colorbar=True,
    labels=labels,
    alpha=0.2, ms=1.)

# White figure background.
fig.set_facecolor('w')

In [None]:
# Row counts at each stage: full joined catalog, after quality cuts, and the
# final parent sample.
len(allstar), len(clean_stars), len(parent)

In [None]:
# Save the parent sample table to the path given in the config, replacing any
# existing file.
parent.write(conf.parent_sample_source_file, overwrite=True)

### Build the design matrix data for the full parent sample

In [None]:
# Construct the JoaquinData object for the whole parent sample, passing
# `make_apogee_X` as the builder and the configured cache file path.
# NOTE(review): presumably results are read from / written to `cache_file` to
# avoid recomputation — confirm against JoaquinData.from_stars.
parent_data = JoaquinData.from_stars(
    conf, parent, make_apogee_X, 
    cache_file=conf.parent_sample_cache_file)