In [None]:
# ------------------------------------------------------------------------
#
# TITLE - clean_data.ipynb
# AUTHOR - James Lane
# PROJECT - ges-mass
#
# ------------------------------------------------------------------------
#
# Docstrings and metadata:
'''Clean the Gaia-APOGEE data

Loads the APOGEE allstar array, its statistical-sample index, the Gaia
catalog and the Gaia-APOGEE match index; builds galpy orbits for the stars
with finite sky position, proper motion, distance and radial velocity;
computes (or reloads from a stash) Staeckel deltas, eccentricities, actions,
energies and orbital extrema; applies the bulge, quality and LOCATION_ID
cuts; and pickles the cleaned catalogs, orbits and kinematics.
'''

__author__ = "James Lane"

In [None]:
### Imports

## Basic
import numpy as np, pdb, sys, os, dill as pickle
import matplotlib.pyplot as plt
import astropy.units as apu
# import kimmy

## galpy
from galpy import orbit
from galpy import potential
from galpy import actionAngle as aA

sys.path.append('../../src/')
from ges_mass import util as putil

In [None]:
# Notebook setup
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Use the high-resolution ('retina') inline figure format
%config InlineBackend.figure_format = 'retina'
# Reload imported modules before each cell runs, so edits to the local
# ges_mass package are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
# Keywords and pathing
# Pull the data directory and the ro, vo, zo values (passed to galpy below)
# from the project configuration.
cdict = putil.load_config_to_dict()
keywords = ['DATA_DIR','RO','VO','ZO']
data_dir_base,ro,vo,zo = putil.parse_config_dict(cdict,keywords)
# os.path.join with a trailing '' returns a directory path that ends in a
# separator whether or not DATA_DIR itself does. Plain string concatenation
# silently produced '<base>gaia_apogee/' when the separator was missing.
# Later cells append file names directly, so the trailing separator matters.
data_dir = os.path.join(data_dir_base,'gaia_apogee','')
out_dir = os.path.join(data_dir_base,'gaia_apogee_processed','')

# APOGEE and Gaia info
apogee_dr = 'dr16' # dr16 or dr17
apogee_results_vers = 'l33' # l33 for dr16, unknown for dr17
gaia_dr = 'dr2' # dr2 or edr3
# Sub-directory shared by the input and output trees; keeps its trailing '/'
# because file names are concatenated straight onto it.
gaia_apogee_dir = 'apogee_'+apogee_dr+'_'+apogee_results_vers+'_gaia_'+gaia_dr+'/'
os.makedirs(out_dir+gaia_apogee_dir,exist_ok=True)

In [None]:
# Potential
# NOTE: mwpot is bound to galpy's MWPotential2014 object itself (no copy is
# made here), so turn_physical_on below acts on that shared object.
mwpot = potential.MWPotential2014
# Switch the potential to physical-unit outputs using the configured ro, vo
potential.turn_physical_on(mwpot,ro=ro,vo=vo)
# Potential evaluated at R=1e10, z=0 -- presumably a stand-in for the value at
# infinity (TODO confirm). `.value` assumes galpy returns an astropy Quantity.
# NOTE(review): phi0 is not used in the cells visible in this notebook.
phi0 = potential.evaluatePotentials(mwpot,1e10,0).value

# Get data

### APOGEE

In [None]:
# Load APOGEE data
allstar_filename = data_dir+gaia_apogee_dir+'apogee_allstar.npy'
print('APOGEE data release is: '+apogee_dr+', and results version is: '+apogee_results_vers)
print('Loading APOGEE from '+allstar_filename)
allstar = np.load(allstar_filename)
print(str(len(allstar))+' stars in total sample.')

# load APOGEE statistical sample index
apogee_stat_indx_filename = data_dir+gaia_apogee_dir+'apogee_statIndx.npy'
# Report the configured data release rather than a hard-coded 'DR16', so the
# log stays correct when apogee_dr is changed.
print('\nLoading APOGEE '+apogee_dr.upper()+' statistical sample from '+apogee_stat_indx_filename)
apogee_stat_indx = np.load(apogee_stat_indx_filename)
print(str(np.sum(apogee_stat_indx))+' stars in statistical sample.')

# Gaia data and Gaia-APOGEE match index
gaia_data_filename = data_dir+gaia_apogee_dir+'gaia_data.npy'
gaia_apogee_matches_filename = data_dir+gaia_apogee_dir+'gaia_apogee_matches.npy'
print('\nGaia data release is: '+gaia_dr)
print('Loading Gaia catalog from '+gaia_data_filename)
# NOTE: allow_pickle=True unpickles objects stored in the file, which can run
# arbitrary code. Only load catalogs produced by this project.
gaia_data = np.load(gaia_data_filename, allow_pickle=True)
print('Loading Gaia-APOGEE matches from '+gaia_apogee_matches_filename)
gaia_apogee_matches_indx = np.load(gaia_apogee_matches_filename)

# Apply the statistical sample index and Gaia-APOGEE matching index
allstar_gaia = allstar[apogee_stat_indx][gaia_apogee_matches_indx]

# The following cells combine columns of gaia_data and allstar_gaia element by
# element, so the two must be row-aligned. Fail here with a clear message
# instead of a broadcasting error further down.
if len(gaia_data) != len(allstar_gaia):
    raise ValueError('Gaia catalog has '+str(len(gaia_data))+' rows but the '
                     'matched APOGEE sample has '+str(len(allstar_gaia))+
                     '; the Gaia data and the match index are inconsistent')

# Clean data and make kinematics

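Computing the Staeckel deltas and eccentricities may take a while; the results are stashed on disk and reloaded on later runs unless `force_kinematics` is set.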

In [None]:
# Only stars with a complete, finite 6D phase-space measurement can be turned
# into orbits (and hence get eccentricities).
# Sky position and proper motion come from the Gaia catalog ...
gaia_astrometry_finite = (np.isfinite(gaia_data['RA'])
                          & np.isfinite(gaia_data['DEC'])
                          & np.isfinite(gaia_data['pmra'])
                          & np.isfinite(gaia_data['pmdec']))
# ... distance and line-of-sight velocity from the matched APOGEE rows.
apogee_dist_rv_finite = (np.isfinite(allstar_gaia['weighted_dist'])
                         & np.isfinite(allstar_gaia['VHELIO_AVG']))
input_mask = gaia_astrometry_finite & apogee_dist_rv_finite

# Same mask on both catalogs keeps them row-aligned
gaia_input = gaia_data[input_mask]
allstar_input = allstar_gaia[input_mask]

In [None]:
# Assemble the observed coordinates, one row per star, in the column order
# [RA, Dec, distance, pmra, pmdec, line-of-sight velocity] and hand them to
# galpy as sky coordinates (radec=True).
phase_space_columns = [
    gaia_input['RA'],
    gaia_input['DEC'],
    # distance rescaled by 1/1000 -- presumably pc -> kpc (TODO confirm units)
    allstar_input['weighted_dist']/1000,
    gaia_input['pmra'],
    gaia_input['pmdec'],
    allstar_input['VHELIO_AVG'],
]
vxvv = np.array(phase_space_columns).T
orbs_input = orbit.Orbit(vxvv=vxvv, radec=True, ro=ro, vo=vo, zo=zo)

In [None]:
# Display the z coordinate of the freshly built orbits -- presumably a quick
# sanity check that the Orbit object was constructed (TODO confirm intent)
orbs_input.z()

In [None]:
# Force the calculation of new input kinematic quantities even if stashed
# ones exist?
force_kinematics = False
# NOTE: despite the .npy extension this stash is written and read with pickle
# (dill), not np.save/np.load. The name is kept so existing stashes still load.
input_kinematics_filename = out_dir+gaia_apogee_dir+'input_kinematics.npy'

# Kinematics are stashed because they take some time to compute
if force_kinematics or not os.path.exists(input_kinematics_filename):
    # delta=0.4 only sets the object's default focal length; every call below
    # passes the per-star deltas explicitly.
    aAS = aA.actionAngleStaeckel(pot=mwpot, delta=0.4, ro=ro, vo=vo, zo=zo, c=True)
    print('Calculating Staeckel deltas...')
    # no_median=True requests one delta per star instead of a single median
    deltas_input = aA.estimateDeltaStaeckel(mwpot, orbs_input.R(), orbs_input.z(), no_median=True)
    print('Calculating eccentricities...')
    ecc_input,zmax_input,rperi_input,rapo_input = aAS.EccZmaxRperiRap(orbs_input, 
        delta=deltas_input, use_physical=True, c=True)
    print('Calculating actions')
    accs_freqs = aAS.actionsFreqs(orbs_input, use_physical=True, 
                                  delta=deltas_input, c=True)
    E_input = orbs_input.E(pot=mwpot)
    
    # Strip units to plain arrays before stacking and saving
    # (assumes galpy is configured to return astropy Quantities)
    deltas_input = deltas_input.value
    ecc_input = ecc_input.value
    zmax_input = zmax_input.value
    rperi_input = rperi_input.value
    rapo_input = rapo_input.value
    E_input = E_input.value
    # First three outputs of actionsFreqs are the actions (jr, Lz, jz)
    jr_input = accs_freqs[0].value
    Lz_input = accs_freqs[1].value
    jz_input = accs_freqs[2].value
    
    # Each stacked array is (3, number of stars)
    eELzs_input = np.array([ecc_input,E_input,Lz_input])
    accs_input = np.array([jr_input,Lz_input,jz_input])
    orbextr_input = np.array([zmax_input,rperi_input,rapo_input])
    print('Saving deltas, eELz, actions, extrema to '+input_kinematics_filename)
    with open(input_kinematics_filename,'wb') as f:
        pickle.dump([deltas_input,eELzs_input,accs_input,orbextr_input],f)
else:
    print('Loading eccentricities and deltas from '+input_kinematics_filename)
    # NOTE: unpickling can execute arbitrary code. Only load stashes written
    # by this notebook.
    with open(input_kinematics_filename,'rb') as f:
        deltas_input,eELzs_input,accs_input,orbextr_input = \
            pickle.load(f)
    # A stash written for a different input selection would otherwise only
    # fail later with an opaque broadcasting/indexing error. Check the number
    # of stars (last axis of every stashed array) against the current orbits.
    # This cannot detect a stale stash that happens to have the same length.
    n_stashed = [np.atleast_1d(stashed).shape[-1] for stashed in
                 (deltas_input,eELzs_input,accs_input,orbextr_input)]
    if any(n != len(orbs_input) for n in n_stashed):
        raise ValueError('Stashed kinematics in '+input_kinematics_filename+
                         ' hold '+str(n_stashed)+' stars per array but the '
                         'current input sample has '+str(len(orbs_input))+
                         '; rerun with force_kinematics = True')
##fi

In [None]:
# Pull the LOCATION_ID column out once; prevents unnecessary querying
allstar_input_locid = allstar_input['LOCATION_ID']

# Cut bulge fields. Within 20 degrees of the galactic center:
# longitude within 20 deg of l=0 (wrapping through 360) AND |latitude| < 20 deg
toward_center_in_glon = ((allstar_input['GLON'] > 340.)
                         | (allstar_input['GLON'] < 20.))
toward_center_in_glat = np.fabs(allstar_input['GLAT']) < 20.
omask_bulge = ~(toward_center_in_glon & toward_center_in_glat)

# Quality cuts: keep stars with fractional distance uncertainty below 0.2,
# a defined eccentricity, defined [Mg/Fe] and [Fe/H] (values above the -9999
# sentinel), and log(g) uncertainty below 0.1
fractional_dist_error = (allstar_input['weighted_dist_error']
                         / allstar_input['weighted_dist'])
omask_quality = ((fractional_dist_error < 0.2)
                 & np.isfinite(eELzs_input[0])
                 & (allstar_input['MG_FE'] > -9999)
                 & (allstar_input['FE_H'] > -9999)
                 & (allstar_input['LOGG_ERR'] < 0.1))

# Drop every star observed in one of these fields
# (presumably globular-cluster fields, going by the mask name -- TODO confirm)
excluded_location_ids = [2011, 4353, 5093, 5229, 5294, 5295, 5296, 5297,
                         5298, 5299, 5300, 5325, 5328, 5329, 5438, 5528,
                         5529, 5744, 5801]
omask_gc = ~np.isin(allstar_input_locid, excluded_location_ids)

# A star survives only if it passes all three cuts
omask = omask_bulge & omask_quality & omask_gc

# Row-indexed products: catalogs and orbits
gaia_omask, allstar_omask, os_omask = (
    per_star[omask] for per_star in (gaia_input, allstar_input, orbs_input))
# Stacked kinematic arrays are (quantity, star), so mask the last axis
eELzs_omask, accs_omask, orbextr_omask = (
    stacked[:,omask] for stacked in (eELzs_input, accs_input, orbextr_input))

print('Number of good stars: '+str(len(os_omask)))

In [None]:
# Stash the cleaned catalogs, orbits and kinematics together in one pickle
# (written with pickle/dill despite the .npy extension)
omask_kinematics_filename = out_dir+gaia_apogee_dir+'clean_kinematics.npy'
clean_products = [gaia_omask,
                  allstar_omask,
                  os_omask,
                  eELzs_omask,
                  accs_omask,
                  orbextr_omask]
with open(omask_kinematics_filename,'wb') as outfile:
    pickle.dump(clean_products,outfile)
##pkl