In [1]:
# ----------------------------------------------------------------------------
#
# TITLE - gaiadr2-apogee_dataset.ipynb
# AUTHOR - James Lane
# PROJECT - AST 1501
#
# ----------------------------------------------------------------------------
#
# Docstrings and metadata:
'''
Prepare the Gaia DR2-APOGEE dataset.
'''

__author__ = "James Lane"

In [2]:
### Imports

## Basic
import numpy as np
import sys, os, pdb

## Astropy
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import table
from astropy import units as apu
from astropy.coordinates import CartesianDifferential

## galpy & astroNN
from galpy.util import bovy_coords as gpcoords
import astroNN.apogee

In [3]:
### Change Jupyter settings

from IPython.core.interactiveshell import InteractiveShell
from IPython.display import Markdown, display
from IPython.core.debugger import set_trace

# Matplotlib for notebooks: draw figures inline in the cell output and ask
# the inline backend for the 'retina' figure format
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Display the value of every top-level expression in a cell, not just the
# last one. This setting applies to all cells run after this one.
InteractiveShell.ast_node_interactivity = "all"

# Print using markdown
def printmd(string, color=None):
    '''
    Display ``string`` in the cell output rendered as Markdown.

    Args:
        string: Text to render; it is formatted into an HTML ``<span>``.
        color: Optional CSS colour for the text. When None (the default) the
            span carries no style attribute, instead of the invalid
            ``color:None`` declaration.

    Returns:
        None. The rendered text is shown with ``display``.
    '''
    if color is None:
        # Keep the <span> wrapper so the text renders inline exactly as it
        # does in the coloured case; only the bogus style is dropped.
        colorstr = "<span>{}</span>".format(string)
    else:
        colorstr = "<span style='color:{}'>{}</span>".format(color, string)
    display(Markdown(colorstr))

# Meta

## Future work
- Propagate the observational errors to the Galactocentric quantities (possibly with a Monte Carlo approach?)
- Output abundances, $\log g$ and $T_{\mathrm{eff}}$ along with the kinematic parameters

## Information
- APOGEE DR14 data model listed [here](https://data.sdss.org/datamodel/files/APOGEE_REDUX/APRED_VERS/APSTAR_VERS/ASPCAP_VERS/RESULTS_VERS/allStar.html)

# Load
Load the astroNN abundance and distance catalogs and the APOGEE allStar catalog.

In [4]:
### Load catalogs

# Names
abundance_catalog = '../../data/astronn/astroNN_apogee_dr14_catalog.fits'
distance_catalog = '../../data/astronn/apogee_dr14_nn_dist_0562.fits'
# astroNN supplies the allStar file that is handed to fits.open below
apogee_catalog = astroNN.apogee.allstar(dr=14)

# Load and extract the data table held in extension 1 of each file.
# Context managers close the file handles once the table has been pulled out.
# The *_file names stay bound (to the closed HDU lists) as before.
# NOTE(review): with astropy's default memory mapping the extracted tables are
# expected to remain readable after the file is closed -- confirm on the
# astropy version in use.
with fits.open(abundance_catalog) as abund_file:
    abund_data = abund_file[1].data
with fits.open(distance_catalog) as dist_file:
    dist_data = dist_file[1].data
with fits.open(apogee_catalog) as ap_file:
    ap_data = ap_file[1].data

  return f(*args, **kwds)

  from ._conv import register_converters as _register_converters

/Users/JamesLane/Software/Python/External/data/apogee/sdss_local_sas_mirror/dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/allStar-l31c.2.fits was found!


In [15]:
### For debugging or searching
# print(abund_data.columns)
# print(dist_data.columns)
# print(ap_data.columns)

In [17]:
### Read catalog values

# The three catalogues are combined purely by row position below, so they
# must at least have the same number of rows. Fail loudly here rather than
# building a ragged / misaligned array further down.
# NOTE(review): this does not prove the rows refer to the same stars; consider
# also comparing the APOGEE_ID columns.
n_catalog_rows = len(abund_data)
if len(dist_data) != n_catalog_rows or len(ap_data) != n_catalog_rows:
    raise ValueError('Catalog lengths differ: abundance={}, distance={}, '
                     'allStar={}'.format(len(abund_data), len(dist_data),
                                         len(ap_data)))

# ID, RA, Dec, logg, abundances, errors
apid = abund_data['APOGEE_ID']
locid = abund_data['LOCATION_ID']
abunds = abund_data['astroNN']
abunds_err = abund_data['astroNN_error']
ra = abund_data['RA']
dec = abund_data['DEC']

# Columns of the astroNN parameter array that are used here, in the order
# logg, Mg, Si, Ca, Fe.
# NOTE(review): the index -> quantity mapping is taken from the variable names
# below; confirm against the astroNN catalog data model.
astronn_cols = [1, 7, 9, 13, 19]
logg, mg, si, ca, fe = abunds[:, astronn_cols].T
slogg, smg, ssi, sca, sfe = abunds_err[:, astronn_cols].T

# Distance, distance error, pmRA, pmDec
dist = dist_data['pc'] / 1000 # In kpc
sdist = dist_data['pc_error'] / 1000 # In kpc
fsdist = sdist / dist # Fractional distance error
pmra = dist_data['pmra']
pmdec = dist_data['pmdec']

# Radial velocity
vhelio = ap_data['VHELIO_AVG']
svhelio = ap_data['VERR']

# Make into a mega-array for easy cutting. Separate by floats and string.
# Rows are stars; the column order here is relied on when unpacking later.
all_data_str = np.array([apid, locid]).T
all_data_flt = np.array([ra, dec, logg, mg, si, ca, fe, dist,
                         slogg, smg, ssi, sca, sfe, sdist, pmra,
                         pmdec, vhelio, svhelio ]).T

# Perform cuts
Cut the data based on:
- $\sigma_{\log g} < 0.2$
- $\sigma_{d}/d < 0.2$
- $\log g < 3$
- $\mu_{\alpha}$ and $\mu_{\delta}$ not $-9999$ or NaN

In [18]:
### Perform cuts

# Size of the un-cut catalogue. Taken from the mega-array because the per-star
# names (apid, slogg, logg, ...) are rebound to the cut arrays by the unpack
# cell that follows.
n_total = all_data_flt.shape[0]

# Guard against hidden state: if the unpack cell has already been run, the
# arrays used in the mask no longer match the un-cut catalogue.
if not all(len(arr) == n_total for arr in (slogg, fsdist, logg, pmra, pmdec)):
    raise RuntimeError('Cut inputs do not match the un-cut catalogue; '
                       're-run the "Read catalog values" cell first.')

# Keep stars with logg error < 0.2 dex, fractional distance error < 0.2,
# logg < 3, and proper motions that are neither the -9999 flag value nor NaN
keep = ( ( slogg < 0.2 ) &
         ( fsdist < 0.2 ) &
         ( logg < 3 ) &
         ( pmra != -9999 ) &
         ( pmdec != -9999 ) &
         ( ~np.isnan(pmra) ) &
         ( ~np.isnan(pmdec) ) )
good_stars = np.where(keep)[0]

print('Number of stars remaining is: '+str(len(good_stars)))
print('This is about '+str(int(100*len(good_stars)/n_total))+'%')

# Cut the large arrays
all_data_flt_good = all_data_flt[good_stars]
all_data_str_good = all_data_str[good_stars]

Number of stars remaining is: 117261
This is about 42%


In [19]:
# Split the cut mega-arrays back into one 1-D array per quantity.
# NOTE: this rebinds the per-star names (apid, ra, logg, slogg, pmra, ...)
# that the cut cell reads, so from here on they hold only the stars that
# passed the cuts.
apid, locid = np.transpose(all_data_str_good)
(ra, dec, logg, mg, si, ca, fe, dist,
 slogg, smg, ssi, sca, sfe, sdist,
 pmra, pmdec, vhelio, svhelio) = np.transpose(all_data_flt_good)

### Abundances relative to iron: Mg, Si and Ca minus Fe

mgfe, sife, cafe = mg - fe, si - fe, ca - fe

# Convert 
Convert all data to Galactocentric coordinates.

In [20]:
### First define the Sun's position and velocity in the Galactocentric frame

x_sun = 8.125 # Gravity team (used below in kpc)
z_sun = 0.0208 # Bennett & Bovy 2018 (used below in kpc)
SgrA_pmra = 6.379 # Reid (2004) SgrA* PM
u = 11.1  # } Schoenrich (used below in km/s)
w = 7.25  # }

# Conversion factor used to turn (proper motion x distance) into a velocity
pm_to_kms = 4.74047

# Gravity x Reid (2004) SgrA* PM.
# The apparent proper motion of Sgr A* is the reflex of the Sun's *total*
# tangential velocity (circular velocity + peculiar motion), so this product
# is already the full solar v. The Schoenrich peculiar v (12.24) must NOT be
# added on top; doing so counts the peculiar motion twice.
# NOTE(review): this lowers v by ~12 km/s relative to the previous definition
# and therefore changes the generated velocities -- confirm before
# regenerating the dataset.
v = SgrA_pmra * x_sun * pm_to_kms

In [22]:
### Make SkyCoord

# Collect the keyword arguments first, with units attached, then build the
# coordinate object in one call.
coord_kwargs = dict(
    # Observed position and distance
    ra = ra*apu.degree,
    dec = dec*apu.degree,
    distance = dist*apu.kpc,
    # Observed proper motions and line-of-sight velocity
    pm_ra_cosdec = pmra*apu.mas/apu.yr,
    pm_dec = pmdec*apu.mas/apu.yr,
    radial_velocity = vhelio*apu.km/apu.s,
    # Solar position and velocity defined in the previous cell.
    # NOTE(review): presumably stored here so the later transform to
    # 'galactocentric' uses them -- confirm against the astropy version in use.
    galcen_distance = x_sun*apu.kpc,
    z_sun = z_sun*apu.kpc,
    galcen_v_sun = CartesianDifferential([u, v, w]*apu.km/apu.s),
)

coords = SkyCoord(**coord_kwargs)

In [23]:
### Transform into new frames

coords_gal = coords.transform_to('galactic')
coords_gc = coords.transform_to('galactocentric')

# Units are converted explicitly before stripping them, so the plain arrays
# are guaranteed to be in the units stated in the comments rather than in
# whatever unit each quantity happens to carry.

# Get galactic proper motions in mas/yr
pm_unit = apu.mas/apu.yr
pmll = coords_gal.pm_l_cosb.to(pm_unit).value
pmbb = coords_gal.pm_b.to(pm_unit).value

# Get galactocentric positions in kpc
gc_x = coords_gc.x.to(apu.kpc).value # * -1 # Flip for LH
gc_y = coords_gc.y.to(apu.kpc).value
gc_z = coords_gc.z.to(apu.kpc).value

# Get galactocentric velocities in km/s
vel_unit = apu.km/apu.s
gc_vx = coords_gc.v_x.to(vel_unit).value
gc_vy = coords_gc.v_y.to(vel_unit).value
gc_vz = coords_gc.v_z.to(vel_unit).value

# Save
Save the new dataset

In [27]:
### Save the new dataset

# One (name, dtype, values) entry per output column. Keeping the three pieces
# of information side by side means a column cannot be added, removed or
# reordered in one place but not the others.
out_columns = [
    ('APOGEE_ID',   'U20',   apid),     # ID
    ('LOCATION_ID', 'U20',   locid),    # ID
    ('RA',          'float', ra),
    ('DEC',         'float', dec),
    ('DIST',        'float', dist),     # kpc
    ('DIST_ERR',    'float', sdist),    # kpc
    ('VHELIO',      'float', vhelio),
    ('VHELIO_ERR',  'float', svhelio),
    ('PM_RA',       'float', pmra),
    ('PM_DEC',      'float', pmdec),
    ('PM_LL',       'float', pmll),     # Galactic proper motion
    ('PM_BB',       'float', pmbb),     # Galactic proper motion
    ('X',           'float', gc_x),     # Galactocentric position
    ('Y',           'float', gc_y),
    ('Z',           'float', gc_z),
    ('VX',          'float', gc_vx),    # Galactocentric velocity
    ('VY',          'float', gc_vy),
    ('VZ',          'float', gc_vz),
]

# Make names, data types and data in the form expected by Table
out_names = tuple(column[0] for column in out_columns)
out_dtypes = tuple(column[1] for column in out_columns)
out_data = [column[2] for column in out_columns]

# `table` is astropy.table, imported with the other packages at the top of
# the notebook.
out_tab = table.Table(out_data, names=out_names, dtype=out_dtypes)

# The format is given explicitly so the write does not depend on the
# upper-case '.FIT' extension being recognised as FITS.
out_tab.write('../../data/generated/gaiadr2-apogee_dataset.FIT',
              format='fits', overwrite=True)

# End