In [3]:
# ----------------------------------------------------------------------------
#
# TITLE - gaiadr2-apogee_dr14_dataset.ipynb
# AUTHOR - James Lane
# PROJECT - AST 1501
#
# ----------------------------------------------------------------------------
#
# Docstrings and metadata:
'''
Prepare the Gaia DR2 - APOGEE DR14 dataset.
'''

__author__ = "James Lane"

In [4]:
### Imports

## Basic
import numpy as np
import sys, os, pdb

## Astropy
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import table
from astropy import units as apu
from astropy.coordinates import CartesianDifferential

## galpy & astroNN
from galpy.util import bovy_coords as gpcoords
import astroNN.apogee

In [5]:
### Change Jupyter settings

from IPython.core.interactiveshell import InteractiveShell
from IPython.display import Markdown, display
from IPython.core.debugger import set_trace

# Matplotlib for notebooks: draw figures inline in the cell output and
# request the 'retina' figure format from the inline backend
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Pretty print the output of every expression in a cell, not just the last one
InteractiveShell.ast_node_interactivity = "all"

# Print using markdown
def printmd(string, color=None):
    '''
    Display ``string`` as Markdown in the cell output.

    Args:
        string: Text to render; it is wrapped in an HTML <span>.
        color (optional): CSS colour applied to the text. When None no
            style attribute is written (previously 'color:None', which is
            not a valid CSS value, was emitted).

    Returns:
        None
    '''
    # Only add the style attribute when a colour was actually requested
    if color is None:
        style_attr = ""
    else:
        style_attr = " style='color:{}'".format(color)
    colorstr = "<span{}>{}</span>".format(style_attr, string)
    display(Markdown(colorstr))

# Meta

## Future work
- Include errors on Galactocentric quantities. Using MC approach?
- Output abundances, $\log g$ and $T_{eff}$ along with kinematic parameters

## Information
- APOGEE DR14 data model listed [here](https://data.sdss.org/datamodel/files/APOGEE_REDUX/APRED_VERS/APSTAR_VERS/ASPCAP_VERS/RESULTS_VERS/allStar.html)

# Load
Load the astroNN abundance and distance catalogs and the APOGEE DR14 allStar catalog

In [6]:
### Load catalogs

# Names
abundance_catalog = '../../data/astronn/astroNN_apogee_dr14_catalog.fits'
distance_catalog = '../../data/astronn/apogee_dr14_nn_dist_0562.fits'
# The value returned by astroNN is handed straight to fits.open below
apogee_catalog = astroNN.apogee.allstar(dr=14)

# Load and extract data. Each file is opened in a context manager so the
# handle is closed again instead of staying open for the whole session.
# The table of extension 1 is read inside the block so that it is still
# available afterwards.
# NOTE(review): relies on astropy keeping already-accessed (memory-mapped)
# table data valid after the HDUList is closed -- confirm for the astropy
# version in use.
with fits.open(abundance_catalog) as abund_file:
    abund_data = abund_file[1].data
with fits.open(distance_catalog) as dist_file:
    dist_data = dist_file[1].data
with fits.open(apogee_catalog) as ap_file:
    ap_data = ap_file[1].data

  return f(*args, **kwds)

  from ._conv import register_converters as _register_converters

/Users/JamesLane/Software/Python/External/data/apogee/sdss_local_sas_mirror/dr14/apogee/spectro/redux/r8/stars/l31c/l31c.2/allStar-l31c.2.fits was found!


In [7]:
### For debugging or searching
# print(abund_data.columns)
# print(dist_data.columns)
# print(ap_data.columns)

In [8]:
### Read catalog values

# The three catalogs are combined row-by-row below, so they must at least
# contain the same number of rows. Fail early with a clear message rather
# than building a ragged array.
# NOTE(review): identical row ORDER across the catalogs is assumed, not
# verified here -- confirm against the catalog documentation.
catalog_lengths = (len(abund_data), len(dist_data), len(ap_data))
if len(set(catalog_lengths)) != 1:
    raise ValueError('Catalogs have different numbers of rows '
                     '(abundance, distance, APOGEE): '+str(catalog_lengths))

# ID, RA, Dec, logg, abundances, errors
apid = abund_data['APOGEE_ID']
locid = abund_data['LOCATION_ID']
abunds = abund_data['astroNN']
abunds_err = abund_data['astroNN_error']
ra = abund_data['RA']
dec = abund_data['DEC']

# the order of the array is 
#  0     1       2    3     4    5    6     7     8     9     10
# [Teff, log(g), C/H, C1/H, N/H, O/H, Na/H, Mg/H, Al/H, Si/H, P/H, 
# 11   12   13    14    15     16   17    18    19    20    21
# S/H, K/H, Ca/H, Ti/H, Ti2/H, V/H, Cr/H, Mn/H, Fe/H, Co/H, Ni/H]
# Transposing turns each of the 22 columns into its own array; names
# prefixed with 's' hold the matching values from 'astroNN_error'.
(teff, logg, c, c1, n, o, na, mg, al, si, p, s, k, ca,
 ti, ti2, va, cr, mn, fe, co, ni) = abunds.T
(steff, slogg, sc, sc1, sn, so, sna, smg, sal, ssi, sp, ss, sk, sca,
 sti, sti2, sva, scr, smn, sfe, sco, sni) = abunds_err.T

# Distance, distance error, pmRA, pmDec
dist = dist_data['pc'] / 1000 # In kpc
sdist = dist_data['pc_error'] / 1000 # In kpc
fsdist = sdist / dist # Fractional distance error
pmra = dist_data['pmra']
pmdec = dist_data['pmdec']

# Radial velocity
vhelio = ap_data['VHELIO_AVG']
svhelio = ap_data['VERR']

# Make into a mega-array for easy cutting. Separate by floats and string.
# After the transpose each row is one star and each column one quantity;
# the column order here is the order used when unpacking after the cuts.
all_data_str = np.array([apid, locid]).T
all_data_flt = np.array([ra, dec, dist, teff, logg, 
                         c, c1, n, o, na, mg, al, si, p, s, k, ca, ti, ti2, va, cr, mn, fe, co, ni,
                         sdist, steff, slogg, 
                         sc, sc1, sn, so, sna, smg, sal, ssi, sp, ss, sk, sca, sti, sti2, sva, scr, smn, sfe, sco, sni,
                         pmra, pmdec, vhelio, svhelio ]).T

# Perform cuts
Cut the data based on:
- $\sigma_{\log g} < 0.2$
- $\sigma_{d}/d < 0.2$
- $\log g < 3$
- $\mu_{\alpha}$ and $\mu_{\delta}$ not $-9999$ or NaN

In [9]:
### Perform cuts

# The selection is computed from the un-cut arrays (all_data_flt and fsdist),
# which this cell never reassigns. The short names (logg, pmra, ...) ARE
# overwritten by the unpacking at the bottom, so reading them here would make
# the cell fail or give a wrong percentage when it is run a second time.

# Column positions inside all_data_flt; they must match the order in which
# all_data_flt was assembled.
LOGG_COL = 4
SLOGG_COL = 27
PMRA_COL = 48
PMDEC_COL = 49

full_logg = all_data_flt[:, LOGG_COL]
full_slogg = all_data_flt[:, SLOGG_COL]
full_pmra = all_data_flt[:, PMRA_COL]
full_pmdec = all_data_flt[:, PMDEC_COL]
n_stars_total = all_data_flt.shape[0]

# Keep stars with logg error < 0.2 dex, fractional distance error < 0.2,
# logg < 3 and proper motions that are neither -9999 nor NaN.
# NOTE(review): a -9999 placeholder in logg / its error would pass the '<'
# cuts, and the radial velocity is not screened for placeholder values --
# confirm whether such entries occur in these catalogs.
keep_star = ( ( full_slogg < 0.2 ) &
              ( fsdist < 0.2 ) &
              ( full_logg < 3 ) &
              ( full_pmra != -9999 ) &
              ( full_pmdec != -9999 ) &
              ( ~np.isnan(full_pmra) ) &
              ( ~np.isnan(full_pmdec) ) )
good_stars = np.where(keep_star)[0]

print('Number of stars remaining is: '+str(len(good_stars)))
print('This is about '+str(int(100*len(good_stars)/n_stars_total))+'%')

# Cut the large arrays
all_data_flt_good = all_data_flt[good_stars]
all_data_str_good = all_data_str[good_stars]

# Unpack the data: from here on the short names refer to the cut sample
apid, locid = all_data_str_good.T
(ra, dec, dist, teff, logg,
 c, c1, n, o, na, mg, al, si, p, s, k, ca, ti, ti2, va, cr, mn, fe, co, ni,
 sdist, steff, slogg,
 sc, sc1, sn, so, sna, smg, sal, ssi, sp, ss, sk, sca, sti, sti2, sva, scr, smn, sfe, sco, sni,
 pmra, pmdec, vhelio, svhelio) = all_data_flt_good.T

Number of stars remaining is: 117261
This is about 42%


# Convert 
Convert all data to galactocentric coordinates

In [14]:
### First define the LSR frame:

# Position of the Sun (used as kpc when the coordinates are built)
x_sun = 8.125 # Gravity team
z_sun = 0.0208 # Bennett & Bovy 2018

# Proper motion of Sgr A*
SgrA_pmra = 6.379 # Reid (2004)

# Solar motion components (Schoenrich); used as km/s when the coordinates
# are built
u = 11.1
w = 7.25

# Tangential component: Schoenrich value plus the velocity implied by the
# Sgr A* proper motion at the adopted distance (Gravity x Reid 2004).
# The factor 4.74 presumably converts mas/yr * kpc into km/s.
# NOTE(review): the Sgr A* proper motion may already include the Sun's
# peculiar tangential motion, in which case adding 12.24 counts it twice --
# confirm the intended definition.
v = 12.24 + (SgrA_pmra * x_sun * 4.74)

In [15]:
### Make SkyCoord

# Solar velocity vector and Galactic-centre geometry defined for the LSR frame
sun_velocity = CartesianDifferential([u, v, w]*apu.km/apu.s)
galcen_settings = dict(galcen_distance = x_sun*apu.kpc,
                       z_sun = z_sun*apu.kpc,
                       galcen_v_sun = sun_velocity)

# Observed position, distance, proper motion and radial velocity of each star
observed = dict(ra = ra*apu.degree,
                dec = dec*apu.degree,
                distance = dist*apu.kpc,
                pm_ra_cosdec = pmra*apu.mas/apu.yr,
                pm_dec = pmdec*apu.mas/apu.yr,
                radial_velocity = vhelio*apu.km/apu.s)

# Initiate with LSR kinematics from above.
coords = SkyCoord(**observed, **galcen_settings)

In [16]:
### Transform into new frames

coords_gal = coords.transform_to('galactic')
coords_gc = coords.transform_to('galactocentric')

# Every quantity is converted to the unit named in its comment before the
# bare number is taken, so the stored values do not silently depend on the
# units the coordinates happened to be created with.
pm_unit = apu.mas/apu.yr
vel_unit = apu.km/apu.s

# Get galactic proper motions in mas/yr
pmll = coords_gal.pm_l_cosb.to(pm_unit).value
pmbb = coords_gal.pm_b.to(pm_unit).value

# Get galactocentric positions in kpc
gc_x = coords_gc.x.to(apu.kpc).value # * -1 # Flip for LH
gc_y = coords_gc.y.to(apu.kpc).value
gc_z = coords_gc.z.to(apu.kpc).value

# Get galactocentric velocities in km/s
gc_vx = coords_gc.v_x.to(vel_unit).value
gc_vy = coords_gc.v_y.to(vel_unit).value
gc_vz = coords_gc.v_z.to(vel_unit).value

# Save
Save the new dataset

In [17]:
### Save the new dataset

# Each output column is described once as (name, dtype, data) so that the
# names, dtypes and data can never get out of step with one another.
# `table` is the astropy module imported with the other imports at the top
# of the notebook.

# Identifiers, observed quantities and derived kinematics
out_columns = [
    ('APOGEE_ID',    'U20',   apid),
    ('LOCATION_ID',  'U20',   locid),
    ('RA',           'float', ra),
    ('DEC',          'float', dec),
    ('DIST',         'float', dist),
    ('DIST_ERROR',   'float', sdist),
    ('VHELIO',       'float', vhelio),
    ('VHELIO_ERROR', 'float', svhelio),
    ('PM_RA',        'float', pmra),
    ('PM_DEC',       'float', pmdec),
    ('PM_LL',        'float', pmll),
    ('PM_BB',        'float', pmbb),
    ('X',            'float', gc_x),
    ('Y',            'float', gc_y),
    ('Z',            'float', gc_z),
    ('VX',           'float', gc_vx),
    ('VY',           'float', gc_vy),
    ('VZ',           'float', gc_vz),
]

# Stellar parameters and abundances, in the catalog order
#  0     1       2    3     4    5    6     7     8     9     10
# [Teff, log(g), C/H, C1/H, N/H, O/H, Na/H, Mg/H, Al/H, Si/H, P/H, 
# 11   12   13    14    15     16   17    18    19    20    21
# S/H, K/H, Ca/H, Ti/H, Ti2/H, V/H, Cr/H, Mn/H, Fe/H, Co/H, Ni/H]
# Each entry is (label, values, errors) and yields the two float columns
# '<label>' and '<label>_ERROR'.
measured_quantities = [
    ('TEFF', teff, steff), ('LOGG', logg, slogg),
    ('C', c, sc),    ('C1', c1, sc1),    ('N', n, sn),    ('O', o, so),
    ('NA', na, sna), ('MG', mg, smg),    ('AL', al, sal), ('SI', si, ssi),
    ('P', p, sp),    ('S', s, ss),       ('K', k, sk),    ('CA', ca, sca),
    ('TI', ti, sti), ('TI2', ti2, sti2), ('V', va, sva),  ('CR', cr, scr),
    ('MN', mn, smn), ('FE', fe, sfe),    ('CO', co, sco), ('NI', ni, sni),
]
for label, values, errors in measured_quantities:
    out_columns.append((label, 'float', values))
    out_columns.append((label+'_ERROR', 'float', errors))

# Make names, data types and data
out_names = tuple(column[0] for column in out_columns)
out_dtypes = tuple(column[1] for column in out_columns)
out_data = [column[2] for column in out_columns]

# Every column must describe the same set of stars
n_out_rows = len(apid)
wrong_length = [column[0] for column in out_columns if len(column[2]) != n_out_rows]
if wrong_length:
    raise ValueError('Columns with a length different from APOGEE_ID: '
                     +', '.join(wrong_length))

# print(len(out_names))
# print(out_names)

# print(len(out_dtypes))
# print(out_dtypes)

print('Outputing this data...')
for i, (column_name, column_data) in enumerate(zip(out_names, out_data)):
    print('-------')
    print(str(i))
    print(column_name)
    print(column_data)
    print('length: '+str(len(column_data)))

out_tab = table.Table(out_data, names=out_names, dtype=out_dtypes)
# The format is given explicitly so the writer does not have to be inferred
# from the upper-case '.FIT' extension
out_tab.write('../../data/generated/gaiadr2-apogee_dr14_dataset.FIT',
              format='fits', overwrite=True)
print('-------')
print('Done')

Outputing this data...
-------
0
APOGEE_ID
['2M00000002+7417074' '2M00000032+5737103' '2M00000211+6327470' ...
 '2M23595676+7918072' '2M23595837+5700444' '2M23595886+5726058']
length: 117261
-------
1
LOCATION_ID
['5046' '4424' '4591' ... '5047' '4264' '4424']
length: 117261
-------
2
RA
[1.03000000e-04 1.33500000e-03 8.80200000e-03 ... 3.59986523e+02
 3.59993224e+02 3.59995265e+02]
length: 117261
-------
3
DEC
[74.285408 57.61953  63.463078 ... 79.302002 57.012356 57.434956]
length: 117261
-------
4
DIST
[2.84632764 0.5783974  4.07469263 ... 2.39293481 4.63687598 2.46687451]
length: 117261
-------
5
DIST_ERROR
[0.52983628 0.0879025  0.59741247 ... 0.26141679 0.36619701 0.21921809]
length: 117261
-------
6
VHELIO
[ -52.16899872  -20.75989914 -107.01399994 ...  -38.98910141  -70.4108963
  -61.15209961]
length: 117261
-------
7
VHELIO_ERROR
[0.00101202 0.0378433  0.00978559 ... 0.00438103 0.0273239  0.010129  ]
length: 117261
-------
8
PM_RA
[ 0.13850269 -0.40114299 -1.07228607 ...  7.27

# End