In [1]:
# ------------------------------------------------------------------------
#
# TITLE - get_data.ipynb
# PROJECT - mw_dfs
#
# ------------------------------------------------------------------------
#
# Docstrings and metadata:
'''Get Gaia DR2 and APOGEE DR16 data.

TODO: change data_dir (currently a hard-coded path) to a general location.'''

__author__ = "James Lane"

In [2]:
### Imports
import os
import numpy as np
# dill is bound to the name `pickle`; it is what serialises/deserialises the
# selection function object in the statistical sample cell
import dill as pickle

# Set APOGEE version for the package
RESULTS_VERS = 'l33'
# Year 7 is appropriate for DR16 (l33); passed as `year=` to
# apsel.apogeeCombinedSelect when the selection function is built
apo_combined_select_year = 7
# Export the version before the apogee imports below.
# NOTE(review): presumably the apogee package reads RESULTS_VERS from the
# environment to pick the data release -- keep this line above the imports.
os.environ['RESULTS_VERS'] = RESULTS_VERS

# APOGEE, Gaia
import apogee.tools.read as apread
import apogee.select as apsel
import apogee.tools as apotools
from gaia_tools import xmatch





In [3]:
### Notebook setup
# Enable the IPython autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [4]:
### Keywords

# Force (re)generation of each data product even if a cached copy already
# exists on disk (the cached copy is then overwritten)?
# Note: the cells below set the downstream flags to True themselves whenever
# an upstream product is regenerated, so these values are only the defaults.
force_load_allstar = False
force_load_apogeeSF = False
force_load_statIndx = False
force_load_gaiadr2 = False

# Keywords for APOGEE allstar (see apread.allStar.__doc__); each is passed
# straight through to apread.allStar under the same keyword name
allstar_main = True # Only select stars in the main survey
allstar_rmdups = True # Remove duplicates
allstar_use_astroNN_abundances = False # Replace ASPCAP w/ astroNN abundances
allstar_use_astroNN_distances = True # Add in astroNN distances
allstar_use_astroNN_ages = False # Add in astroNN ages

In [5]:
### Pathing

# Outputs
# NOTE: hard-coded, machine-specific location -- edit this for your own
# system. The trailing separator is kept on purpose so that any code that
# still builds paths as data_dir+'name' keeps working.
data_dir = '/geir_data/scr/lane/projects/mw-dfs/data/data_Sept_2021/gaia_apogee/'

# Make sure the output directory exists before anything is computed, so the
# np.save / pickle.dump calls in the later cells cannot fail on a missing
# directory after a long-running step
os.makedirs(data_dir, exist_ok=True)

# Cached data products written/read by the cells below. The ones tagged
# with RESULTS_VERS are specific to the APOGEE results version in use.
allstar_filename = os.path.join(data_dir, 'allstar.npy')
apogeeSF_filename = os.path.join(data_dir, 'apogeeSF_'+RESULTS_VERS+'.dat')
statIndx_filename = os.path.join(data_dir, 'statIndx_'+RESULTS_VERS+'.npy')
gaia2_data_filename = os.path.join(data_dir, 'GDR2_'+RESULTS_VERS+'_data.npy')
apogee_matches_filename = os.path.join(
    data_dir, 'GDR2_'+RESULTS_VERS+'_apogee_matches.npy')

# Get APOGEE Data

In [6]:
# Obtain the APOGEE allstar catalogue: regenerate it when forced or when no
# cached copy exists on disk, otherwise read the cached copy
if force_load_allstar or not os.path.exists(allstar_filename):
    print('Loading APOGEE from results version: '+RESULTS_VERS)
    allstar = apread.allStar(
        main=allstar_main,
        rmdups=allstar_rmdups,
        use_astroNN_abundances=allstar_use_astroNN_abundances,
        use_astroNN_distances=allstar_use_astroNN_distances,
        use_astroNN_ages=allstar_use_astroNN_ages,
    )
    print('Saving allstar as: '+allstar_filename)
    np.save(allstar_filename, allstar)
    # A freshly generated allstar makes every product derived from it stale,
    # so force the cells below to regenerate theirs as well
    force_load_apogeeSF = force_load_statIndx = force_load_gaiadr2 = True
else:
    print('Loading pre-computed allstar from '+allstar_filename)
    allstar = np.load(allstar_filename)
##ie

Loading APOGEE from results version: l33

Removing duplicates (might take a while) and caching the duplicate-free file ... (file not cached if use_astroNN=True)

100%|██████████| 105448/105448 [00:08<00:00, 13038.61it/s]


Saving allstar as: /geir_data/scr/lane/projects/mw-dfs/data/data_Sept_2021/gaia_apogee/allstar.npy


# Get the APOGEE statistical sample

In [7]:
# Obtain the APOGEE combined selection function: rebuild it when forced or
# when no stored copy exists, otherwise read the stored copy back from disk
if force_load_apogeeSF or not os.path.exists(apogeeSF_filename):
    apo = apsel.apogeeCombinedSelect(year=apo_combined_select_year)
    with open(apogeeSF_filename, 'wb') as f:
        pickle.dump(apo, f)
    ##wi
    # Products derived from the selection function are now out of date,
    # so force them to be regenerated below
    force_load_statIndx = force_load_gaiadr2 = True
else:
    print('Loading pre-computed selection function from '+apogeeSF_filename)
    # Unpickling can run arbitrary code: only load files this notebook wrote
    with open(apogeeSF_filename, 'rb') as f:
        apo = pickle.load(f)
    ##wi
##ie

# Obtain the statistical-sample selector (used later to index allstar):
# recompute it from the selection function when forced or when no cached
# copy exists, otherwise read the cached copy
if force_load_statIndx or not os.path.exists(statIndx_filename):
    statIndx = apo.determine_statistical(allstar)
    np.save(statIndx_filename, statIndx)
    # The Gaia cross-match is made on allstar[statIndx], so it must be redone
    force_load_gaiadr2 = True
else:
    print('Loading pre-computed statistical sample from '+statIndx_filename)
    statIndx = np.load(statIndx_filename)
##ie

Reading and parsing spectroscopic data; determining statistical sample ...      

100%|██████████| 76665/76665 [19:37<00:00, 65.10it/s]


  aj= data[aktag]*2.5

  ah= data[aktag]*1.55


Reading and parsing spectroscopic data; determining statistical sample ...      

100%|██████████| 99202/99202 [28:44<00:00, 57.53it/s]



Reading and parsing spectroscopic data; determining statistical sample ...      

100%|██████████| 43946/43946 [13:16<00:00, 55.20it/s]


                                                                                

100%|██████████| 261012/261012 [1:14:19<00:00, 58.53it/s]


## Get the Gaia DR2 data

In [8]:
# Get the Gaia DR2 counterparts of the APOGEE statistical sample.
# Both cached files must be present for the cache to be used; otherwise the
# sample is cross-matched against the CDS copy of Gaia DR2 and both results
# are written to disk.
#
# Defines:
#   gaia2_data   - first value returned by xmatch.cds (the Gaia DR2 entries)
#   matches_indx - second value returned by xmatch.cds
#                  NOTE(review): presumably the indices into
#                  allstar[statIndx] of the matched stars -- confirm against
#                  the gaia_tools documentation
gaia_cache_exists = (os.path.exists(gaia2_data_filename)
                     and os.path.exists(apogee_matches_filename))
if not force_load_gaiadr2 and gaia_cache_exists:
    gaia2_data = np.load(gaia2_data_filename)
    # Bind the same name as the cross-match branch below. This branch used
    # to define `matches_index`, leaving `matches_indx` undefined whenever
    # the cached files were used.
    matches_indx = np.load(apogee_matches_filename)
else:
    gaia2_data, matches_indx = xmatch.cds(allstar[statIndx],
                                          colRA='RA', colDec='DEC',
                                          xcat='vizier:I/345/gaia2',
                                          gaia_all_columns=True)
    np.save(gaia2_data_filename, gaia2_data)
    np.save(apogee_matches_filename, matches_indx)
##ie

Created TAP+ (v1.2.1) - Connection:                                             
	Host: gea.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Created TAP+ (v1.2.1) - Connection:
	Host: geadata.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
INFO: Query finished. [astroquery.utils.tap.core]
