In [None]:
# ------------------------------------------------------------------------
#
# TITLE - make_ksf.ipynb
# AUTHOR - James Lane
# PROJECT - ges_mass
#
# ------------------------------------------------------------------------
#
# Docstrings and metadata:
'''Create the KSF. First create DF models of the halo, then sample kinematics 
on a sparse grid. Finally map onto the effective selection function grid using 
spline interpolation.
'''

__author__ = "James Lane"

In [None]:
### Imports

## Basic
import numpy as np, sys, os, copy, warnings, operator, time
from astropy import units as apu
from matplotlib import pyplot as plt
import matplotlib as mpl
import dill as pickle

## galpy
from galpy import orbit
from galpy import potential
from galpy import actionAngle as aA
from galpy import df
from galpy.util import multi as galpy_multi

## scipy
from scipy import interpolate

## Project-specific
sys.path.insert(0,'../../src/')
from ges_mass import potential as ppotential
from ges_mass import util as putil
from ges_mass import ssf as pssf

### Notebook setup

%matplotlib inline
plt.style.use('../../src/mpl/project.mplstyle') # This must be exactly here
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

### Keywords, Pathing, Loading, Data Preparation
Note: you may need to manually set `DF_VERSION` and `KSF_VERSION`.

In [None]:
# %load ../../src/nb_modules/keywords_pathing_loading_data_prep.py
# Shared setup cell: read the project configuration, build the versioned 
# paths and filenames, then load the data products used by this notebook.

## Keywords
# Read the config into a dict and pull out the keys listed in `keywords`.
# The unpacking below is positional (presumably the values come back in the 
# order of `keywords` -- TODO confirm against putil.parse_config_dict), so the 
# two lists of names must be kept in step.
cdict = putil.load_config_to_dict()
keywords = ['BASE_DIR','APOGEE_DR','APOGEE_RESULTS_VERS','GAIA_DR','NDMOD',
            'DMOD_MIN','DMOD_MAX','LOGG_MIN','LOGG_MAX','FEH_MIN','FEH_MAX',
            'FEH_MIN_GSE','FEH_MAX_GSE','DF_VERSION','KSF_VERSION','NPROCS',
            'RO','VO','ZO']
base_dir,apogee_dr,apogee_results_vers,gaia_dr,ndmod,dmod_min,dmod_max,\
    logg_min,logg_max,feh_min,feh_max,feh_min_gse,feh_max_gse,df_version,\
    ksf_version,nprocs,ro,vo,zo = putil.parse_config_dict(cdict,keywords)
# [min,max] pairs assembled from the config values
logg_range = [logg_min,logg_max]
feh_range = [feh_min,feh_max]
feh_range_gse = [feh_min_gse,feh_max_gse]
# NOTE(review): runs from feh_min to feh_max_gse (not feh_max) -- confirm 
# that this upper bound is intended
feh_range_all = [feh_min,feh_max_gse]
# TODO: feh_range_fit still needs to be chosen (unfinished line kept below)
# feh_range_fit = copy.deepcopy( # Need to choose here

# # Manually override df and ksf information if necessary
# df_version = 'v1.1_alpha35_rc30'
# ksf_version = 'v1_beta_03_09_5050mix'
# These two assignments replace the DF_VERSION / KSF_VERSION values parsed 
# from the config above
df_version = 'v4_lb_powerspher_hb_hernquist'
ksf_version = 'v4.01_beta_03_6e8_08_1.5e8'

## Pathing
# Directories built from the base directory, data releases and the 
# (possibly overridden) DF / KSF versions
fit_paths = putil.prepare_paths(base_dir,apogee_dr,apogee_results_vers,gaia_dr,
                                df_version,ksf_version)
data_dir,version_dir,ga_dir,gap_dir,df_dir,ksf_dir,fit_dir = fit_paths

## Filenames
fit_filenames = putil.prepare_filenames(ga_dir,gap_dir,feh_range_gse)
apogee_SF_filename,apogee_effSF_filename,apogee_effSF_mask_filename,\
    iso_grid_filename,clean_kinematics_filename = fit_filenames

## File loading and data preparation
# return_other=True also returns `other_stuff`, which is only unpacked in the 
# commented-out line below
fit_stuff,other_stuff = putil.prepare_fitting(fit_filenames,
    [ndmod,dmod_min,dmod_max],ro,zo,return_other=True)
apogee_effSF_mask,dmap,iso_grid,jkmins,dmods,ds,effsel_grid,apof,\
    allstar_nomask,orbs_nomask = fit_stuff
# The effective selection function grid is unpacked as (R, phi, z)
Rgrid,phigrid,zgrid = effsel_grid
# apogee_SF,apogee_effSF_grid_inclArea,apogee_effSF_grid_inclArea_Jac = other_stuff

# ## Load the APOGEE field information, can also similarly load 
# ## '...apogee_field_glons.npy', '...apogee_field_glats.npy', 
# ## '...apogee_field_location_ids.npy'
apogee_fields = np.load(ga_dir+'apogee_fields.npy')

### Extra files and parameters for this notebook

In [None]:
# Figure output directory (relative to the notebook)
fig_dir = './fig/'

# Products stored under df_dir
df_filename = f'{df_dir}dfs.pkl'
kinematics_filename = f'{df_dir}kinematics_ksf_correction.pkl'

# Products stored under ga_dir
allstar_filename = f'{ga_dir}apogee_allstar.npy'
apogee_stat_indx_filename = f'{ga_dir}apogee_statIndx.npy'
gaia_data_filename = f'{ga_dir}gaia_data.npy'
gaia_apogee_matches_filename = f'{ga_dir}gaia_apogee_matches.npy'

# Switches that force recomputation instead of reusing saved products:
# new DFs, new orbit samples, new splines of completeness and purity
force_dfs,force_kinematics,force_splines = True,True,True

## Make potential and DFs

In [None]:
## Interpolated Milky Way potential
# Build an interpolated version of MWPotential2014 for the DFs and keep the 
# plain MWPotential2014 around as well. The interpolation range is padded by 
# a factor of 2 on either side of [rmin,rmax].
rmin = 1/ro # 1 kpc
rmax = 80/ro # 80 kpc
rmin_interp = rmin/2.
rmax_interp = rmax*2.
ngrid = 1601
interpot = ppotential.make_interpolated_mwpot(mwpot='MWPotential2014',
    rmin=rmin_interp, rmax=rmax_interp, ngrid=ngrid, ro=ro, vo=vo,
    match_type='mass')
mwpot = potential.MWPotential2014
potential.turn_physical_on(interpot,ro=ro,vo=vo)
potential.turn_physical_on(mwpot,ro=ro,vo=vo)
# Potential evaluated at a very large radius (R=1e12, z=0); .value strips 
# the astropy unit
phi0 = potential.evaluatePotentials(mwpot,1e12,0).value

## Stellar halo density potential

# # Lane+ 2022 model 
# alpha = 3.5 # halo density inner power law slope
# rc = 30*apu.kpc
# denspot = potential.PowerSphericalPotentialwCutoff(amp=1., r1=1.,
#     alpha=alpha, rc=rc, ro=ro, vo=vo)
# potential.turn_physical_on(denspot,ro=ro,vo=vo)

# Or make a denspot each for lowbeta and highbeta + give normalizing information
# Low-beta population: single power law
alpha_lb = 2.5
denspot_lb = potential.PowerSphericalPotential(amp=1., r1=1., alpha=alpha_lb,
    ro=ro, vo=vo)
potential.turn_physical_on(denspot_lb,ro=ro,vo=vo)
# High-beta population: two-power profile with scale radius a_hb
alpha_hb = 1.
beta_hb = 4.
a_hb = 20.*apu.kpc
denspot_hb = potential.TwoPowerSphericalPotential(amp=1., alpha=alpha_hb,
    beta=beta_hb, a=a_hb, ro=ro, vo=vo)
potential.turn_physical_on(denspot_hb,ro=ro,vo=vo)

# Give some way to normalize the two density profiles if they're not the same:
# each profile is rescaled to hold its target mass between denspot_norm_rmin 
# and denspot_norm_rmax
denspot_norm_rmin = 2*apu.kpc
denspot_norm_rmax = 55*apu.kpc
mass_denspot_lb = 6e8*apu.M_sun
mass_denspot_hb = 1.5e8*apu.M_sun
denspot_lb = ppotential.normalize_potential_from_mass(denspot_lb,
    mass_denspot_lb,denspot_norm_rmin,denspot_norm_rmax)
denspot_hb = ppotential.normalize_potential_from_mass(denspot_hb,
    mass_denspot_hb,denspot_norm_rmin,denspot_norm_rmax)

# Verify the normalization. Compare explicitly in solar masses rather than 
# on raw .value so the check does not depend on the unit of the returned mass.
shell_mass_lb = potential.mass(denspot_lb,denspot_norm_rmax)-\
    potential.mass(denspot_lb,denspot_norm_rmin)
assert np.isclose(shell_mass_lb.to_value(apu.M_sun),
    mass_denspot_lb.to_value(apu.M_sun)),'mass is not close to the target value'
shell_mass_hb = potential.mass(denspot_hb,denspot_norm_rmax)-\
    potential.mass(denspot_hb,denspot_norm_rmin)
assert np.isclose(shell_mass_hb.to_value(apu.M_sun),
    mass_denspot_hb.to_value(apu.M_sun)),'mass is not close to the target value'

# Consistency checks between df_version and the density profile parameters
if 'alpha35_rc30' in df_version:
    # alpha and rc only exist when the Lane+ 2022 block above is enabled; 
    # fail with an explanation instead of a bare NameError
    assert 'alpha' in globals() and 'rc' in globals(),\
        'df_version '+df_version+' needs alpha and rc: enable the Lane+ 2022 '\
        'denspot block above'
    assert alpha==3.5
    assert rc.to(apu.kpc).value == 30
if df_version in ('v2_lb_ps_hb_tp','v3_lb_ps_hb_tp'):
    assert alpha_lb==2.5
    assert alpha_hb==1.
    assert beta_hb in (4,4.5)
    assert a_hb.to(apu.kpc).value==20.
if df_version == 'v4_lb_powerspher_hb_hernquist':
    # The version name asks for a Hernquist high-beta profile, i.e. the 
    # two-power model with alpha=1, beta=4
    assert alpha_hb==1.
    assert beta_hb==4.
# if df_version == 'v5...':
#    assert ...
#    assert ...

In [None]:
# Make DFs: one constant-anisotropy DF per (beta, density profile) pair. 
# They are built and saved when force_dfs is set or no saved file exists; 
# otherwise they are reloaded and their main properties printed and checked.
# betas = [0.3,0.4,0.8,0.9]
betas = [0.3,0.8]
# denspots = [denspot]*len(betas)
denspots = [denspot_lb,denspot_hb]
n_dfs = len(betas)
n_samples = 1000 # samples in each distance modulus bin
assert len(denspots) == n_dfs,'Need exactly one density profile per beta'

if force_dfs or not os.path.exists(df_filename):
    dfs = []
    for this_beta,this_denspot in zip(betas,denspots):
        # DF initialization is noisy
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            dfm = df.constantbetadf(pot=interpot, denspot=this_denspot,
                                    ro=ro, vo=vo, beta=this_beta, rmax=rmax)
            # Dummy sampling to initialize
            _ = dfm.sample(R=np.ones(n_samples)*ro*apu.kpc,
                phi=np.zeros(n_samples), z=np.zeros(n_samples), rmin=rmin)
        dfs.append(dfm)
    with open(df_filename,'wb') as f:
        pickle.dump(dfs,f)
else:
    # NOTE: unpickling runs arbitrary code; only load files this project wrote
    with open(df_filename,'rb') as f:
        print('Loading DFs from '+df_filename)
        dfs = pickle.load(f)
    # A stale file with a different number of DFs would otherwise surface 
    # as an IndexError (too many) or pass silently (too few)
    assert len(dfs) == n_dfs,'Loaded '+str(len(dfs))+' DFs but expected '+\
        str(n_dfs)+'; set force_dfs=True to rebuild'
    check_params = ['_beta','_rmin_sampling','_rmax','_pot','_denspot',
                    '_denspot.alpha','_denspot.rc','_denspot.beta','_denspot.a']
    for i,this_df in enumerate(dfs):
        print('\ndf['+str(i)+'] properties')
        for param in check_params:
            # Not every density profile has every attribute
            try:
                prop = operator.attrgetter(param)(this_df)
                print(param+': '+str(prop))
            except AttributeError:
                print(param+': N/A')
        assert this_df._beta == betas[i],'Loaded DF '+str(i)+\
            ' has a different beta than requested; set force_dfs=True to rebuild'

## Gaia and APOGEE data

In [None]:
# APOGEE allstar catalog
print(f'APOGEE data release is: {apogee_dr}, and results version is: {apogee_results_vers}')
print(f'Loading APOGEE from {allstar_filename}')
allstar = np.load(allstar_filename)
print(f'{len(allstar)} stars in total sample.')

# Index of the APOGEE statistical sample into the allstar catalog
print(f'\nLoading APOGEE DR16 statistical sample from {apogee_stat_indx_filename}')
apogee_stat_indx = np.load(apogee_stat_indx_filename)
print(f'{np.sum(apogee_stat_indx)} stars in statistical sample.')

# Gaia catalog plus the index matching it to APOGEE
print(f'\nGaia data release is: {gaia_dr}')
print(f'Loading Gaia catalog from {gaia_data_filename}')
gaia_data = np.load(gaia_data_filename, allow_pickle=True)
print(f'Loading Gaia-APOGEE matches from {gaia_apogee_matches_filename}')
gaia_apogee_matches_indx = np.load(gaia_apogee_matches_filename)

# Statistical sample first, then the Gaia-APOGEE match, in that order
allstar_gaia = allstar[apogee_stat_indx][gaia_apogee_matches_indx]

In [None]:
# Eccentricities need full 6D kinematics, so keep only stars where every 
# input quantity is finite
finite_checks = [np.isfinite(gaia_data[key])
                 for key in ('RA','DEC','pmra','pmdec')]
finite_checks += [np.isfinite(allstar_gaia[key])
                  for key in ('weighted_dist','VHELIO_AVG')]
input_mask = np.logical_and.reduce(finite_checks)

gaia_input = gaia_data[input_mask]
allstar_input = allstar_gaia[input_mask]

# Observed coordinates, one row per star, in the order handed to Orbit 
# with radec=True. The distance is divided by 1000 (presumably pc -> kpc; 
# confirm against the catalog).
phase_space_columns = [gaia_input['RA'],
                       gaia_input['DEC'],
                       allstar_input['weighted_dist']/1000,
                       gaia_input['pmra'],
                       gaia_input['pmdec'],
                       allstar_input['VHELIO_AVG']]
vxvv = np.array(phase_space_columns).T
orbs_gaia_apo = orbit.Orbit(vxvv=vxvv, radec=True, ro=ro, vo=vo, zo=zo)

# Keep only some fields of both catalogs to reduce their size
gaia_input,allstar_input = putil.trim_gaia_allstar_input(gaia_input,
                                                         allstar_input)

## Get the locations of the APOGEE SF

In [None]:
# Per-pointing information: Galactic longitude, latitude and location ID
ls_pointing,bs_pointing,locids_pointing = (
    np.load(ga_dir+fname) for fname in ('apogee_field_glons.npy',
                                        'apogee_field_glats.npy',
                                        'apogee_field_location_ids.npy'))
n_pointing = len(locids_pointing)

# Distances sampled along each line of sight, evenly spaced in distance modulus
n_ds = 21
dmods_individual = np.linspace(dmod_min,dmod_max,num=n_ds) # About 1 to 50 kpc
ds_individual = 10**(dmods_individual/5-2)
print('Distances [kpc] along each LOS where KSF will be calculated:')
print(ds_individual)

# Flatten pointings x distances into 1D arrays ordered pointing by pointing: 
# the distance array is tiled end to end while each pointing property is 
# repeated element by element
ds_locs = np.tile(ds_individual,reps=len(ls_pointing))
bs_locs,ls_locs,fs_locs = (
    np.repeat(per_pointing,repeats=len(ds_individual))
    for per_pointing in (bs_pointing,ls_pointing,locids_pointing))

In [None]:
# Plot the locations of APOGEE pointings in Galactic coordinates
fig = plt.figure()
ax = fig.add_subplot(111)
# Shift longitudes above 180 deg down by 360 deg; np.where returns a new 
# array so ls_pointing itself is left untouched
ls_plot = np.where(ls_pointing>180, ls_pointing-360, ls_pointing)
pts = ax.scatter( ls_plot, bs_pointing, c='Black', s=4, zorder=2 )
# Raw strings: '\e' is not a valid escape sequence in a normal string literal
ax.set_xlabel(r'$\ell$ [deg]')
ax.set_ylabel(r'$b$ [deg]')
# Longitude axis is inverted (increasing to the left)
ax.set_xlim(185,-185)
ax.set_ylim(-95,95)
# Outline the 40 x 40 deg box centred on (0,0)
bulge_patch = mpl.patches.Rectangle(xy=(-20,-20),width=40,height=40,
    edgecolor='Black',facecolor='None', zorder=3)
ax.add_artist(bulge_patch)
fig.show()

## Make kinematics

In [None]:
# Work around a galpy error by nudging the longitude of a few grid points: 
# the first n_ds entries plus two hard-coded indices.
# NOTE(review): ls_locs is modified in place and stays modified after this 
# cell; the indices 1050 and 1060 are tied to the current grid layout.
fudged_longitude = 90.001
ls_locs[:n_ds] = fudged_longitude
for fudge_indx in (1050,1060):
    ls_locs[fudge_indx] = fudged_longitude

# One zero-velocity orbit per point of the pointing x distance grid; the 
# orbits are only used for the coordinate transformation
zero_column = np.zeros_like(ds_locs)
vxvvs_locs = np.array([ls_locs,bs_locs,ds_locs,
                       zero_column,zero_column,zero_column]).T
orbs_locs = orbit.Orbit(vxvvs_locs,lb=True,ro=ro,vo=vo,zo=zo)
n_locs = len(orbs_locs)

# Unfudged longitude grid rebuilt from the pointings (stored under a new 
# name, ls_grid; ls_locs is not restored)
ls_grid = np.repeat(ls_pointing,repeats=len(ds_individual))

In [None]:
# For every grid location, look up its nearest neighbour among the 
# Gaia-APOGEE orbits, then report the largest separations found
indx,sep2d,dist3d = putil.find_orbit_nearest_neighbor(orbs_locs,
                                                      orbs_gaia_apo,
                                                      ro=ro, vo=vo)
print('Max 2D angular separation', sep2d.max(), sep='\n')
print('\nMax 3D distance', dist3d.max(), sep='\n')

In [None]:
# Sample kinematics from each DF at every location of the pointing x distance 
# grid and save them, or reload the saved kinematics when the file exists and 
# force_kinematics is False.
calculate_beta = True
beta_filename = ksf_dir+'betas_ksf.pkl'
if not os.path.exists(kinematics_filename) or force_kinematics:
    # Staeckel action-angle object in MWPotential2014, created with a fixed 
    # delta=0.4; the per-location deltas estimated below are passed to the 
    # kinematics routine separately
    aAS = aA.actionAngleStaeckel(pot=mwpot, delta=0.4, ro=ro, vo=vo, zo=zo, c=True)
    do_perturb_orbs = False

    # Calculate deltas only once for each location
    print('Calculating Staeckel deltas...')
    delta = aA.estimateDeltaStaeckel(mwpot, orbs_locs.R(), orbs_locs.z(), 
                                     no_median=True)
    # If the estimate came back as an astropy Quantity, convert it to kpc 
    # and divide by ro
    if isinstance(delta,apu.quantity.Quantity):
        delta = delta.to(apu.kpc).value/ro
    
    # Output containers: a list of orbits per DF, plus arrays indexed as 
    # (DF, location, quantity, sample). The 3 quantities are presumably 
    # (e, E, Lz) and the three actions -- TODO confirm against pssf.
    orbs = []
    eELzs = np.zeros((len(dfs),n_locs,3,n_samples))
    actions = np.zeros((len(dfs),n_locs,3,n_samples))

    # Timing
    t1 = time.time()
    for i in range(len(dfs)):
        print('Doing beta='+str(dfs[i]._beta))
        # All warnings raised during the parallel calculation are suppressed
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            results = pssf.calc_kinematics_parallel(nprocs, dfs[i], n_samples,
                orbs_locs, do_perturb_orbs, gaia_input[indx], 
                allstar_input[indx], delta, aAS, mwpot, ro, vo, zo)
        # assumes results is an array with one row per location and three 
        # columns (orbits, eELz, actions) -- TODO confirm
        these_orbs,these_eELzs,these_actions = results.T
        orbs.append( list(these_orbs) )
        eELzs[i] = np.stack(these_eELzs)
        actions[i] = np.stack(these_actions)
    if calculate_beta:
        print('Calculating beta...')
        orbs_betas = pssf.calc_beta(orbs)
        # NOTE(review): np.save appends '.npy' to a filename that lacks it, 
        # so this is written to beta_filename+'.npy' in NumPy format, not as 
        # a pickle -- confirm that readers of this file expect that
        np.save(beta_filename,orbs_betas)

    # t2 is taken after the optional beta calculation, so the reported time 
    # includes it
    t2 = time.time()
    print('Took '+str(round(t2-t1))+'s')
    print('Saving KSF kinematics to '+kinematics_filename)
    with open(kinematics_filename,'wb') as f:
        pickle.dump([delta,eELzs,actions,orbs],f)
else:
    # Reuse the saved kinematics. Unpickling runs arbitrary code, so only 
    # load files written by this project.
    print('Loading KSF kinematics from '+kinematics_filename)
    with open(kinematics_filename,'rb') as f:
        delta,eELzs,actions,orbs = pickle.load(f)

## Calculate Purity and Completeness & Fit Splines

In [None]:
# Select the kinematics belonging to the two anisotropies of interest and 
# bundle them, low beta first, for the spline-making step

low_beta = 0.3
high_beta = 0.8

# Position of each anisotropy in `betas` (first exact match)
betas_arr = np.asarray(betas)
low_beta_indx = np.flatnonzero(betas_arr==low_beta)[0]
high_beta_indx = np.flatnonzero(betas_arr==high_beta)[0]

spline_indxs = (low_beta_indx,high_beta_indx)
orbs_spline = [orbs[k] for k in spline_indxs]
eELzs_spline = [eELzs[k] for k in spline_indxs]
actions_spline = [actions[k] for k in spline_indxs]
mixture_arr = np.ones(2) # Equal amounts of low and high beta

# These KSF versions get None in place of density profiles (the original 
# comment here was cut off; presumably this makes the purity calculation 
# ignore the density profiles -- TODO confirm against pssf)
no_denspot_ksf_versions = ['v1_beta_03_08_5050mix','v1_beta_03_09_5050mix',]
denspots_spline = [None,None] if ksf_version in no_denspot_ksf_versions \
    else [denspots[k] for k in spline_indxs]

In [None]:
# # Test to make sure that ksf version matches beta choices
# if ksf_version == 'v1_beta_03_08_5050mix':
#     assert low_beta == 0.3
#     assert high_beta == 0.8
#     assert np.all(mixture_arr == np.array([1.,1.]))
# if ksf_version == 'v1_beta_03_09_5050mix':
#     assert low_beta == 0.3
#     assert high_beta == 0.9
#     assert np.all(mixture_arr == np.array([1.,1.]))
# if ksf_version == 'v2_beta_03_6e8_09_1.5e8':
#     assert low_beta == 0.3
#     assert high_beta == 0.9
#     assert isinstance(denspots[0],potential.Potential)
#     assert isinstance(denspots[1],potential.Potential)
# if ksf_version == 'v1_beta_03_6e8_09_1.5e8':
#     assert low_beta == 0.3
#     assert high_beta == 0.9
#     assert isinstance(denspots[0],potential.Potential)
#     assert isinstance(denspots[1],potential.Potential)

In [None]:
# Selection ellipse dictionary
# Kinematic selections fetched from the project helper (version 'current'). 
# Presumably keyed by kinematic space name with a list of 
# ['ellipse', centre, semi-axes] entries per space, like the commented-out 
# dictionaries below -- TODO confirm against putil.

halo_selection_survey_dict = putil.lane2022_kinematic_selections(version='current')

# # These are old selections from a draft version of Lane+ 2021
# halo_selection_survey_dict = {'vRvT':   [ ['ellipse', [290,0], [110,35]], 
#                                           ['ellipse', [-290,0], [110,35]] ],
#                               'Toomre': [ ['ellipse', [0,300], [35,120]], ],
#                               'ELz':    [ ['ellipse', [0,-1], [300,0.5]], ],
#                               'JRLz':   [ ['ellipse', [0,45], [300,20]], ],
#                               'eLz':    [ ['ellipse', [0,1], [500,0.025]], ],
#                               'AD':     [ ['ellipse', [0,-1], [0.08,0.3]], ]
#                               }

# # These are the new selections from Lane+ 2021
# halo_selection_survey_dict = {'vRvT':   [ ['ellipse', [280,0], [100,40]], 
#                                           ['ellipse', [-280,0], [100,40]] ],
#                               'Toomre': [ ['ellipse', [0,280], [35,100]], ],
#                               'ELz':    [ ['ellipse', [0,-1], [300,0.5]], ],
#                               'JRLz':   [ ['ellipse', [0,45], [300,15]], ],
#                               'eLz':    [ ['ellipse', [0,1], [500,0.05]], ],
#                               'AD':     [ ['ellipse', [0,-1], [0.08,0.3]], ]
#                               }

### Loop over different spaces and create the splines

In [None]:
# Kinematic spaces to process; each entry is the list of spaces handed to 
# one call of the spline-making routine
selec_spaces = [['AD',],
                ['eLz',],
                ['JRLz'],
               ]
# force_splines now follows the notebook-level flag set in the forcing 
# section instead of a hard-coded True. force_cp is left as in the original 
# (NOTE(review): decide whether it should follow the same flag).
for selec_space in selec_spaces:
    pssf.make_completeness_purity_splines(selec_space, orbs_spline,
        eELzs_spline, actions_spline, mixture_arr, denspots_spline,
        halo_selection_survey_dict, phi0,
        [ls_pointing,bs_pointing,locids_pointing], ds_individual, fs_locs,
        ksf_dir, fig_dir, force_splines=force_splines, force_cp=True,
        spline_type='both', make_spline_plots=False, n_spline_plots=10)

## Map splines onto the effective selection function grid

In [None]:
# Build the kSF grid for each kinematic space in turn. keffSF_grid is 
# reassigned on every pass, so only the last space's return value is kept 
# in the notebook namespace.
for selec_space in selec_spaces:
    keffSF_grid = pssf.create_kSF_grid(selec_space, apogee_fields, ds,
                                       ksf_dir, spline_type='both',
                                       make_purity_grid=True)