In [None]:
# ------------------------------------------------------------------------
#
# TITLE - visualize_ksf_v3.ipynb
# AUTHOR - James Lane
# PROJECT - ges-mass
#
# ------------------------------------------------------------------------
#
# Docstrings and metadata:
'''Visualize the second generation kinematic effective selection function 
created from the results using the fiducial KSF.
'''

__author__ = "James Lane"

In [None]:
### Imports

# Basic
import os, sys, pdb, time, tqdm, warnings, multiprocessing, copy, dill as pickle
import numpy as np

# Matplotlib and plotting 
import matplotlib
import matplotlib.pyplot as plt

# galpy
from galpy import orbit

# scipy
import scipy.interpolate

# Project specific
sys.path.insert(0,'../../src/')
from ges_mass import mass as pmass
from ges_mass import densprofiles as pdens
from ges_mass import util as putil
from ges_mass import plot as pplot

### Notebook setup

# Render figures inline in the notebook output
%matplotlib inline
plt.style.use('../../src/mpl/project.mplstyle') # This must be exactly here
# Use the high-resolution ('retina') format for inline figures
%config InlineBackend.figure_format = 'retina'
# Reload imported modules automatically before running code, so edits to the
# project source are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

### Colors setup

# Project colour helper and the 'rainbow' colourmap used by the figures below
project_colors = pplot.colors()
rainbow_cmap = project_colors.colourmap('rainbow')

### Keywords, Pathing, Loading, Data Preparation

In [None]:
# %load ../../src/nb_modules/keywords_pathing_loading_data_prep.py
# NOTE: this cell was pulled in with %load from the shared module named above;
# apart from the manual overrides below it mirrors that file.
## Keywords
# Load the project configuration and extract the requested keywords. The
# unpacking is positional, so it relies on the values coming back in the same
# order as `keywords` (presumably guaranteed by putil.parse_config_dict -
# TODO confirm).
cdict = putil.load_config_to_dict()
keywords = ['BASE_DIR','APOGEE_DR','APOGEE_RESULTS_VERS','GAIA_DR','NDMOD',
            'DMOD_MIN','DMOD_MAX','LOGG_MIN','LOGG_MAX','FEH_MIN','FEH_MAX',
            'FEH_MIN_GSE','FEH_MAX_GSE','DF_VERSION','KSF_VERSION','NPROCS',
            'RO','VO','ZO']
base_dir,apogee_dr,apogee_results_vers,gaia_dr,ndmod,dmod_min,dmod_max,\
    logg_min,logg_max,feh_min,feh_max,feh_min_gse,feh_max_gse,df_version,\
    ksf_version,nprocs,ro,vo,zo = putil.parse_config_dict(cdict,keywords)
# Convenience [min,max] ranges built from the configuration values
logg_range = [logg_min,logg_max]
feh_range = [feh_min,feh_max]
feh_range_gse = [feh_min_gse,feh_max_gse]
# Spans from the lower edge of feh_range to the upper edge of feh_range_gse
feh_range_all = [feh_min,feh_max_gse]
# TODO: feh_range_fit = copy.deepcopy( # Need to choose here

# Manually override df and ksf information
# (these replace the DF_VERSION / KSF_VERSION values read from the config)
df_version = 'v3_lb_ps_hb_tp'
ksf_version = 'v3.01_beta_03_6e8_08_1.5e8'

## Pathing
# Directory layout for the chosen data releases and df/ksf versions
fit_paths = putil.prepare_paths(base_dir,apogee_dr,apogee_results_vers,gaia_dr,
                                df_version,ksf_version)
data_dir,version_dir,ga_dir,gap_dir,df_dir,ksf_dir,fit_dir = fit_paths
# Figures are written relative to the notebook's working directory
fig_dir = './fig/'

## Filenames
fit_filenames = putil.prepare_filenames(ga_dir,gap_dir,feh_range_gse)
apogee_SF_filename,apogee_effSF_filename,apogee_effSF_mask_filename,\
    iso_grid_filename,clean_kinematics_filename = fit_filenames

## File loading and data preparation
# return_other=True makes prepare_fitting return a second tuple, unpacked
# below into the selection function object and two effective selection
# function grids
fit_stuff,other_stuff = putil.prepare_fitting(fit_filenames,
    [ndmod,dmod_min,dmod_max],ro,zo,return_other=True)
apogee_effSF_mask,dmap,iso_grid,jkmins,dmods,ds,effsel_grid,apof,\
    allstar_nomask,orbs_nomask = fit_stuff
Rgrid,phigrid,zgrid = effsel_grid
apogee_SF,apogee_effSF_grid_inclArea,apogee_effSF_grid_inclArea_Jac = other_stuff

### Function to plot the kinematic selection function

In [None]:
def plot_ksf(ksf, weights, weights_type, apogee_SF, effSF_mask, effSF_mask_use, 
             fig=None, ax=None, scatter_kwargs=None, add_cbar=True, 
             cbar_kwargs=None, cbar_txt='', add_bulge_box=True, rasterized=True):
    '''plot_ksf:
    
    Scatter-plot a per-field grid (e.g. the kinematic selection function), 
    marginalized over distance modulus with the supplied weights, at the 
    Galactic (l,b) position of each APOGEE field.
    
    weights_type and weights combinations are:
    - None: weights is used as given (it must broadcast against ksf). If 
        weights is also None, every distance modulus is weighted equally
    - 'effSF': numpy array of effective selection function with shape 
        (nfield,ndmod)
    - 'dmod': Array of weights of length (ndmod,)
    
    Args:
        ksf (np.array) - Array of size (nfield,ndm) representing the kinematic
            selection function. Should not be combined with the effective 
            selection function
        weights (depends) - Object that determines the weights when 
            marginalizing over line of sight
        weights_type (string) - String specifying what weights is, see above
        apogee_SF () - Saved APOGEE selection function object. Only its 
            _apogeeField['GLON'] and _apogeeField['GLAT'] entries are read
        effSF_mask (np.array) - APOGEE effective selection function mask marking 
            fields not included for fitting. Should be shape (nfield,). Must be
            boolean (True = field used in the fit) for effSF_mask_use='mark'
        effSF_mask_use (string) - How to use the effective selection function
            mask. Either remove ('remove') or mark ('mark') fields which are 
            not considered in the fit. None ignores the mask
        fig (matplotlib.Figure) - Figure object [default None, make the figure
            or take it from ax]
        ax (matplotlib.Axes) - Axes object [default None, make the axes]
        scatter_kwargs (dict) - Dictionary of kwargs to pass to ax.scatter()
            [default None, no extra kwargs]
        add_cbar (bool) - Add a colourbar for the scatter points to the figure
        cbar_kwargs (dict) - Dictionary of kwargs to pass to fig.colorbar()
            [default None, no extra kwargs]
        cbar_txt (string) - Label for the colourbar
        add_bulge_box (bool) - Add a box to the plot showing the bulge region
        rasterized (bool) - Rasterize the scatter points
        
    Returns
        fig,ax
    '''
    # None instead of {} as default so no dict is shared between calls
    if scatter_kwargs is None:
        scatter_kwargs = {}
    if cbar_kwargs is None:
        cbar_kwargs = {}
    
    # Assert inputs correct
    assert weights_type in [None,'effSF','dmod']
    assert effSF_mask_use in [None,'remove','mark']
    if weights_type is None:
        if weights is None:
            # No weights requested: plain mean along the line of sight
            weights = np.ones(ksf.shape)
    elif weights_type == 'effSF':
        assert weights.shape == ksf.shape
    elif weights_type == 'dmod':
        assert weights.shape[0] == ksf.shape[1]
        # Copy the (ndmod,) weights to every field -> (nfield,ndmod)
        weights = np.repeat(weights[:,None],ksf.shape[0],axis=1).T
    
    # Make figure if no axes supplied, otherwise make sure fig refers to the 
    # figure that owns ax (needed for the colourbar and the return value)
    if ax is None:
        if fig is None:
            fig = plt.figure()
        ax = fig.add_subplot(111)
    elif fig is None:
        fig = ax.figure
    
    # Get the field pointing locations. Work on copies so the selection 
    # function object is not modified when the longitudes are wrapped.
    plot_glon = copy.deepcopy(apogee_SF._apogeeField['GLON'])
    plot_glat = copy.deepcopy(apogee_SF._apogeeField['GLAT'])
    wrap = plot_glon > 180
    plot_glon[wrap] = plot_glon[wrap] - 360. # (180,360] -> (-180,0]
    
    # Consider the effective selection function mask
    if effSF_mask_use == 'remove':
        ksf = ksf[effSF_mask]
        plot_glon = plot_glon[effSF_mask]
        plot_glat = plot_glat[effSF_mask]
        weights = weights[effSF_mask]
    
    # Marginalize the kSF: weighted mean over the distance modulus axis
    ksf_weighted = np.sum(ksf*weights,axis=1)/np.sum(weights,axis=1)
    
    # Make the plot
    pts = ax.scatter(plot_glon, plot_glat, c=ksf_weighted, zorder=1,
                     rasterized=rasterized, **scatter_kwargs)
    
    if effSF_mask_use == 'mark':
        # Outline the fields excluded from the fit with empty black circles
        # of the same size as the coloured points
        ax.scatter(plot_glon[~effSF_mask], plot_glat[~effSF_mask], 
                   facecolors='None', edgecolors='Black', linewidths=0.5, 
                   s=pts.get_sizes()[0], zorder=2, rasterized=rasterized)
    
    if add_bulge_box:
        # 40x40 deg box centred on (l,b)=(0,0)
        bulge_rect = matplotlib.patches.Rectangle((-20,-20), width=40., 
            height=40., facecolor='None', edgecolor='Black',zorder=3)
        ax.add_artist(bulge_rect)
  
    ax.set_xlabel(r'$\ell$ [deg]')
    ax.set_ylabel(r'$b$ [deg]')
    ax.set_xticks([-180,-120,-60,0,60,120,180])
    ax.set_yticks([-90,-60,-30,0,30,60,90])
    ax.set_ylim(-100,100)
    ax.set_xlim(200,-200) # longitude increases to the left
    
    if add_cbar:
        cbar = fig.colorbar(pts, ax=ax, **cbar_kwargs)
        cbar.set_label(cbar_txt)
    
    return fig,ax

### Load selection functions and purity grids

In [None]:
# Kinematic selections (used in the grid filenames) and their plot labels
selec = ['eLz','AD','JRLz']
selec_text = [r'$e-\mathrm{L}_\mathrm{z}$',
              r'AD',
              r'$\mathrm{J}_\mathrm{R}-\mathrm{L}_\mathrm{z}$'
             ]

spline_type = 'linear'

# NOTE: unpickling can execute arbitrary code, so only load grids that were 
# produced by this project.

# Kinematic selection function (completeness) grids, one per selection
ksfs = []
for i in range(len(selec)):
    ksf_filename = ksf_dir+'kSF_grid_'+spline_type+'_'+selec[i]+'.dat'
    with open(ksf_filename,'rb') as f:
        print('Loading a kinematic selection function from: '+ksf_filename)
        _ksf = pickle.load(f)
    ksfs.append(_ksf)
# The grids are indexed by field first, so they must match the selection 
# function's field list
nfields = len(ksfs[0])
assert nfields == len(apogee_SF._apogeeField)

# Purity grids, one per selection
purs = []
for i in range(len(selec)):
    purity_filename = ksf_dir+'purity_grid_'+spline_type+'_'+selec[i]+'.dat'
    with open(purity_filename,'rb') as f:
        # Report the purity file itself (the kSF filename was printed here)
        print('Loading a purity grid from: '+purity_filename)
        _pur = pickle.load(f)
    purs.append(_pur)

## Examine the kSF (completeness)

### Field-by-field kSF vs distance modulus

In [None]:
dm_range = [6.,20.]
field_glat = apogee_SF._apogeeField['GLAT']
cmap = rainbow_cmap
norm = matplotlib.colors.Normalize(vmin=0, vmax=90)
color_by_glat = True
label_fs = 18

# Figure and axes. The number of columns follows the number of kinematic 
# selections so the grid, the figure width and the plotting loop all agree.
# Rows 1-3 hold one kSF panel per selection, row 0 of the middle column holds 
# the distance modulus distribution.
ncols = len(ksfs)
fig = plt.figure(figsize=(int(4*ncols),4))
gs = fig.add_gridspec(nrows=4,ncols=ncols)
axs = [fig.add_subplot(gs[1:,k]) for k in range(ncols)]
axt = fig.add_subplot(gs[0,ncols//2])
if color_by_glat:
    # Small axes (figure coordinates) for the |b| colourbar
    cax = fig.add_axes([0.1,0.875,0.15,0.05])

# One faint line per field: kSF as a function of distance modulus
for i in range(ncols):
    for j in range(nfields):
        if color_by_glat:
            c = cmap(norm(np.abs(field_glat[j])))
            axs[i].plot(dmods, ksfs[i][j], c=c, alpha=0.05, rasterized=True)
        else:
            axs[i].plot(dmods, ksfs[i][j], color='Black', alpha=0.01,
                        rasterized=True)
    axs[i].set_xlabel(r'$\mu$', fontsize=label_fs)
    if i == 0:
        axs[i].set_ylabel(r'$\mathfrak{S}_\mathrm{2}$', fontsize=label_fs)
    else:
        # Panels share the y range, only the first one keeps tick labels
        axs[i].tick_params(labelleft=False)
    axs[i].annotate(selec_text[i], xy=(0.05,0.85), xycoords='axes fraction',
                    fontsize=label_fs)
    axs[i].set_xlim(dm_range[0],dm_range[1])
    axs[i].set_ylim(-0.05,1.05)

if color_by_glat:
    cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                        cax=cax, orientation='horizontal')
    # Set ticks through the colourbar so they land on its long axis
    cbar.set_ticks([0.,15.,30.,45.,60.,75.,90.])
    cbar.ax.set_xlabel(r'$\vert b_\mathrm{field} \vert$ [deg]', 
                       fontsize=label_fs-2)

# Plot distance modulus
hmask_filename = gap_dir+'halo_apogee_mask.npy'
hmask = np.load(hmask_filename)
halo_dist = allstar_nomask[hmask]['weighted_dist']
# mu = 5*log10(d) - 5, which assumes weighted_dist is in pc - TODO confirm
halo_dm = 5*np.log10(halo_dist) - 5
nbins = 15
hist_range = [2,19]
halo_dm_n,halo_dm_edges,_ = axt.hist(halo_dm, histtype='step', bins=nbins, 
                                     range=hist_range, density=True, 
                                     color='Black', linewidth=1.5)
# Smooth p(mu): cubic interpolation through the bin centres, zero outside 
# the range covered by the bin centres
halo_dm_cents = (halo_dm_edges[1:]+halo_dm_edges[:-1])/2.
halo_dm_interp = scipy.interpolate.interp1d(halo_dm_cents, halo_dm_n,
                                            kind='cubic', bounds_error=False,
                                            fill_value=0)
_dms = np.linspace(1,20,num=101)
_dm_ns = halo_dm_interp(_dms)
axt.plot(_dms, _dm_ns, color='Red', linewidth=1.)
axt.set_xlim(dm_range[0],dm_range[1])
axt.set_ylim(0,0.3)
axt.set_yticks([0.,0.15,0.3])
axt.set_xticklabels([])
axt.set_ylabel(r'p($\mu$)', fontsize=label_fs)

fig.subplots_adjust(wspace=0.05, hspace=0.3, left=0.05, right=0.95, 
                    bottom=0.15, top=0.95)
# fig.savefig(fig_dir+'ksf_dmod_fields_v2.pdf', dpi=300)
# plt.show() rather than fig.show(): the latter warns that the inline 
# backend is non-GUI and cannot show the figure
plt.show()

In [None]:
# Load the original (v1) kinematic selection functions by hand so they can be 
# compared, in fractional or absolute terms, against the grids loaded above.
# NOTE: the v1 location is hard-coded relative to data_dir.
ksf0_dir = data_dir+'ksf/apogee_dr16_l33_gaia_edr3/v1_alpha35_rc30/v1_beta_03_08_5050mix/'
ksfs0 = []
for selec_name in selec:
    ksf_filename = ''.join([ksf0_dir,'kSF_grid_',spline_type,'_',selec_name,
                            '.dat'])
    print('Loading a kinematic selection function from: '+ksf_filename)
    with open(ksf_filename,'rb') as f:
        ksfs0.append(pickle.load(f))

In [None]:
dm_range = [6.,20.]
field_glat = apogee_SF._apogeeField['GLAT']
cmap = rainbow_cmap
norm = matplotlib.colors.Normalize(vmin=0, vmax=90)
color_by_glat = True
label_fs = 18

# Figure and axes. The number of columns follows the number of kinematic 
# selections so the grid, the figure width and the plotting loop all agree.
# Rows 1-3 hold one kSF panel per selection, row 0 of the middle column holds 
# the distance modulus distribution.
# (A taller layout with a second row of per-field difference panels, 
# ksfs - ksfs0, was prototyped here but is not part of this figure.)
ncols = len(ksfs)
fig = plt.figure(figsize=(int(4*ncols),4))
gs = fig.add_gridspec(nrows=4,ncols=ncols)
axs = [fig.add_subplot(gs[1:,k]) for k in range(ncols)]
axt = fig.add_subplot(gs[0,ncols//2])
if color_by_glat:
    # Small axes (figure coordinates) for the |b| colourbar
    cax = fig.add_axes([0.1,0.875,0.15,0.05])

for i in range(ncols):
    # One faint line per field: kSF as a function of distance modulus
    for j in range(nfields):
        if color_by_glat:
            c = cmap(norm(np.abs(field_glat[j])))
            axs[i].plot(dmods, ksfs[i][j], c=c, alpha=0.05, rasterized=True)
        else:
            axs[i].plot(dmods, ksfs[i][j], color='Black', alpha=0.01,
                        rasterized=True)
    # Medians over fields: solid for the kSF of this notebook, dashed for 
    # the original kSF loaded into ksfs0
    axs[i].plot(dmods, np.median(ksfs[i],axis=0), color='Black', 
        linewidth=2, linestyle='solid', 
        label=r'median $\mathfrak{S}_\mathrm{2}^{\prime}$')
    axs[i].plot(dmods, np.median(ksfs0[i],axis=0), color='Black', 
        linewidth=2, linestyle='dashed', 
        label=r'median $\mathfrak{S}_\mathrm{2}$')
    if i == 0:
        axs[i].set_ylabel(r'$\mathfrak{S}_\mathrm{2}^{\prime}$', fontsize=label_fs)
        axs[i].legend(loc='lower left', fontsize=14)
    else:
        axs[i].tick_params(labelleft=False)
    # The x label and x tick labels are switched off for these panels
    axs[i].tick_params(labelbottom=False)
    axs[i].annotate(selec_text[i], xy=(0.05,0.85), xycoords='axes fraction',
                    fontsize=label_fs)
    axs[i].set_xlim(dm_range[0],dm_range[1])
    axs[i].set_ylim(-0.05,1.05)

if color_by_glat:
    cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                        cax=cax, orientation='horizontal')
    # Set ticks through the colourbar so they land on its long axis
    cbar.set_ticks([0.,15.,30.,45.,60.,75.,90.])
    cbar.ax.set_xlabel(r'$\vert b_\mathrm{field} \vert$ [deg]', 
                       fontsize=label_fs-2)

# Plot distance modulus
hmask_filename = gap_dir+'halo_apogee_mask.npy'
hmask = np.load(hmask_filename)
halo_dist = allstar_nomask[hmask]['weighted_dist']
# mu = 5*log10(d) - 5, which assumes weighted_dist is in pc - TODO confirm
halo_dm = 5*np.log10(halo_dist) - 5
nbins = 15
hist_range = [2,19]
halo_dm_n,halo_dm_edges,_ = axt.hist(halo_dm, histtype='step', bins=nbins, 
                                     range=hist_range, density=True, 
                                     color='Black', linewidth=1.5)
# Smooth p(mu): cubic interpolation through the bin centres, zero outside 
# the range covered by the bin centres
halo_dm_cents = (halo_dm_edges[1:]+halo_dm_edges[:-1])/2.
halo_dm_interp = scipy.interpolate.interp1d(halo_dm_cents, halo_dm_n,
                                            kind='cubic', bounds_error=False,
                                            fill_value=0)
_dms = np.linspace(1,20,num=101)
_dm_ns = halo_dm_interp(_dms)
axt.plot(_dms, _dm_ns, color='Red', linewidth=1.)
axt.set_xlim(dm_range[0],dm_range[1])
axt.set_ylim(0,0.3)
axt.set_yticks([0.,0.15,0.3])
axt.set_xticklabels([])
axt.set_ylabel(r'p($\mu$)', fontsize=label_fs)

fig.subplots_adjust(wspace=0.05, hspace=0.3, left=0.05, right=0.95, 
                    bottom=0.15, top=0.95)
fig.savefig(fig_dir+'ksf_dmod_fields_v2.pdf', dpi=300)
# plt.show() rather than fig.show(): the latter warns that the inline 
# backend is non-GUI and cannot show the figure
plt.show()

### Determine the distribution of distance modulus

In [None]:
# Stand-alone view of the distance modulus distribution of the halo sample, 
# together with the smooth interpolant that later cells use as weights.
fig, ax = plt.subplots(figsize=(5,3))

# Halo sample: boolean/index mask stored on disk applied to the full catalogue
hmask_filename = gap_dir+'halo_apogee_mask.npy'
hmask = np.load(hmask_filename)
halo_dist = allstar_nomask[hmask]['weighted_dist']
halo_dm = 5*np.log10(halo_dist) - 5

# Normalized step histogram of the distance moduli
nbins = 15
hist_range = [2,19]
hist_style = dict(histtype='step', density=True, color='Black', 
                  linewidth=1.5)
halo_dm_n,halo_dm_edges,_ = ax.hist(halo_dm, bins=nbins, range=hist_range, 
                                    **hist_style)

# Cubic interpolation through the bin centres; evaluates to zero outside 
# the range covered by the bin centres
halo_dm_cents = (halo_dm_edges[1:]+halo_dm_edges[:-1])/2.
halo_dm_interp = scipy.interpolate.interp1d(
    halo_dm_cents, halo_dm_n, kind='cubic', bounds_error=False, fill_value=0)

# Overlay the interpolant on a fine grid
_dms = np.linspace(1,20,num=101)
_dm_ns = halo_dm_interp(_dms)
ax.plot(_dms, _dm_ns, color='Red', linewidth=1.)

# dm_range is inherited from the plotting cells above
ax.set_xlim(dm_range[0],dm_range[1])
ax.set_xlabel(r'$\mu$')
ax.set_ylabel(r'$f(\mu)$')

fig.show()

### Calculate the typical difference between v1 and v3 KSFs using the distance modulus PDF

In [None]:
# Weights along the line of sight: the smooth distance modulus distribution 
# of the halo sample evaluated on the kSF distance modulus grid
dmod_weights = halo_dm_interp(dmods)

for i in range(len(ksfs)):
    print('\nKinematic space ',selec[i])
    # Median over fields at each distance modulus, for this notebook's kSF 
    # (ksfs) and the original one (ksfs0)
    mksf = np.median(ksfs[i],axis=0)
    mksf0 = np.median(ksfs0[i],axis=0)
    # The fractional difference is undefined wherever the original median is 
    # zero; silence the divide warnings and leave those entries out of the 
    # average so they cannot turn the result into nan
    with np.errstate(divide='ignore', invalid='ignore'):
        fd_mksf = (mksf-mksf0)/mksf0
    finite = np.isfinite(fd_mksf)
    print('Fractional difference between medians, weighted along los',
        np.average(fd_mksf[finite],weights=dmod_weights[finite]))

# Alternative estimators that were tried previously, both based on the 
# per-field fractional difference (ksfs[i]-ksfs0[i])/ksfs0[i]:
#   - nan-median over fields at each distance modulus, then the weighted 
#     average along the line of sight
#   - weighted average along each line of sight (finite entries only), then 
#     the [16,50,84] percentiles over fields

### Marginalize the kSF using the typical distribution of distance modulus

In [None]:
# Marginalize over distance modulus with the halo sample's p(mu) as weights.
# NOTE(review): a cubic interpolant of a histogram can dip below zero between 
# bins; confirm the weights are non-negative on the dmods grid.
weights = halo_dm_interp(dmods)
weights_type = 'dmod'
effSF_mask_use = 'remove'
vmin = 0.1
vmax = 0.6
add_bulge_box = True

# Shared colour scale for all panels and for the colourbar
cmap = rainbow_cmap
norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)

scatter_kwargs = {'s':8,
                  'cmap':cmap,
                  'vmin':vmin,
                  'vmax':vmax}

# One (l,b) panel per kinematic selection, stacked vertically
fig = plt.figure(figsize=(5,int(2.5*len(ksfs))))
axs = fig.subplots(nrows=len(ksfs), ncols=1)

for i in range(len(ksfs)):
    # Per-panel colourbars are off; one shared colourbar is drawn below
    plot_ksf(ksfs[i], weights, weights_type, apogee_SF, apogee_effSF_mask, 
             effSF_mask_use, fig=fig, ax=axs[i], 
             scatter_kwargs=scatter_kwargs, add_cbar=False, 
             add_bulge_box=add_bulge_box)
    if i < len(ksfs)-1:
        # Only the bottom panel keeps its longitude labels
        axs[i].set_xticklabels([])
        axs[i].set_xlabel('')
    axs[i].annotate(selec_text[i], xy=(0.45,0.1), xycoords='axes fraction')

cbar_txt = r'$\int\mathfrak{S}_\mathrm{2}^{\prime}(\mu)p(\mu)\mathrm{d}\mu$'
cax = fig.add_axes([0.84,0.15,0.04,0.75])
cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                    cax=cax, orientation='vertical', 
                    label=cbar_txt)

fig.tight_layout()
# Leave room on the right for the shared colourbar
fig.subplots_adjust(hspace=0.05, right=0.81)
fig.savefig(fig_dir+'ksf_lb_dmod_marginalized_v2.pdf', dpi=300)
# plt.show() rather than fig.show(): the latter warns that the inline 
# backend is non-GUI and cannot show the figure
plt.show()

## Examine the purity

### Field-by-field purity vs distance modulus

In [None]:
dm_range = [6.,20.]
field_glat = apogee_SF._apogeeField['GLAT']
cmap = rainbow_cmap
norm = matplotlib.colors.Normalize(vmin=0, vmax=90)
color_by_glat = True

# Figure and axes. The number of columns follows the number of purity grids 
# so the grid, the figure width and the plotting loop all agree. Rows 1-3 
# hold one purity panel per selection, row 0 of the middle column holds the 
# distance modulus distribution.
ncols = len(purs)
fig = plt.figure(figsize=(int(5*ncols),4))
gs = fig.add_gridspec(nrows=4,ncols=ncols)
axs = [fig.add_subplot(gs[1:,k]) for k in range(ncols)]
axt = fig.add_subplot(gs[0,ncols//2])
if color_by_glat:
    # Small axes (figure coordinates) for the |b| colourbar
    cax = fig.add_axes([0.15,0.825,0.15,0.05])

# One faint line per field: purity as a function of distance modulus
for i in range(ncols):
    for j in range(nfields):
        if color_by_glat:
            c = cmap(norm(np.abs(field_glat[j])))
            axs[i].plot(dmods, purs[i][j], c=c, alpha=0.05)
        else:
            axs[i].plot(dmods, purs[i][j], color='Black', alpha=0.01)
    axs[i].set_xlabel(r'$\mu$')
    if i == 0:
        axs[i].set_ylabel(r'purity')
    else:
        # Panels share the y range, only the first one keeps tick labels
        axs[i].tick_params(labelleft=False)
    axs[i].annotate(selec_text[i], xy=(0.05,0.85), xycoords='axes fraction')
    axs[i].set_xlim(dm_range[0],dm_range[1])
    axs[i].set_ylim(-0.05,1.05)

if color_by_glat:
    cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                        cax=cax, orientation='horizontal', 
                        label=r'$\vert b_\mathrm{field} \vert$ [deg]')
    # Set ticks through the colourbar so they land on its long axis
    cbar.set_ticks([0.,15.,30.,45.,60.,75.,90.])

# Plot distance modulus
hmask_filename = gap_dir+'halo_apogee_mask.npy'
hmask = np.load(hmask_filename)
halo_dist = allstar_nomask[hmask]['weighted_dist']
# mu = 5*log10(d) - 5, which assumes weighted_dist is in pc - TODO confirm
halo_dm = 5*np.log10(halo_dist) - 5
nbins = 15
hist_range = [2,19]
halo_dm_n,halo_dm_edges,_ = axt.hist(halo_dm, histtype='step', bins=nbins, 
                                     range=hist_range, density=True, 
                                     color='Black', linewidth=1.5)
# Smooth p(mu): cubic interpolation through the bin centres, zero outside 
# the range covered by the bin centres
halo_dm_cents = (halo_dm_edges[1:]+halo_dm_edges[:-1])/2.
halo_dm_interp = scipy.interpolate.interp1d(halo_dm_cents, halo_dm_n,
                                            kind='cubic', bounds_error=False,
                                            fill_value=0)
_dms = np.linspace(1,20,num=101)
_dm_ns = halo_dm_interp(_dms)
axt.plot(_dms, _dm_ns, color='Red', linewidth=1.)
axt.set_xlim(dm_range[0],dm_range[1])
axt.set_ylim(0,0.3)
axt.set_yticks([0.,0.15,0.3])
axt.set_xticklabels([])
axt.set_ylabel(r'p($\mu$)')

fig.subplots_adjust(wspace=0.05, hspace=0.3)
# NOTE(review): this is the field-by-field purity figure, but the filename 
# says 'lb_dmod_marginalized' (the kSF analogue of this figure is called 
# 'ksf_dmod_fields_v2.pdf'). Confirm the intended filename before renaming.
fig.savefig(fig_dir+'purity_lb_dmod_marginalized_v2.pdf', dpi=300)
# plt.show() rather than fig.show(): the latter warns that the inline 
# backend is non-GUI and cannot show the figure
plt.show()

### Marginalize the purity using the typical distribution of distance modulus

In [None]:
# Marginalize over distance modulus with the halo sample's p(mu) as weights.
# NOTE(review): a cubic interpolant of a histogram can dip below zero between 
# bins; confirm the weights are non-negative on the dmods grid.
weights = halo_dm_interp(dmods)
weights_type = 'dmod'
effSF_mask_use = 'remove'
vmin = 0.5
vmax = 0.9
add_bulge_box = True

# Shared colour scale for all panels and for the colourbar
cmap = rainbow_cmap
norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)

scatter_kwargs = {'s':8,
                  'cmap':cmap,
                  'vmin':vmin,
                  'vmax':vmax}

# One (l,b) panel per purity grid, stacked vertically. Everything is sized 
# from purs, the list that is actually plotted in this cell.
fig = plt.figure(figsize=(5,int(3*len(purs))))
axs = fig.subplots(nrows=len(purs), ncols=1)

for i in range(len(purs)):
    # plot_ksf only needs a (nfield,ndmod) grid, so it is reused for the 
    # purity. Per-panel colourbars are off; a shared one is drawn below.
    plot_ksf(purs[i], weights, weights_type, apogee_SF, apogee_effSF_mask, 
             effSF_mask_use, fig=fig, ax=axs[i], 
             scatter_kwargs=scatter_kwargs, add_cbar=False, 
             add_bulge_box=add_bulge_box)
    if i < len(purs)-1:
        # Only the bottom panel keeps its longitude labels
        axs[i].set_xticklabels([])
        axs[i].set_xlabel('')
    axs[i].annotate(selec_text[i], xy=(0.45,0.1), xycoords='axes fraction')

cbar_txt = r'Purity'
cax = fig.add_axes([0.93,0.125,0.04,0.8])
cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                    cax=cax, orientation='vertical', 
                    label=cbar_txt)

fig.tight_layout()
# Leave room on the right for the shared colourbar
fig.subplots_adjust(hspace=0.05, right=0.88)
# NOTE(review): unlike the kSF version of this figure, this one is never 
# saved to fig_dir; confirm whether that is intended.
# plt.show() rather than fig.show(): the latter warns that the inline 
# backend is non-GUI and cannot show the figure
plt.show()

### Calculate the purity and completeness weighted by the effective selection function

In [None]:
# For every kinematic selection, average the purity grid over the fields kept 
# by the effective selection function mask, weighting by apof (presumably the 
# effective selection function on those same fields - TODO confirm), and 
# compare with the hard-coded v1 purities.
v1_purs = [0.840,0.899,0.780]
apof_norm = np.sum(apof)
for i in range(len(purs)):
    pur_in_fit = purs[i][apogee_effSF_mask]
    purw = np.sum(pur_in_fit*apof)/apof_norm
    # Differences are taken on the value rounded to 3 decimals, matching the 
    # precision of the v1 numbers
    purw_rounded = round(purw,3)
    delta_pur = purw_rounded-v1_purs[i]
    print('\nselection: ',selec[i])
    print('purity: ', purw_rounded)
    print('delta purity: ', delta_pur)
    print('fractional delta purity: ', delta_pur/v1_purs[i])

## Other figures not included in the paper

### Field by field purity vs galactocentric radius

In [None]:
cmap = rainbow_cmap
norm = matplotlib.colors.Normalize(vmin=0, vmax=90)
color_by_glat = True
field_glon = apogee_SF._apogeeField['GLON']
field_glat = apogee_SF._apogeeField['GLAT']
r_range = [0,70]

# Galactocentric radius of every distance grid point along each field's line 
# of sight. This does not depend on the kinematic selection, so it is 
# computed once per field rather than once per field and selection.
# The orbits are not called `os`, which would shadow the os module imported 
# at the top of the notebook.
# NOTE(review): the initial conditions are passed as a list of six 
# length-ndmod arrays; confirm against the galpy Orbit documentation that 
# this is read as ndmod phase-space points and not as shape (..., phasedim).
# NOTE(review): .value assumes galpy returns astropy Quantities here.
field_rs = []
for j in range(nfields):
    field_orbs = orbit.Orbit(
        [field_glon[j]*np.ones(ndmod),
         field_glat[j]*np.ones(ndmod),
         ds,np.zeros(ndmod),np.zeros(ndmod),np.zeros(ndmod)],
        lb=True, ro=ro, vo=vo)
    field_rs.append(field_orbs.r().value)

# Figure and axes: one panel per purity grid
fig = plt.figure(figsize=(int(5*len(purs)),4))
# atleast_1d keeps axs indexable even if there is a single panel
axs = np.atleast_1d(fig.subplots(nrows=1,ncols=len(purs)))
if color_by_glat:
    # Narrow axes (figure coordinates) for the vertical |b| colourbar
    cax = fig.add_axes([0.97,0.1,0.02,0.75])

# One faint line per field: purity as a function of galactocentric radius
for i in range(len(purs)):
    for j in range(nfields):
        rs = field_rs[j]
        if color_by_glat:
            c = cmap(norm(np.abs(field_glat[j])))
            axs[i].plot(rs, purs[i][j], c=c, alpha=0.05)
        else:
            axs[i].plot(rs, purs[i][j], color='Black', alpha=0.01)
    axs[i].set_xlabel(r'$r$ [kpc]')
    if i == 0:
        axs[i].set_ylabel(r'purity')
    else:
        # Panels share the y range, only the first one keeps tick labels
        axs[i].tick_params(labelleft=False)
    axs[i].annotate(selec_text[i], xy=(0.05,0.85), xycoords='axes fraction')
    axs[i].set_xlim(r_range[0],r_range[1])
    axs[i].set_ylim(-0.05,1.05)

if color_by_glat:
    cbar = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                        cax=cax, orientation='vertical',
                        label=r'$\vert b_\mathrm{field} \vert$ [deg]')
    # The colourbar is vertical, so its values run along y. Setting ticks 
    # through the colourbar puts them on that axis; set_xticks targeted the 
    # short axis instead.
    cbar.set_ticks([0.,15.,30.,45.,60.,75.,90.])

fig.subplots_adjust(wspace=0.05, hspace=0.3, right=0.95)
# plt.show() rather than fig.show(): the latter warns that the inline 
# backend is non-GUI and cannot show the figure
plt.show()