In [None]:
# ------------------------------------------------------------------------
#
# TITLE - plot_fit_posteriors.ipynb
# AUTHOR - James Lane
# PROJECT - ges-mass
#
# ------------------------------------------------------------------------
#
# Docstrings and metadata:
'''Make some figures of fit posteriors for the paper.
'''

__author__ = "James Lane"

In [None]:
### Imports

# Basic
import os, sys, pdb, time, tqdm, warnings, multiprocessing, copy, dill as pickle
import numpy as np

# galpy
from galpy import potential
from galpy import actionAngle as aA

# Matplotlib and plotting 
import matplotlib
import matplotlib.pyplot as plt
import corner

# Other
from isodist import Z2FEH,FEH2Z

# Project specific
sys.path.insert(0,'../../src/')
from ges_mass import mass as pmass
from ges_mass import densprofiles as pdens
from ges_mass import iso as piso
from ges_mass import util as putil
from ges_mass import plot as pplot

### Notebook setup

%matplotlib inline
plt.style.use('../../src/mpl/project.mplstyle') # This must be exactly here
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

### Keywords, Pathing, Loading, Data Preparation

In [None]:
# %load ../../src/nb_modules/keywords_pathing_loading_data_prep.py
## Keywords
# Read the project configuration and pull out the values used in this notebook.
# NOTE(review): the unpacking below presumably relies on parse_config_dict
# returning the values in the same order as `keywords` -- keep both in sync.
cdict = putil.load_config_to_dict()
keywords = [
    'BASE_DIR', 'APOGEE_DR', 'APOGEE_RESULTS_VERS', 'GAIA_DR', 'NDMOD',
    'DMOD_MIN', 'DMOD_MAX', 'LOGG_MIN', 'LOGG_MAX', 'FEH_MIN', 'FEH_MAX',
    'FEH_MIN_GSE', 'FEH_MAX_GSE', 'DF_VERSION', 'KSF_VERSION', 'NPROCS',
    'RO', 'VO', 'ZO',
]
(
    base_dir, apogee_dr, apogee_results_vers, gaia_dr, ndmod,
    dmod_min, dmod_max, logg_min, logg_max, feh_min, feh_max,
    feh_min_gse, feh_max_gse, df_version, ksf_version, nprocs,
    ro, vo, zo,
) = putil.parse_config_dict(cdict, keywords)

# [min,max] ranges assembled from the configuration values
logg_range = [logg_min, logg_max]
feh_range = [feh_min, feh_max]
feh_range_gse = [feh_min_gse, feh_max_gse]
feh_range_all = [feh_min, feh_max_gse]
# feh_range_fit = copy.deepcopy( # Need to choose here


## Pathing
fit_paths = putil.prepare_paths(
    base_dir, apogee_dr, apogee_results_vers, gaia_dr, df_version, ksf_version
)
(
    data_dir, version_dir, ga_dir, gap_dir, df_dir, ksf_dir, fit_dir,
) = fit_paths

## Filenames
fit_filenames = putil.prepare_filenames(ga_dir, gap_dir, feh_range_gse)
(
    apogee_SF_filename,
    apogee_effSF_filename,
    apogee_effSF_mask_filename,
    iso_grid_filename,
    clean_kinematics_filename,
) = fit_filenames

## File loading and data preparation
fit_stuff, other_stuff = putil.prepare_fitting(
    fit_filenames, [ndmod, dmod_min, dmod_max], ro, zo, return_other=True
)
(
    apogee_effSF_mask, dmap, iso_grid, jkmins, dmods, ds, effsel_grid, apof,
    allstar_nomask, orbs_nomask,
) = fit_stuff
Rgrid, phigrid, zgrid = effsel_grid
# apogee_SF,apogee_effSF_grid_inclArea,apogee_effSF_grid_inclArea_Jac = other_stuff

# ## Load the distribution functions
# df_filename = df_dir+'dfs.pkl'
# betas = [0.3,0.8]
# dfs = putil.load_distribution_functions(df_filename, betas)

# ## Load the APOGEE field information, can also similarly load 
# ## '...apogee_field_glons.npy', '...apogee_field_glats.npy', 
# ## '...apogee_field_location_ids.npy'
# apogee_fields = np.load(ga_dir+'apogee_fields.npy')

In [None]:
# Relative path of the directory the figure produced by this notebook is 
# saved into (used as a string prefix, so keep the trailing slash)
fig_dir = './fig/'

### Global Parameters

In [None]:
# %load ../../src/nb_modules/global_fitting_params.py
## general kwargs
verbose = True

## HaloFit kwargs (ordering follows HaloFit.__init__)
# Values that are only known at runtime (fit_type, densfunc, selec, 
# usr_log_prior, feh_range, version) are deliberately left out here and are 
# passed explicitly when the HaloFit object is created. allstar, orbs, effsel,
# effsel_grid, effsel_mask, dmods, iso_filename, jkmins and the *_dir paths 
# come from the prep cell; logg_range, ro, vo, zo from the config cell.
init = None
init_type = 'ML'
mask_disk = True
mask_halo = True
nwalkers = 100
nit = 2000
ncut = 1000
n_mass = 5000 # int(nwalkers*(nit-ncut))
int_r_range = [2., 70.]
iso = None # Will read from iso_grid_filename

hf_kwargs = dict(
    ## HaloFit parameters
    allstar=allstar_nomask,
    orbs=orbs_nomask,
    init=init,
    init_type=init_type,
    # fit_type=fit_type, # provided at runtime
    mask_disk=mask_disk,
    mask_halo=mask_halo,
    ## _HaloFit parameters
    # densfunc=densfunc, # provided at runtime
    # selec=selec, # provided at runtime
    effsel=apof,
    effsel_mask=apogee_effSF_mask,
    effsel_grid=effsel_grid,
    dmods=dmods,
    nwalkers=nwalkers,
    nit=nit,
    ncut=ncut,
    # usr_log_prior=usr_log_prior, # provided at runtime
    n_mass=n_mass,
    int_r_range=int_r_range,
    iso=iso,
    iso_filename=iso_grid_filename,
    jkmins=jkmins,
    # feh_range=feh_range, # provided at runtime
    logg_range=logg_range,
    fit_dir=fit_dir,
    gap_dir=gap_dir,
    ksf_dir=ksf_dir,
    # version=version, # provided at runtime
    verbose=verbose,
    ro=ro,
    vo=vo,
    zo=zo,
)

## pmass.fit() function kwargs
# nprocs set in config file
force_fit = True
mle_init = True
just_mle = False
return_walkers = True
optimizer_method = 'Powell'
mass_int_type = 'spherical_grid'
batch_masses = True
make_ml_aic_bic = True
calculate_masses = True
post_optimization = True
mcmc_diagnostic = True

fit_kwargs = dict(
    # nprocs=nprocs, # Normally given at runtime
    force_fit=force_fit,
    mle_init=mle_init,
    just_mle=just_mle,
    return_walkers=return_walkers,
    optimizer_method=optimizer_method,
    mass_int_type=mass_int_type,
    batch_masses=batch_masses,
    make_ml_aic_bic=make_ml_aic_bic,
    calculate_masses=calculate_masses,
    post_optimization=post_optimization,
    mcmc_diagnostic=mcmc_diagnostic,
)

### More parameters

In [None]:
# Arrays to hold information about the different HaloFit objects used for
# plotting. Every per-profile list below is indexed by profile number and must
# therefore have exactly n_profile entries.
n_profile = 4
# Angle units requested when the samples are denormalized for plotting
theta_in_degr = False
phi_in_degr = True
rad_to_degr = 180./np.pi
selec = [None,'eLz','AD','JRLz']
fit_type = ['all','gse','gse','gse']
# Raw strings: '\m' and '\s' are not valid Python escape sequences, so a 
# non-raw literal triggers an invalid-escape warning on recent Pythons.
sample_labels = ['Halo Sample',
                 r'$e-L_\mathrm{z}$',
                 r'AD',
                 r'$\sqrt{J_\mathrm{R}}-L_\mathrm{z}$']
# These are the density profiles that will be shown for the above. They should
# be the best-fits.
densfuncs = [pdens.triaxial_single_angle_zvecpa_plusexpdisk,
             pdens.triaxial_single_cutoff_zvecpa_plusexpdisk,
             pdens.triaxial_single_cutoff_zvecpa,
             pdens.triaxial_single_angle_zvecpa_plusexpdisk,
             ]
# Shortcut if all versions are the same
_version = '100w_1e4n'
versions = [_version]*n_profile
# versions = ['100w_2e3n','100w_2e3n','100w_2e3n','1002_2e3n']
colors = ['Black','Red','DarkOrange','DodgerBlue']

# Catch a forgotten entry early rather than as an IndexError while plotting
assert all(len(seq) == n_profile for seq in 
           (selec,fit_type,sample_labels,densfuncs,versions,colors)),\
    'every per-profile list must have n_profile entries'

### Mapping from density profile to parameter index
Do this so that all sets of parameters can be cast into the same array shape. Each map should be a dictionary whose keys are density function names and whose values are lists giving, for each plotted parameter, its index in that density function's parameter vector. The parameters are listed in the order: alpha1, alpha2, r1, p, q, theta, eta, phi, fdisk. Use -1 if the parameter doesn't exist for that density function. A separate mapping should be made if showing double broken power laws.


In [None]:
# ## Mapping and other variables for up to one break (includes alpha2 and r1)
# n_param = 9
# # Maps alpha1,alpha2,r1,p,q,theta,eta,phi,fdisk
# densfunc_map = {'triaxial_single_angle_zvecpa':[0,-1,-1,1,2,3,4,5,-1],
#                 'triaxial_single_angle_zvecpa_plusexpdisk':[0,-1,-1,1,2,3,4,5,6],
#                 'triaxial_single_cutoff_zvecpa':[0,-1,1,2,3,4,5,6,-1],
#                 'triaxial_single_cutoff_zvecpa_plusexpdisk':[0,-1,1,2,3,4,5,6,7],
#                 'triaxial_broken_angle_zvecpa':[0,1,2,3,4,5,6,7,-1],
#                 'triaxial_broken_angle_zvecpa_plusexpdisk':[0,1,2,3,4,5,6,7,8],}
# for key in densfunc_map.keys():
#     assert len(densfunc_map[key]) == n_param

# domain = [[-5,4],
#           [1,5],
#           [0,20],
#           [0,1],
#           [0,1],
#           [0,2*np.pi],  
#           [0.5,1],
#           [0,np.pi],
#           [0,1]]
# labels = [r'$\alpha_{1}$', r'$\alpha_{2}$', r'$r_{1}$ [kpc]', r'$p$', 
#           r'$q$', r'$\theta$', r'$\eta$', r'$\phi$','$f_\mathrm{disk}$']
# if theta_in_degr:
#     domain[5] = [domain[5][0]*rad_to_degr,domain[5][1]*rad_to_degr]
#     labels[5] += ' [deg]'
# else:
#     labels[5] += ' [rad]'
# if phi_in_degr:
#     domain[7] = [domain[7][0]*rad_to_degr,domain[7][1]*rad_to_degr]
#     labels[7] += ' [deg]'
# else:
#     labels[7] += ' [rad]'
    
# sigmas = [1,2]
# levels = [1 - np.exp(-(s)**2/2) for s in sigmas]

In [None]:
## Mapping and other variables for up to exponential breaks (includes r1)
n_param = 9
# Columns of the plotted array, in order:
# alpha1,r1,p,q,theta,eta,phi,fdisk,log10(mass)
# Each entry is the index of that column in the density function's own 
# parameter vector, with two sentinels:
#   -1 : the parameter doesn't exist for this density function
#   -2 : not a density parameter (the mass column, filled in separately)
densfunc_map = {'triaxial_single_angle_zvecpa':[0,-1,1,2,3,4,5,-1,-2],
                'triaxial_single_angle_zvecpa_plusexpdisk':[0,-1,1,2,3,4,5,6,-2],
                'triaxial_single_cutoff_zvecpa':[0,1,2,3,4,5,6,-1,-2],
                'triaxial_single_cutoff_zvecpa_plusexpdisk':[0,1,2,3,4,5,6,7,-2],}
for key in densfunc_map.keys():
    assert len(densfunc_map[key]) == n_param

# Plotting range of each column, same ordering as above
domain = [[-5,4],
          [0,20],
          [0,1],
          [0,1],
          [0,2*np.pi],  
          [0.5,1],
          [0,np.pi],
          [0,1],
          [7.,10.]]
# Axis label of each column. All raw strings: '\m' is not a valid Python 
# escape sequence and warns in a non-raw literal.
labels = [r'$\alpha_{1}$', r'$r_{1}$ [kpc]', r'$p$', 
          r'$q$', r'$\theta$', r'$\eta$', r'$\phi$', r'$f_\mathrm{disk}$',
          r'$\log_{10}(\mathrm{M}/\mathrm{M}_{\odot})$']
# domain and labels are indexed per column, so they must match n_param too
assert len(domain) == n_param
assert len(labels) == n_param

# Angles are defined in radians above; convert the range and tag the label 
# with the unit actually being plotted. Column 4 is theta, column 6 is phi.
if theta_in_degr:
    domain[4] = [domain[4][0]*rad_to_degr,domain[4][1]*rad_to_degr]
    labels[4] += ' [deg]'
else:
    labels[4] += ' [rad]'
if phi_in_degr:
    domain[6] = [domain[6][0]*rad_to_degr,domain[6][1]*rad_to_degr]
    labels[6] += ' [deg]'
else:
    labels[6] += ' [rad]'
    
# Contour levels: mass enclosed by the 1- and 2-sigma contours of a 2D Gaussian
sigmas = [1,2]
levels = [1 - np.exp(-(s)**2/2) for s in sigmas]

In [None]:
# Make a hack function to get rid of plot children
def kill_child_by_attr(ax,child_class,attribute,val):
    '''kill_child_by_attr:
    
    Remove from an axis every child artist of a given class whose attribute
    matches a value.
    
    Args:
        ax - object providing get_children(); each matching child must provide
            remove()
        child_class (class or tuple of classes) - only children that are 
            instances of this are considered
        attribute (str) - name of the attribute to read from each child. 
            It must exist on every considered child (AttributeError otherwise)
        val - value to match. A string attribute matches if it equals a string
            val. A list/tuple/array attribute matches if it is non-empty and 
            every element equals val after broadcasting.
    
    Returns:
        None
    '''
    # get_children() is evaluated once up front, so removing children inside
    # the loop is safe
    for child in ax.get_children():
        if not isinstance(child,child_class):
            continue
        v = getattr(child,attribute)
        res = False
        if isinstance(v,str):
            # Guard on the type of val so the comparison is always a plain bool
            res = isinstance(val,str) and v==val
        elif isinstance(v,(list,tuple,np.ndarray)):
            try:
                same = np.atleast_1d(v)==np.atleast_1d(val)
            except (ValueError,TypeError):
                # Shapes that can't be broadcast can't match
                same = False
            # np.all() of an empty array is True, so an empty attribute would
            # otherwise match any val
            res = np.size(same) > 0 and bool(np.all(same))
        if res:
            # print('removing child: '+str(child))
            child.remove()

In [None]:
# Make the figure: one corner plot with the posterior of every profile 
# overplotted in its own color, plus a legend, saved to fig_dir.

# Plot settings. Only the legend placement depends on the number of parameters.
fig = None # Created by the first corner.corner() call, then reused
thin = None # Explicit thinning factor; takes precedence over thin_to
thin_to = 500000 # Target number of samples per profile when thin is None
smooth = 1.5
smooth1d = 1.5
alpha = 0.5
label_fontsize = 24
ticklabel_fontsize = 16
#axl_coords = [0.17,0.9,0.1,0.1]
axl_w,axl_h = 0.1,0.1
if n_param == 9:
    axl_coords = [0.99-axl_w,0.99-axl_h,axl_w,axl_h]
elif n_param == 8:
    axl_coords = [0.19,0.88,0.1,0.1]
else:
    # Fail here rather than with a NameError further down
    raise ValueError('No figure layout defined for n_param='+str(n_param))

for i in range(n_profile):
    
    # Column -> parameter index map of this profile's density function
    param_map = np.asarray(densfunc_map[densfuncs[i].__name__])

    if fit_type[i] == 'gse':
        feh_range_fit = copy.deepcopy(feh_range_gse)
    else:
        feh_range_fit = copy.deepcopy(feh_range_all)
    hf = pmass.HaloFit(densfunc=densfuncs[i], fit_type=fit_type[i], 
                       version=versions[i], selec=selec[i], 
                       feh_range=feh_range_fit, **hf_kwargs)

    # Load the results
    hf.get_results()
    hf_samples = pdens.denormalize_parameters(hf.samples,hf.densfunc,
        theta_in_degr=theta_in_degr, phi_in_degr=phi_in_degr)
    n_samples = hf_samples.shape[0]
    print(n_samples)
    
    # Map the samples from the density function to the full parameter array.
    # First start with fake random data, then offset it so it's within the 
    # plotting window, otherwise the program will crash. Columns the density 
    # function actually has (index >= 0) are then overwritten with the real
    # samples; the fake columns are erased from the figure further down.
    samples = np.random.random((n_samples,n_param))
    for j in range(n_param):
        samples[:,j] += domain[j][0]
        if param_map[j] >= 0:
            samples[:,j] = hf_samples[:,param_map[j]]
    # NOTE(review): assumes hf.masses has one entry per posterior sample; 
    # a length mismatch raises a broadcasting error here -- confirm
    samples[:,-1] = np.log10(hf.masses)

    # Thin the samples. The factor is clamped to >= 1: with fewer samples 
    # than thin_to the floor division gives 0, and a slice step of 0 raises 
    # a ValueError.
    if thin is not None:
        thin = max(1,int(thin))
        print('thinning by factor '+str(thin))
        samples = samples[::thin,:]
    elif thin_to is not None:
        _thin = max(1,n_samples//int(thin_to))
        print('thinning to N='+str(thin_to)+', thinning by factor '+str(_thin))
        samples = samples[::_thin,:]
    
    # Make the corner plot
    label_kwargs = {'fontsize':label_fontsize,}
    hist_kwargs = {'alpha':alpha,}
    corner_kwargs = {'range':domain,
                     'color':colors[i],
                     'labels':labels,
                     'smooth':smooth,
                     'smooth1d':smooth1d,
                     'plot_datapoints':False,
                     'plot_density':False,
                     'plot_contours':True,
                     'fill_contours':False,
                     'top_ticks':True,
                     'label_kwargs':label_kwargs,
                     'hist_kwargs':hist_kwargs,}
    contour_kwargs = {'colors':colors[i],
                      'alpha':alpha}
    hist2d_kwargs = {'contour_kwargs':contour_kwargs,
                     'levels':levels}

    # fig is None on the first pass, which makes corner create the figure;
    # afterwards the same figure is drawn onto again
    fig = corner.corner(samples, fig=fig, **corner_kwargs, **hist2d_kwargs)
        
    # Now hack the figure to get rid of lines in places where the parameter
    # doesn't exist. The artists to erase are recognized by this profile's 
    # color (histogram lines) or color+alpha (contours).
    c_to_kill = matplotlib.colors.to_rgba(colors[i],alpha=alpha)
    indx_to_kill = np.where(param_map == -1)[0]
    
    # The axis formatting below is applied for every profile, also when 
    # nothing has to be erased, so that the final figure doesn't depend on 
    # which profile happens to be drawn last.
    axs = fig.get_axes()
    for j,ax in enumerate(axs):
        xindx = j%n_param
        yindx = j//n_param
        if xindx > yindx: # upper diagonal, no axes
            continue
        is_fake = xindx in indx_to_kill or yindx in indx_to_kill
        if xindx == yindx: # A diagonal, a histogram
            if is_fake:
                kill_child_by_attr(ax,matplotlib.lines.Line2D,
                    '_color',colors[i])
            # Re-limit y axis
            ax.relim()
            ax.autoscale(axis='y')
            _ylim = ax.get_ylim()
            ax.set_ylim(0.,_ylim[1])
        elif is_fake: # lower diagonal, a contour
            kill_child_by_attr(ax,matplotlib.collections.PathCollection,
                '_edgecolors',c_to_kill)
        # Manually set the size of the axis labels
        ax.tick_params(axis='both',labelsize=ticklabel_fontsize)

# Legend: an invisible extra axis holding one empty line per profile
axl = fig.add_axes(axl_coords)
for i in range(n_profile):
    axl.plot([], [], c=colors[i], alpha=alpha, label=sample_labels[i], 
             linewidth=3.)
axl.legend(fontsize=24, frameon=True, handlelength=2., handleheight=1.5, 
           markerscale=4.)
axl.axis('off')

# Make sure the output directory exists, savefig doesn't create it
os.makedirs(fig_dir,exist_ok=True)
fig.savefig(fig_dir+'posterior.pdf')
# plt.show() instead of fig.show(): the latter only emits a warning with the
# non-GUI inline backend
plt.show()