# Eccentric GW Search Ideal

This notebook runs an MCMC on a simulated dataset containing an eccentric gravitational-wave (GW) signal. It is based on `cw_search_sample.ipynb` by Sarah Vigeland, Ph.D.

Updated: 12/17/2021

## Getting Started
Before running this notebook, please make sure to have an `enterprise` conda environment installed and activated. Instructions for setting up an `enterprise` conda environment can be found here: https://paper.dropbox.com/doc/Installing-enterprise-for-CWs-adVcXoBRq9GRVCk6uftjn

As this is searching over a simulated dataset, you must first use `ecc_res_simulate.ipynb` to create your simulated dataset.

## Table of Contents:
* [Load in Data](#first-bullet)
* [Create PTA Object](#second-bullet)
* [Sampler Setup](#third-bullet)

In [5]:
from __future__ import division

import glob
import json
import os
import pickle
import sys

import numpy as np
import scipy.constants as sc
from PTMCMCSampler.PTMCMCSampler import PTSampler as ptmcmc

import enterprise.constants as const
from enterprise.pulsar import Pulsar
from enterprise.signals import deterministic_signals
from enterprise.signals import gp_signals
from enterprise.signals import parameter
from enterprise.signals import selections
from enterprise.signals import signal_base
from enterprise.signals import utils
from enterprise.signals import white_signals
from enterprise.signals.signal_base import SignalCollection
from enterprise_extensions.blocks import red_noise_block
from enterprise_extensions.deterministic import CWSignal
from enterprise_extensions.sampler import JumpProposal as JP
from enterprise_extensions.sampler import get_global_parameters
from enterprise_extensions.sampler import group_from_params

import ecc_res

%load_ext autoreload
%autoreload 2

Do not have mpi4py package.


In [6]:
def get_noise_from_pal2(noisefile):
    """Read a PAL2-style noise file into an enterprise-style parameter dict.

    The pulsar name is taken from the file name (the part before
    ``_noise.txt``). Each line is split on whitespace: the first token is
    the PAL2 label and the second token is the value. Parsing stops at the
    first line that matches none of the known labels.

    Parameters
    ----------
    noisefile : str
        Path to a ``<psrname>_noise.txt`` file.

    Returns
    -------
    dict
        Maps ``<psrname>_<flag>_<par>`` (white noise) or ``<psrname>_<par>``
        (red noise) to the float value read from the file.
    """
    psrname = noisefile.split('/')[-1].split('_noise.txt')[0]

    # (substring searched for in the line, output parameter name,
    #  label prefix stripped to obtain the flag; None means "no flag").
    # Order matters: the first matching rule wins.
    rules = (
        ('efac', 'efac', 'efac-'),
        ('equad', 'log10_equad', 'equad-'),
        ('jitter_q', 'log10_ecorr', 'jitter_q-'),
        ('RN-Amplitude', 'red_noise_log10_A', None),
        ('RN-spectral-index', 'red_noise_gamma', None),
    )

    params = {}
    # the context manager guarantees the file is closed even on a parse error
    with open(noisefile, 'r') as fin:
        for line in fin:
            ln = line.split()
            rule = next((r for r in rules if r[0] in line), None)
            if rule is None:
                # unrecognized line: stop parsing
                break
            _, par, prefix = rule
            flag = ln[0].split(prefix)[-1] if prefix else ''
            name = [psrname, flag, par] if flag else [psrname, par]
            params['_'.join(name)] = float(ln[1])
    return params

In [7]:
def get_ew_groups(pta):
    """Build the sampler parameter groups for the eccentric CW search.

    Groups are lists of indices into ``pta.param_names``; parameters in the
    same group are proposed together by the sampler.

    The returned list contains, in order:

    * one group holding every parameter;
    * one group each for all ``p_dist``, all ``pphase`` and all ``gamma_P``
      parameters (skipped when there are none);
    * if any red-noise parameter is present: a ``[gamma, log10_A]`` pair per
      pulsar, groups for any ``alpha`` parameters, and one group holding
      all red-noise parameters;
    * if ``e0`` is present: the six global binary parameters, selected
      pairs of them, and per-pulsar groups that tie each pulsar's
      ``p_dist``/``pphase``/``gamma_P`` to the global parameters.

    Parameters
    ----------
    pta : object
        Must expose ``param_names`` (list of str).

    Returns
    -------
    list of list of int
        Parameter-index groups.
    """
    params = pta.param_names
    ndim = len(params)

    def indices(names):
        """Return the positions of ``names`` within ``params``."""
        return [params.index(name) for name in names]

    # first group: all parameters together
    groups = [list(range(ndim))]

    # separate pdist, pphase and gamma_P params
    pdist_params = [p for p in params if 'p_dist' in p]
    pphase_params = [p for p in params if 'pphase' in p]
    gammap_params = [p for p in params if 'gamma_P' in p]
    for names in (pdist_params, pphase_params, gammap_params):
        if names:  # never hand the sampler an empty group
            groups.append(indices(names))

    # Red-noise groups. Parameter names look like '<psr>_red_noise_gamma',
    # so presence is detected by substring, not exact equality.
    rnpars = [p for p in params if 'red_noise' in p]
    if rnpars:
        amp_suffix = '_red_noise_log10_A'
        for amp in rnpars:
            if not amp.endswith(amp_suffix):
                continue
            gam = amp[:-len(amp_suffix)] + '_red_noise_gamma'
            if gam in params:
                groups.append(indices([gam, amp]))

        alpha_idx = indices([p for p in params if 'alpha' in p])
        if alpha_idx:
            groups.append(alpha_idx)
            # Each alpha is coupled to the red noise of J0613-0200, but only
            # when that pulsar is actually in the model.
            anchor = ['J0613-0200_red_noise_gamma',
                      'J0613-0200_red_noise_log10_A']
            if all(name in params for name in anchor):
                anchor_idx = indices(anchor)
                for alpha in alpha_idx:
                    groups.append([alpha] + anchor_idx)
            # consecutive alpha pairs; stop early so an odd count cannot overrun
            for i in range(0, len(alpha_idx) - 1, 2):
                groups.append([alpha_idx[i], alpha_idx[i + 1]])

        groups.append(indices(rnpars))

    # Membership is tested on the name list: pta.params holds parameter
    # objects, so a string is never "in" it.
    if 'e0' in params:
        gpars = ['log10_Mc', 'e0', 'q', 'gamma0', 'l0', 'psi']  # global params
        groups.append(indices(gpars))

        # pair global params
        for pair in (('log10_Mc', 'q'), ('log10_Mc', 'e0'),
                     ('gamma0', 'l0'), ('gamma0', 'psi'), ('psi', 'l0')):
            groups.append(indices(pair))

        # per-pulsar groups, alone and together with the global parameters
        for pd, pp, gp in zip(pdist_params, pphase_params, gammap_params):
            psr_idx = indices([pd, pp, gp])
            groups.append(psr_idx)
            groups.append(psr_idx + indices(['log10_Mc']))
            groups.append(psr_idx + indices(['log10_Mc', 'e0', 'q']))

    return groups

## Loading in Data <a class="anchor" id="first-bullet"></a>

Here we load in our simulated dataset and associated noise files. The dataset can be loaded in as a .pkl or individual .par and .tim files. Pickle files are typically used as they make the loading of the pulsars a faster process, so if individual .tim and .par files are loaded then it will create a pickle file for later use.

In [6]:
#Simulated dataset and noise directory paths
# datadir: directory searched for the simulated .par/.tim files and the cached pulsar pickle.
# noisepath: directory holding the noise-dictionary file.
# NOTE(review): both are absolute paths on one user's machine; edit them before running elsewhere.
datadir = '/home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/'
noisepath = '/home/bcheeseboro/nanograv_proj/enterprise_proj/'

In [5]:
#if there's a pickle file then use that
pkl_name = 'ideal_pulsars_ecc_search.pkl'
filename = os.path.join(datadir, pkl_name)
if os.path.exists(filename):
    # NOTE: unpickling executes arbitrary code; only load a cache you created yourself.
    with open(filename, "rb") as f:
        psrs = pickle.load(f)
#else load the par and tim files in and make a pickle file for the future
else:
    #load par and tim files for each of the pulsars
    parfiles = sorted(glob.glob(os.path.join(datadir, '*.par')))
    timfiles = sorted(glob.glob(os.path.join(datadir, '*.tim')))
    # zip() would silently drop or mispair files, and an empty list would be
    # cached as a useless pickle, so fail loudly instead
    if not parfiles:
        raise ValueError('No .par files found in {0}'.format(datadir))
    if len(parfiles) != len(timfiles):
        raise ValueError('Found {0} .par files but {1} .tim files in {2}'.format(
            len(parfiles), len(timfiles), datadir))

    psrs = []
    for parfile, timfile in zip(parfiles, timfiles):
        print('Loading pulsar from parfile {0}'.format(parfile))
        psrs.append(Pulsar(parfile, timfile))

    # context manager so the cache is flushed and closed before it is reused
    with open(filename, 'wb') as f:
        pickle.dump(psrs, f)

Loading pulsar from parfile //home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/B1855+09_simulate.par
Loading pulsar from parfile //home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/J0030+0451_simulate.par
Loading pulsar from parfile //home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/J0613-0200_simulate.par
Loading pulsar from parfile //home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/J1012+5307_simulate.par
Loading pulsar from parfile //home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/J1024-0719_simulate.par
Loading pulsar from parfile //home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_signal_create/ecc_sim_data/12p5_simulated/3c66b_ish/J1455-3330_simulate.par
Loading pulsar from parfile //home/bcheeseboro/nanograv_proj

In [5]:
#get noise files
#noisefiles = sorted(glob.glob(noisepath+'/*.txt'))
#params = {}
#for nf in noisefiles:
#    params.update(get_noise_from_pal2(nf))

In [8]:
#get noise params
# Load the JSON noise file into `noise_params`.
# The file path is built by plain string concatenation, so `noisepath` must end with a path separator.
nf_name = 'channelized_12p5yr_v3_full_noisedict.json'
with open(noisepath+nf_name) as nf:
    noise_params = json.load(nf)

In [9]:
#noise_dict = {}
#for psr in psrs:
#    for ky, val in zip(noise_params.keys(), noise_params.values()):
#        if psr.name in ky:
#           if 'efac' in ky:
#                noise_dict.update({ky:val})

In [7]:
#Checking simulated pulsar residuals
#psr = psrs[9]

#U, _ = utils.create_quantization_matrix(psr.toas, nmin=1)
#uinds = utils.quant2ind(U)
#avetoas = np.array([psr.toas[sc].mean() for sc in uinds])
#averes = np.array([psr.residuals[sc].mean() for sc in uinds])
#plt.plot(avetoas/86400, averes/1e-6)

## Create PTA Object <a class="anchor" id="second-bullet"></a>

Now that all the data has been loaded in, it is time to create the PTA object. This object simulates a PTA containing information about all the pulsars in the dataset and applies the signal model to our dataset. The first step is to set up the white noise parameters. These are usually held fixed in a CW search.

In [7]:
## white noise parameters
# Build the three white-noise components (EFAC, EQUAD, ECORR) with one
# constant parameter per observing backend.
# set them to constant here and we will input the noise values after the model is initialized
efac = parameter.Constant()
equad = parameter.Constant()
ecorr = parameter.Constant()

# define selection by observing backend
selection = selections.Selection(selections.by_backend)

# define white noise signals
ef = white_signals.MeasurementNoise(efac=efac, selection=selection)
eq = white_signals.EquadNoise(log10_equad=equad, selection=selection)
# NOTE(review): name='' presumably keeps the ECORR parameter names free of an extra signal prefix - confirm
ec = gp_signals.EcorrBasisModel(log10_ecorr=ecorr, selection=selection, name='')

As for the intrinsic pulsar red noise, this is searched over and can be established using the `red_noise_block` function. The `log-uniform` prior is used in detection run searches.

In [None]:
# red noise
# Per-pulsar intrinsic red noise with a log-uniform prior.
rn = red_noise_block(prior='log-uniform')

#red noise empirical distribution
# Path (inside datadir) of the file holding the red-noise empirical distributions.
# NOTE(review): `args` is not defined in any cell shown in this notebook; it
# must be defined before this cell can run on a fresh kernel.
empirical_distr = datadir + args.rn_pkl

After initializing the white and red noise parameters, we must set up the eccentric signal model. As this is a targeted CW search, we hold the source distance and sky location fixed. We also hold the orbital frequency and inclination fixed. All other parameters are searched over the specified prior ranges.

In [11]:
#Eccentric gw parameters
# NOTE(review): `args` (gwphi, gwtheta, gwdist, f_orb, inc) is not defined in
# any cell shown in this notebook; define it before running this cell.
#gw parameters
gwphi = parameter.Constant(args.gwphi)('gwphi') #RA of source
gwtheta = parameter.Constant(args.gwtheta)('gwtheta') #DEC of source
log10_dist = parameter.Constant(args.gwdist)('log10_dist') #distance to source

#constant parameters
log10_forb = parameter.Constant(args.f_orb)('log10_forb') #log10 orbital frequency
inc = parameter.Constant(args.inc)('inc') #inclination of the binary's orbital plane

#Search parameters
# Parameters given an explicit name (e.g. 'q') are shared by all pulsars;
# p_dist, pphase and gamma_P are left unnamed and show up once per pulsar
# (e.g. 'B1855+09_cw_p_dist') in pta.params.
q = parameter.Uniform(0.1,1)('q') #mass ratio
log10_mc = parameter.Uniform(7,11)('log10_Mc') #log10 chirp mass
e0 = parameter.Uniform(0.001, 0.99)('e0') #eccentricity
p_dist = parameter.Normal(0,1) #prior on pulsar distance
pphase = parameter.Uniform(0,2*np.pi) #prior on pulsar phase
gamma_P = parameter.Uniform(0,2*np.pi) #prior on pulsar gamma
l0 = parameter.Uniform(0,2*np.pi)('l0') #mean anomaly
gamma0 = parameter.Uniform(0,2*np.pi)('gamma0') #initial angle of periastron
psi = parameter.Uniform(0,2*np.pi)('psi') #polarization of the GW

For `ecc_res` to work, it is important to set tref to the last TOA of the dataset.

In [12]:
# Earliest and latest TOA of every pulsar.
tmin = [p.toas.min() for p in psrs]
tmax = [p.toas.max() for p in psrs]
# Reference time: the latest TOA across the whole array, divided by 86400
# (presumably converting seconds to days - confirm the units ecc_res expects).
tref = max(tmax)/86400

Now we construct the eccentric signal model.

In [13]:
#Eccentric signal construction
#To create a signal to be used by enterprise you must first create a residual 
#and use CWSignal to convert the residual as part of the enterprise Signal class
# ewf: the eccentric-binary residual model from ecc_res, wired to the parameters
# defined earlier in the notebook and referenced to tref.
ewf = ecc_res.add_ecc_cgw(gwtheta=gwtheta, gwphi=gwphi, log10_mc=log10_mc, q=q, log10_forb=log10_forb, e0=e0, l0=l0, gamma0=gamma0, 
                    inc=inc, psi=psi, log10_dist=log10_dist, p_dist=p_dist, pphase=pphase, gamma_P=gamma_P, tref=tref, 
                    psrterm=True, evol=True, waveform_cal=True, res='Both')
ew = CWSignal(ewf, ecc=False, psrTerm=False) #ecc and psrTerm are set to False to prevent excess parameters 
                                             #that are not used by ecc_res being introduced in the search.


Once all the signals that will be used in the signal model have been established, we add them together to make the signal collection.

In [14]:
# linearized timing model
tm = gp_signals.TimingModel(use_svd=False)
# full signal with red noise and white noise signals
# (EFAC + timing model + eccentric GW signal + EQUAD + ECORR + red noise)
s = ef + tm + ew + eq + ec + rn

Now that the signal collection has been established it is time to initialize the PTA object.

In [15]:
# initialize PTA
# Apply the signal collection to every pulsar, then combine the
# per-pulsar models into a single PTA object.
model = [s(psr) for psr in psrs]
pta = signal_base.PTA(model)

In [23]:
#set white noise params to backend values
# Build the dictionary handed to the PTA from the loaded noise file: keep
# only the EFAC entries that belong to pulsars in this dataset.
psr_names = [psr.name for psr in psrs]
noise_dict = {key: val for key, val in noise_params.items()
              if 'efac' in key and any(name in key for name in psr_names)}
pta.set_default_params(noise_dict)

INFO: enterprise.signals.signal_base: Setting B1855+09_430_ASP_efac to 1.149036589204419
INFO: enterprise.signals.signal_base: Setting B1855+09_430_PUPPI_efac to 1.0543836580555548
INFO: enterprise.signals.signal_base: Setting B1855+09_L-wide_ASP_efac to 1.080749375878354
INFO: enterprise.signals.signal_base: Setting B1855+09_L-wide_PUPPI_efac to 1.2429537099744354
INFO: enterprise.signals.signal_base: Setting J0030+0451_430_ASP_efac to 1.178988376393598
INFO: enterprise.signals.signal_base: Setting J0030+0451_430_PUPPI_efac to 1.0168220759726072
INFO: enterprise.signals.signal_base: Setting J0030+0451_L-wide_ASP_efac to 1.1594082526710712
INFO: enterprise.signals.signal_base: Setting J0030+0451_L-wide_PUPPI_efac to 1.1157243178133207
INFO: enterprise.signals.signal_base: Setting J0030+0451_S-wide_PUPPI_efac to 0.991497940413971
INFO: enterprise.signals.signal_base: Setting J0613-0200_Rcvr1_2_GASP_efac to 1.0670783724500694
INFO: enterprise.signals.signal_base: Setting J0613-0200_Rcvr1

In [16]:
# Display the model's free parameters and their priors as a sanity check.
pta.params

[B1855+09_cw_gamma_P:Uniform(pmin=0, pmax=6.283185307179586),
 B1855+09_cw_p_dist:Normal(mu=0, sigma=1),
 B1855+09_cw_pphase:Uniform(pmin=0, pmax=6.283185307179586),
 J0030+0451_cw_gamma_P:Uniform(pmin=0, pmax=6.283185307179586),
 J0030+0451_cw_p_dist:Normal(mu=0, sigma=1),
 J0030+0451_cw_pphase:Uniform(pmin=0, pmax=6.283185307179586),
 J0613-0200_cw_gamma_P:Uniform(pmin=0, pmax=6.283185307179586),
 J0613-0200_cw_p_dist:Normal(mu=0, sigma=1),
 J0613-0200_cw_pphase:Uniform(pmin=0, pmax=6.283185307179586),
 J1012+5307_cw_gamma_P:Uniform(pmin=0, pmax=6.283185307179586),
 J1012+5307_cw_p_dist:Normal(mu=0, sigma=1),
 J1012+5307_cw_pphase:Uniform(pmin=0, pmax=6.283185307179586),
 J1024-0719_cw_gamma_P:Uniform(pmin=0, pmax=6.283185307179586),
 J1024-0719_cw_p_dist:Normal(mu=0, sigma=1),
 J1024-0719_cw_pphase:Uniform(pmin=0, pmax=6.283185307179586),
 J1455-3330_cw_gamma_P:Uniform(pmin=0, pmax=6.283185307179586),
 J1455-3330_cw_p_dist:Normal(mu=0, sigma=1),
 J1455-3330_cw_pphase:Uniform(pmin=0,

## Sampler Setup <a class="anchor" id="third-bullet"></a>
Now that the PTA object has been initialized, it is time to set up the sampler. The first step is to establish a starting location for the walker.

In [17]:
#Select sample from the search parameters
# One value is drawn per parameter in pta.params and the draws are stacked into a flat vector.
# NOTE(review): no random seed is set in this notebook, so the start point presumably differs between runs.
xecc = np.hstack(np.array([p.sample() for p in pta.params])) #walker starting location
ndim = len(xecc) #number of dimensions

34

Before running an MCMC, it is a good idea to make sure that the likelihood function can calculate a probability at the starting location of the walker. This is mainly a compatibility check and can be skipped if things are working fine.

In [18]:
#testing to see if we get a likelihood value
# Evaluates the log-likelihood at the starting point; the value is shown as the cell output.
pta.get_lnlikelihood(xecc)

-845469.5562385791

In [19]:
# Start every pulsar-distance parameter at 0 instead of its random draw.
p_dist_params = []
for position, par_name in enumerate(pta.param_names):
    if 'p_dist' in par_name:
        p_dist_params.append(par_name)
        xecc[position] = 0

In [20]:
# Initial jump covariance: diagonal, with the same small variance (0.01**2)
# for every parameter.
cov = np.eye(ndim) * 0.01**2

Groups allow covariant parameters to be moved together as the walker traverses the parameter space.

In [21]:
#get groups
# Index groups (into pta.param_names) that are passed to the sampler below.
groups = get_ew_groups(pta)

[[0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33],
 [31, 32, 30, 33],
 [31, 32],
 [31, 33],
 [31, 30],
 [30, 32],
 [1, 4, 7, 10, 13, 16, 19, 22, 25, 28],
 [2, 5, 8, 11, 14, 17, 20, 23, 26, 29],
 [0, 3, 6, 9, 12, 15, 18, 21, 24, 27],
 [1, 2, 0],
 [1, 2, 0, 31, 32],
 [1, 2, 0, 31, 32, 30, 33],
 [4, 5, 3],
 [4, 5, 3, 31, 32],
 [4, 5, 3, 31, 32, 30, 33],
 [7, 8, 6],
 [7, 8, 6, 31, 32],
 [7, 8, 6, 31, 32, 30, 33],
 [10, 11, 9],
 [10, 11, 9, 31, 32],
 [10, 11, 9, 31, 32, 30, 33],
 [13, 14, 12],
 [13, 14, 12, 31, 32],
 [13, 14, 12, 31, 32, 30, 33],
 [16, 17, 15],
 [16, 17, 15, 31, 32],
 [16, 17, 15, 31, 32, 30, 33],
 [19, 20, 18],
 [19, 20, 18, 31, 32],
 [19, 20, 18, 31, 32, 30, 33],
 [22, 23, 21],
 [22, 23, 21, 31, 32],
 [22, 23, 21, 31, 32, 30, 33],
 [25, 26, 24],
 [25, 26, 24, 31, 32],
 [25, 26, 24, 31, 32, 30, 33],
 [28, 29, 27],
 [28, 29, 27, 31,

In [22]:
#output directory for all the chains, params, and groups
# NOTE(review): absolute path on one user's machine; later cells append file
# names directly to it, so keep the trailing '/'.
chaindir = '/home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_search_data/ideal_data_test/detection_runs/fixed_coords/correct_dist/efac_added/logmc_9.5/source11/run1/'

In [24]:
resume = True #If True, this allows the sampler to resume from the last walker location.
sampler = ptmcmc(ndim, pta.get_lnlikelihood, pta.get_lnprior, cov, groups=groups,
                 outDir=chaindir, resume=resume) #establish sampler object

# write parameter file and parameter groups file
np.savetxt(os.path.join(chaindir, 'params.txt'), list(map(str, pta.param_names)), fmt='%s')
# The groups have different lengths, so they cannot be turned into a regular
# array: np.savetxt warns about the ragged input on older NumPy and raises on
# NumPy >= 1.24. Write one group per line instead, e.g. "[0, 1, 2]".
with open(os.path.join(chaindir, 'groups.txt'), 'w') as groups_file:
    for group in groups:
        groups_file.write('{0}\n'.format([int(index) for index in group]))

  return array(a, dtype, copy=False, order=order)


To help improve sampling of the parameter space, it is best to set up as many jump proposals as possible.

In [25]:
# add prior draws to proposal cycle
# The empirical distribution must be handed to the JumpProposal object;
# otherwise draw_from_empirical_distr has nothing to draw from.
jp = JP(pta, empirical_distr=empirical_distr)
sampler.addProposalToCycle(jp.draw_from_prior, 5)

# per-pulsar parameters that each get their own single-parameter prior draw
pdist_params = [psr.name+'_cw_p_dist' for psr in psrs]           #pulsar distances
pphase_params = [psr.name+'_cw_pphase' for psr in psrs]          #pulsar phases
gammap_params = [psr.name+'_cw_gamma_P' for psr in psrs]         #pulsar gamma_P
rn_params = [psr.name+'_red_noise_gamma' for psr in psrs]        #red noise spectral index
rna_params = [psr.name+'_red_noise_log10_A' for psr in psrs]     #red noise amplitude

for par_names in (pdist_params, pphase_params, gammap_params, rn_params, rna_params):
    for par_name in par_names:
        sampler.addProposalToCycle(jp.draw_from_par_prior(par_name), 5)

#RN empirical distribution prior draw
if empirical_distr is not None:
    sampler.addProposalToCycle(jp.draw_from_empirical_distr, 5)

#draw from ewf priors
# The loop variable is deliberately not called `ew`: that name holds the
# eccentric CWSignal used to build the model and must not be overwritten.
ew_params = ['e0','log10_Mc', 'q', 'l0', 'gamma0', 'psi']
for par_name in ew_params:
    sampler.addProposalToCycle(jp.draw_from_par_prior(par_name), 5)

In [26]:
# int() because 1.5e6 is a float literal.
N = int(1.5e6) #number of iterations

Once the sampler setup is complete, go ahead and run it!

In [27]:
#run sampler
# Starts from xecc and runs for N iterations. The SCAM/AM/DE weights are passed
# straight to the sampler; DE is given weight 0 here.
# NOTE(review): this is a long run - the captured output shows ~70,000 s at 77% complete.
sampler.sample(xecc, N, SCAMweight=50, AMweight=50, DEweight=0)

Resuming run from chain file /home/bcheeseboro/nanograv_proj/enterprise_proj/ecc_search_data/ideal_data_test/detection_runs/fixed_coords/correct_dist/efac_added/logmc_9.5/source11/run1//chain_1.txt
Adding DE jump with weight 0


  logpdf = np.log(self.prior(value, **kwargs))


Finished 76.73 percent in 70368.919638 s Acceptance rate = 0.0359869

KeyboardInterrupt: 