In [5]:
from __future__ import division
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
import libstempo as t2
from enterprise.signals import utils

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

ImportError: No module named enterprise.signals

# 1. Clean par and tim files

1. Make a directory called `partim` that contains one par file and one tim file per pulsar.

2. Clean the par files by removing all noise and DM model parameters. We will also add in DM1 and DM2 if not already present in the par file.

3. Concatenate all the tim files referenced by `INCLUDE` lines into a single tim file per pulsar.

### Create partim directory

In [None]:
# Root of the local IPTA DR2 checkout. This is machine-specific; edit as needed.
DR2DATA = '/home/nanograv/local_data/IPTA_DR2/'
# The empty final component keeps a trailing separator, which the tim-file
# cell relies on when it builds its glob as `datadir + 'J*'`.
datadir = os.path.join(DR2DATA, 'release', 'VersionA', '')
# Par files matching `*IPTADR2.par` inside each J* pulsar directory.
# Sorted so that repeated runs process them in the same order.
parfiles = sorted(glob.glob(os.path.join(datadir, 'J*', '*IPTADR2.par')))
# Create the output directory without shelling out (equivalent to `mkdir -p`).
if not os.path.isdir('partim'):
    os.makedirs('partim')

### Clean par files

In [None]:
# Par-file lines that start with any of these prefixes are dropped.
cut = ('T2EFAC', 'T2EQUAD', 'ECORR', 'TNEF', 'TNEQ', 'TNECORR', 'DMMODEL',
       '_DM', '_CM', 'CONSTRAIN', 'DMOFF', 'START', 'FINISH',
       'TZRSITE'  # maybe remove this
       )


def _clean_par_lines(lines, prefixes=cut):
    """Return the par-file lines to keep, adding DM1/DM2 where missing.

    Parameters
    ----------
    lines : list of str
        Raw lines of one par file.
    prefixes : sequence of str
        Lines starting with any of these strings are removed.

    Returns
    -------
    list of str
        The surviving lines. ``DM1 0 1`` and/or ``DM2 0 1`` are inserted
        directly after the ``DM`` line, but only for the parameters the
        par file does not already define.
    """
    prefixes = tuple(prefixes)
    # First token of every non-blank line, used to detect existing DM1/DM2.
    present = set(line.split()[0] for line in lines if line.split())
    cleaned = []
    for line in lines:
        if line.startswith(prefixes):
            continue
        cleaned.append(line)
        tokens = line.split()
        if tokens and tokens[0] == 'DM':
            missing = [par for par in ('DM1', 'DM2') if par not in present]
            # A final DM line without a newline would otherwise be glued to
            # the inserted parameter.
            if missing and not line.endswith('\n'):
                cleaned[-1] = line + '\n'
            cleaned.extend('{} 0 1\n'.format(par) for par in missing)
            # Avoid inserting twice if the file has more than one DM line.
            present.update(missing)
    return cleaned


for parfile in parfiles:
    with open(parfile, 'r') as fin:
        lines = fin.readlines()
    if not lines:
        print('Skipping empty par file {}'.format(parfile))
        continue
    # Assumes the first line has the form `<keyword> <pulsar name>`
    # (e.g. `PSRJ J1713+0747`) -- TODO confirm for every par file.
    name = lines[0].split()[1]
    with open('partim/{}.par'.format(name), 'w') as fout:
        fout.writelines(_clean_par_lines(lines, cut))

### Combine tim files

In [None]:
psrs = glob.glob(datadir + 'J*')
for psr in psrs:
    # The pulsar name is the last component of the directory path.
    name = os.path.basename(psr)

    # Collect the tim files referenced by the master tim file. Only genuine
    # `INCLUDE <path>` commands are followed, so that other commands or
    # commented-out includes are not mistaken for file names.
    timfiles = []
    with open(os.path.join(psr, '{}.IPTADR2.tim'.format(name)), 'r') as tfile:
        for line in tfile:
            tokens = line.split()
            if len(tokens) >= 2 and tokens[0] == 'INCLUDE':
                timfiles.append(os.path.join(psr, tokens[1]))

    # Write one flat tim file with a single FORMAT/MODE header.
    with open('partim/{}.tim'.format(name), 'w') as fout:
        fout.write('FORMAT 1\n')
        fout.write('MODE 1\n')
        for tim in timfiles:
            with open(tim, 'r') as fin:
                for line in fin:
                    # Drop each included file's own header lines and any line
                    # that starts with 'C'.
                    # NOTE(review): presumably 'C' marks comment lines; this
                    # also drops any TOA line whose first character is 'C' --
                    # confirm that is intended.
                    if not line.startswith(('FORMAT', 'MODE', 'C')):
                        fout.write('%s\n' % line.rstrip())

# 2. Create filtered par and tim files

Here we create a filtered dataset by applying a frequency filter and a PTA filter

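1. **Frequency filter**: Only keep TOAs that have a certain radio-frequency coverage (`bw`) over a certain period of time (`dt`). In the code below, `bw` is the minimum ratio between the highest and lowest radio frequency in a time window. For instance, `dt=7` and `bw=1.1` will only keep TOAs in 7-day windows whose maximum frequency is at least 1.1 times the minimum frequency. Windows that contain a TOA above 3000 MHz are kept regardless of the ratio.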

2. **PTA filter**: Only keep TOAs corresponding to a given PTA, or group of PTAs.

In [2]:
def get_dm_bins(toas, dt=7):
    """Split TOAs into consecutive, equal-width time bins.

    Parameters
    ----------
    toas : array-like
        TOA epochs. The bin count is computed from ``86400*dt``, so the
        values are expected in seconds (the caller in this notebook passes
        ``psr.toas()*86400``).
    dt : float
        Nominal bin width in days.

    Returns
    -------
    list of numpy boolean arrays
        One mask per bin, each the same length as ``toas``, that is True for
        the TOAs inside that bin. Both bin edges are inclusive, so a TOA
        lying exactly on an interior edge appears in two neighbouring masks.
        An empty input gives an empty list.
    """
    toas = np.asarray(toas)
    if toas.size == 0:
        return []

    tfirst, tlast = toas.min(), toas.max()
    # A single TOA (or identical TOAs) has zero span; always use at least
    # one bin so np.histogram does not reject `bins=0`.
    nbins = max(1, int(np.ceil((tlast - tfirst) / (86400 * dt))))
    # Pad the range by one second on each side so the extreme TOAs fall
    # strictly inside the outer edges.
    _, xedges = np.histogram(toas, bins=nbins, range=[tfirst - 1, tlast + 1])
    return [np.logical_and(toas >= lo, toas <= hi)
            for lo, hi in zip(xedges[:-1], xedges[1:])]

In [3]:
def filter_psr(psr, bw=1.1, dt=7, filters=None, frequency_filter=True, plotting=True):
    """Flag TOAs of a `libstempo` pulsar as deleted according to flag and frequency filters.

    The pulsar object is modified in place and also returned.

    Parameters
    ----------
    psr : libstempo pulsar object
        Must provide ``deleted``, ``toas()``, ``freqs``, ``flagvals()``,
        ``designmatrix()``, ``pars()`` and ``deletedmask()``.
    bw : float
        Minimum ratio ``max(freq) / min(freq)`` required within a time bin.
        Bins whose highest frequency exceeds 3000 are kept regardless.
    dt : float
        Width in days of the time bins used by the frequency filter.
    filters : list of dict, optional
        Each dict maps a flag name to a list of accepted values, e.g.
        ``[{'pta': ['NANOGrav', 'PPTA']}]``. A TOA is a candidate if it
        matches any of the listed values. If omitted or empty, every TOA is
        a candidate.
    frequency_filter : bool
        If False, all candidate TOAs are kept without checking frequency
        coverage.
    plotting : bool
        If True, plot frequency against TOA, highlighting the NANOGrav TOAs
        and the TOAs that survive the filter.

    Returns
    -------
    The same ``psr`` object. Kept TOAs have ``deleted == 0`` and all others
    ``deleted == 1``. Timing parameters whose design-matrix column sums to
    zero over the kept TOAs have their fit switched off and value set to 0.

    Notes
    -----
    If the frequency filter finds no qualifying bin, the function returns
    early with every TOA still flagged as deleted.
    """

    psr.deleted[:] = 1
    print('Working on PSR {}'.format(psr.name))

    # Flag filtering: gather the indices of TOAs matching any requested value.
    if filters:
        selected = []
        for fltr in filters:
            for key, val in fltr.items():
                print('Keeping TOAs corresponding to {} {}'.format(key, val))
                flags = psr.flagvals(key)
                selected.extend(np.flatnonzero(flags == v) for v in val)
        # Concatenate before np.unique so that index arrays of different
        # lengths (several filters / values) are handled correctly.
        if selected:
            idx0 = np.unique(np.concatenate(selected))
        else:
            idx0 = np.array([], dtype=int)
    else:
        # No flag filter requested: every TOA is a candidate.
        idx0 = np.arange(len(psr.deleted))

    # filter for frequency coverage
    if frequency_filter:
        bins = get_dm_bins(psr.toas()*86400, dt=dt)
        # Set membership is O(1), unlike `x in ndarray`.
        candidates = set(idx0.tolist())
        idx = []
        for bn in bins:
            if bn.sum() > 1:
                ix = np.array([i for i in np.flatnonzero(bn) if i in candidates],
                              dtype=int)
                if len(ix) > 0:
                    fmax = psr.freqs[ix].max()
                    fmin = psr.freqs[ix].min()
                    if fmax / fmin >= bw or fmax > 3000:
                        idx.append(ix)

        # check for empty list (i.e. there is no multi-frequency data)
        if not idx:
            print("No multi-frequency data, returning original psr")
            # NOTE(review): psr.deleted was set to 1 for every TOA above and
            # is not restored here -- confirm this is the intended result.
            return psr

        idx = np.unique(np.concatenate(idx))
    else:
        idx = idx0
    psr.deleted[idx] = 0

    # filter design matrix
    mask = psr.deleted[:] == 0
    M = psr.designmatrix()[mask, :]
    dpars = []
    # The first design-matrix column is skipped when pairing columns with
    # psr.pars() -- presumably it is the constant offset; verify against libstempo.
    for par, val in zip(psr.pars(), M.sum(axis=0)[1:]):
        if val == 0:
            dpars.append(par)
            psr[par].fit = False
            psr[par].val = 0.0
    print('Cutting {} TOAs'.format(np.sum(~mask)))
    print('Turning off fit for {}'.format(dpars))
    print('\n')
    if plotting:
        plt.figure(figsize=(8,3))
        nix = psr.flagvals('pta') == 'NANOGrav'
        plt.plot(psr.toas(), psr.freqs, '.')
        plt.plot(psr.toas()[nix], psr.freqs[nix], '.', color='C2')
        plt.plot(psr.toas()[~psr.deletedmask()], psr.freqs[~psr.deletedmask()], '.', color='C1')
        plt.title(psr.name)
    return psr

In [4]:
def make_dataset(psrdict, outdir='partim_filtered', indir='partim', bw=1.1, dt=30):
    """Write a filtered par/tim dataset for the pulsars in ``psrdict``.

    Parameters
    ----------
    psrdict : dict
        Maps a pulsar name to the ``filters`` argument of ``filter_psr``,
        e.g. ``{'J1713+0747': [{'pta': ['NANOGrav']}]}``.
    outdir : str
        Output directory. Any existing content is deleted before writing.
    indir : str
        Directory holding the input ``<name>.par`` / ``<name>.tim`` files.
    bw, dt : float
        Passed through to ``filter_psr``.

    Pulsars for which filtering or saving raises an exception are skipped,
    and the error is printed.
    """
    import shutil  # local import: not part of the notebook's import cell

    # Start from an empty output directory without going through the shell,
    # so paths with spaces or shell metacharacters are handled safely.
    if os.path.isdir(outdir) and not os.path.islink(outdir):
        shutil.rmtree(outdir)
    elif os.path.lexists(outdir):
        os.remove(outdir)
    os.makedirs(outdir)

    for pname, filters in sorted(psrdict.items()):
        parfile = os.path.join(indir, '{}.par'.format(pname))
        timfile = os.path.join(indir, '{}.tim'.format(pname))
        psr = t2.tempopulsar(parfile, timfile, maxobs=30000)
        # These two pulsars bypass the frequency-coverage filter.
        # NOTE(review): the reason is not recorded here -- confirm.
        frequency_filter = pname not in ('J0437-4715', 'J2317+1439')
        try:
            psr = filter_psr(psr, bw=bw, dt=dt, filters=filters,
                             frequency_filter=frequency_filter)
            psr.savetim(os.path.join(outdir, '{}.tim'.format(pname)))
            psr.savepar(os.path.join(outdir, '{}.par'.format(pname)))
        except Exception as err:
            # Report the actual failure so real errors are not mistaken for
            # "no data"; KeyboardInterrupt still propagates.
            print("no data, skipping {} ({}: {})".format(pname, type(err).__name__, err))
            print("\n")
        # Release the pulsar object before loading the next one.
        del psr

### Pulsar dictionary

This is input to the `make_dataset` function. We list the pulsars we want to include along with the corresponding PTAs we want to use. In the case below we only have one PTA per pulsar but one could use `['NANOGrav', 'PPTA']` as well as any other combination.

In [None]:
# Request the EPTA TOAs for every pulsar that has a par file in `partim/`.
PSRnames = glob.glob('partim/*.par')
psrdict = {}

for psrname in PSRnames:
    # e.g. 'partim/J1713+0747.par' -> 'J1713+0747'
    name = psrname.rpartition('/')[2].partition('.')[0]
    psrdict[name] = [{'pta': ['EPTA']}]

In [None]:
# Write the filtered dataset for the `psrdict` currently in scope.
# `make_dataset` deletes and recreates `partim_epta` before writing.
make_dataset(psrdict, outdir='partim_epta')

In [None]:
# NANOGrav TOAs for the pulsars listed below ...
psrdict = {
    pname: [{'pta': ['NANOGrav']}]
    for pname in (
        'J1713+0747', 'J1909-3744', 'J1640+2224', 'J1600-3053',
        'J2317+1439', 'J1918-0642', 'J1614-2230', 'J1744-1134',
        'J0030+0451', 'J2145-0750', 'J1857+0943', 'J1853+1303',
        'J0613-0200', 'J1455-3330', 'J1741+1351', 'J2010-1323',
        'J1024-0719', 'J1012+5307',
    )
}
# ... and PPTA TOAs for J0437-4715.
psrdict['J0437-4715'] = [{'pta': ['PPTA']}]

In [None]:
# Write the filtered dataset for the `psrdict` currently in scope.
# `make_dataset` deletes and recreates `partim_filtered` before writing.
make_dataset(psrdict, outdir='partim_filtered')

In [None]:
# Start with NANOGrav-only TOAs for the pulsars listed below ...
psrdict = {
    pname: [{'pta': ['NANOGrav']}]
    for pname in (
        'J1713+0747', 'J1909-3744', 'J1640+2224', 'J1600-3053',
        'J2317+1439', 'J1918-0642', 'J1614-2230', 'J1744-1134',
        'J0030+0451', 'J2145-0750', 'J1857+0943', 'J1853+1303',
        'J0613-0200', 'J1455-3330', 'J1741+1351', 'J2010-1323',
        'J1024-0719', 'J1012+5307',
    )
}
# ... then accept both NANOGrav and PPTA TOAs for these three pulsars ...
psrdict.update({
    pname: [{'pta': ['NANOGrav', 'PPTA']}]
    for pname in ('J1713+0747', 'J1909-3744', 'J1744-1134')
})
# ... and use PPTA TOAs only for J0437-4715.
psrdict['J0437-4715'] = [{'pta': ['PPTA']}]

In [None]:
# Write the filtered dataset for the `psrdict` currently in scope.
# `make_dataset` deletes and recreates `partim_nanoppta` before writing.
make_dataset(psrdict, outdir='partim_nanoppta')

In [None]:
# Print, per pulsar and per PTA, the timing span, a mean-uncertainty measure,
# the cadence and a combined figure of merit for the TOAs kept by `filter_psr`.
# NOTE(review): `psrlist` is not defined in any visible cell; it must be set
# (a list of pulsar names with files in `partim/`) before running this cell.
for psrName in psrlist:
    parfile = 'partim/' + psrName + '.par'
    timfile = 'partim/' + psrName + '.tim'
    psr = t2.tempopulsar(parfile, timfile, maxobs=30000)
    ptas = ['PPTA', 'EPTA', 'NANOGrav']
    #plt.figure(figsize=(8,3))
    for pta in ptas:
        if pta in psr.flagvals('pta'):
            # NOTE(review): `filter_psr` compares `bw` with the ratio
            # max(freq)/min(freq), so bw=400 looks like a leftover from a
            # bandwidth-in-MHz convention -- confirm the intended value.
            # NOTE(review): `filter_psr` switches off fit flags on `psr` in
            # place, and the same object is reused for the next PTA.
            psr = filter_psr(psr, bw=400, dt=30, filters=[{'pta': [pta]}], plotting=False)
            if psr is not None:
                idx = ~psr.deletedmask()
                if not idx.any():
                    # Nothing survived the filter; .max()/.min() on an empty
                    # selection would raise.
                    print('{} {}: no TOAs kept, skipping'.format(psrName, pta))
                    continue
                toas = psr.toas()
                kept = toas[idx]
                full_span = toas.max() - toas.min()
                kept_span = kept.max() - kept.min()
                mean_inv_err = np.mean(1 / psr.toaerrs[idx])
                U, _ = utils.create_quantization_matrix(kept*86400, dt=86400, nmin=1)
                # Kept time span divided by the number of columns of U.
                cadence = kept_span / U.shape[1]
                #plt.errorbar(psr.toas()[idx]/86400, psr.residuals()[idx]*1e6, psr.toaerrs[idx], fmt='.', label=pta)
                # Single-argument print() calls give the same output on
                # Python 2 and Python 3.
                print('...')
                print('{} {}'.format(psrName, pta))
                print('{} {}'.format(full_span / 365.25, kept_span / 365.25))
                print('{} {}'.format(1 / mean_inv_err, cadence))
                print('{}'.format(kept_span**(13/3.) * (mean_inv_err * 1e-6)**2 / cadence))
                print('...')
                print('')
#plt.legend(loc='best', fontsize=7)