In [None]:
from __future__ import division, print_function, unicode_literals

import os, glob
import libstempo as t2

import dr2lite_utils as dr2u

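This notebook generates the `cut` and `freqfilt` datasets for the full IPTA. It also splits out each dataset (including `classic`) by PTA. This results in 12 datasets (3 types × {IPTA, NANOGrav, PPTA, EPTA}):

`data/partim_${type}_${pta}`

(the full `classic` dataset is the one exception: it is written to `data/partim_classic`)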

# data preprocessing for `cut` analysis!

start with full DR2, a.k.a. `classic`

In [None]:
# Root of the local IPTA DR2 checkout.  Set the IPTA_DR2_DATA environment
# variable to point at your own copy; the original hard-coded location is
# kept as the fallback so existing setups keep working unchanged.
DR2DATA = os.path.abspath(
    os.environ.get('IPTA_DR2_DATA', '/home/pbaker/nanograv/data/DR2/')
)
datadir = os.path.join(DR2DATA, 'release/VersionB')

# directory that receives the cleaned .par / combined .tim files below
outdir = 'data/partim_classic'
# create it in-process instead of shelling out to `mkdir -p`: portable, and a
# failure raises instead of being silently ignored (py2-compatible form)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

### clean `.par`, combine `.tim`, make pulsar list

In [None]:
# collect the *IPTADR2.par files found in the J* directories under datadir
parfiles = glob.glob(datadir + '/J*/*IPTADR2.par')

psr_names = []
for par_path in parfiles:
    # the pulsar name is taken from the directory that holds the par file
    psr = par_path.split('/')[-2]
    psr_names.append(psr)
    # cleaned copy goes to <outdir>/<pulsar>.par
    dr2u.clean_par(par_path, os.path.join(outdir, '{}.par'.format(psr)))

In [None]:
# collect the *IPTADR2.tim files found in the J* directories under datadir
timfiles = glob.glob(datadir + '/J*/*IPTADR2.tim')

for tim_path in timfiles:
    # the pulsar name is taken from the directory that holds the tim file
    psr = tim_path.split('/')[-2]
    # combined tim file goes to <outdir>/<pulsar>.tim
    dr2u.combine_tim(tim_path, os.path.join(outdir, '{}.tim'.format(psr)))

In [None]:
# alphabetise in place so the list file (and later loops) have a stable order
psr_names.sort()

# one pulsar name per line
psrfile = os.path.join(outdir, 'psrlist_classic.txt')
with open(psrfile, 'w') as fh:
    fh.write("".join("{:s}\n".format(psr) for psr in psr_names))

## generate `cut` dataset

TODO: alter `dr2lite_utils.py` to allow for a list of flags to exclude (in addition to keep)

In [None]:
# `group` flag values to keep
# list of all non-legacy backends (is this complete?)
keep_groups = [
    '327_ASP', '430_ASP', 'L-wide_ASP', 'S-wide_ASP',
    '327_PUPPI', '430_PUPPI', 'L-wide_PUPPI', 'S-wide_PUPPI',
    'Rcvr_800_GASP', 'Rcvr1_2_GASP',
    'Rcvr_800_GUPPI', 'Rcvr1_2_GUPPI',
    'PDFB_10CM', 'PDFB_20CM', 'PDFB_40CM',
    'CPSR2_20CM', 'CPSR2_50CM',
    'WBCORR_10CM', 'WBCORR_20CM',
    'EFF.EBPP.1360', 'EFF.EBPP.1410', 'EFF.EBPP.2639',
    'JBO.DFB.1400', 'JBO.DFB.1520', 'JBO.DFB.5000',
    'NRT.BON.1400', 'NRT.BON.1600', 'NRT.BON.2000',
    'WSRT.P1.328', 'WSRT.P1.328.C', 'WSRT.P1.323.C',
    'WSRT.P1.382', 'WSRT.P1.382.C', 'WSRT.P1.367.C',
    'WSRT.P1.840', 'WSRT.P1.840.C',
    'WSRT.P1.1380', 'WSRT.P1.1380.C',
    'WSRT.P1.1380.1',
    'WSRT.P1.1380.2', 'WSRT.P1.1380.2.C',
    'WSRT.P1.2273.C',
]
filt = {'group': keep_groups}

# every pulsar gets the same (shared) filter dictionary
psrdict = dict.fromkeys(psr_names, filt)

dr2u.make_dataset(
    psrdict,
    indir='data/partim_classic',
    outdir='data/partim_cut_IPTA',
    tmin=2,
    min_toas=10,
)

generate pulsar list for `cut` dataset

In [None]:
datadir = 'data/partim_cut_IPTA'
parfiles = glob.glob('data/partim_cut_IPTA/*.par')

# pulsar name = file name up to its first '.', sorted alphabetically
psrlist = sorted(path.split('/')[-1].split('.')[0] for path in parfiles)

# one pulsar name per line
list_file = os.path.join(datadir, 'psrlist_cut_IPTA.txt')
with open(list_file, 'w') as fh:
    fh.write("".join("{:s}\n".format(psr) for psr in psrlist))

### split out each PTA from full `cut` dataset

In [None]:
datadir = 'data/partim_cut_IPTA'

# reload the pulsar list from disk, so this section can be run on its own
# if you skipped above steps
list_file = os.path.join(datadir, 'psrlist_cut_IPTA.txt')
with open(list_file, 'r') as fh:
    names = []
    for row in fh:
        # drop the trailing newline (and any surrounding whitespace)
        names.append(row.strip())
    psrlist = names

In [None]:
PTAs = ['NANOGrav', 'PPTA', 'EPTA']
outdir_base = 'data/partim_cut_{:s}'

for pta in PTAs:
    # select on the `pta` flag only; the same filter is shared by every pulsar
    filt = {'pta': [pta]}
    psrdict = dict.fromkeys(psrlist, filt)

    # no additional filtering... just select by PTA
    pta_outdir = outdir_base.format(pta)
    dr2u.make_dataset(
        psrdict,
        indir=datadir,
        outdir=pta_outdir,
        frequency_filter=False,
        min_toas=1,
        tmin=1,
    )

In [None]:
outdir_base = 'data/partim_cut_{:s}'
for pta in ['NANOGrav', 'PPTA', 'EPTA']:

    # NOTE: rebinds `datadir` to the per-PTA directory on each pass
    datadir = outdir_base.format(pta)
    parfiles = glob.glob('{:s}/*.par'.format(datadir))

    # pulsar name = par file name up to its first '.'
    psrnames = []
    for path in parfiles:
        psrnames.append(path.split('/')[-1].split('.')[0])
    psrnames = sorted(psrnames)

    # one pulsar name per line
    list_file = os.path.join(datadir, 'psrlist_cut_{:s}.txt'.format(pta))
    with open(list_file, 'w') as fh:
        fh.write("".join("{:s}\n".format(psr) for psr in psrnames))

## generate per-PTA `classic` datasets

same per-PTA split out procedure as `cut`, but start with `classic`

In [None]:
# The full `classic` dataset is written to data/partim_classic, with its
# pulsar list in psrlist_classic.txt, by the first section of this notebook.
# Read from that same location so a fresh Restart & Run All works (the
# previous `data/partim_classic_IPTA` path is never created here).
datadir = 'data/partim_classic'

# get psrlist, if you skipped above steps
list_file = os.path.join(datadir, 'psrlist_classic.txt')
with open(list_file, 'r') as f:
    # skip blank lines so a stray empty line never becomes an empty pulsar name
    psrlist = [line.strip() for line in f if line.strip()]

In [None]:
PTAs = ['NANOGrav', 'PPTA', 'EPTA']
# Write to data/partim_classic_<PTA>, the directory the pulsar-list cell that
# follows reads from.  (A leftover `_test` suffix previously sent the output
# to a different directory, so that cell found no par files.)
outdir_base = 'data/partim_classic_{:s}'

for pta in PTAs:
    # select on the `pta` flag only; the same filter is shared by every pulsar
    filt = {'pta':[pta]}
    psrdict = {}
    for p in psrlist:
        psrdict[p] = filt

    # no additional filtering... just select by PTA
    dr2u.make_dataset(psrdict, indir=datadir, outdir=outdir_base.format(pta),
                      frequency_filter=False, min_toas=2, tmin=0)

generate pulsar lists

In [None]:
outdir_base = 'data/partim_classic_{:s}'
for pta in ['NANOGrav', 'PPTA', 'EPTA']:

    # NOTE: rebinds `datadir` to the per-PTA directory on each pass
    datadir = outdir_base.format(pta)
    parfiles = glob.glob('{:s}/*.par'.format(datadir))

    # pulsar name = par file name up to its first '.', sorted alphabetically
    psrnames = sorted(path.split('/')[-1].split('.')[0] for path in parfiles)

    # one pulsar name per line
    list_file = os.path.join(datadir, 'psrlist_classic_{:s}.txt'.format(pta))
    with open(list_file, 'w') as fh:
        for psr in psrnames:
            fh.write("{:s}\n".format(psr))

## generate `freqfilt` dataset

In [None]:
# The full `classic` dataset is written to data/partim_classic, with its
# pulsar list in psrlist_classic.txt, by the first section of this notebook.
# Read from that same location so a fresh Restart & Run All works (the
# previous `data/partim_classic_IPTA` path is never created here).
datadir = 'data/partim_classic'

# get psrlist, if you skipped above steps
list_file = os.path.join(datadir, 'psrlist_classic.txt')
with open(list_file, 'r') as f:
    # skip blank lines so a stray empty line never becomes an empty pulsar name
    psrlist = [line.strip() for line in f if line.strip()]

In [None]:
filt = {'pta':['NANOGrav', 'PPTA', 'EPTA']}  # keep all backends

# every pulsar gets the same (shared) filter dictionary
psrdict = {}
for p in psrlist:
    psrdict[p] = filt

# Read the `classic` par/tim files from data/partim_classic -- the directory
# the first section of this notebook writes, and the same input the `cut`
# dataset is built from.  (`data/partim_classic_IPTA` is never created here.)
dr2u.make_dataset(psrdict, indir='data/partim_classic', outdir='data/partim_freqfilt_IPTA',
                  frequency_filter=True, tmin=2, min_toas=10)

In [None]:
datadir = 'data/partim_freqfilt_IPTA'
parfiles = glob.glob('data/partim_freqfilt_IPTA/*.par')

# pulsar name = par file name up to its first '.'
psrlist = [path.split('/')[-1].split('.')[0] for path in parfiles]
psrlist.sort()

# one pulsar name per line
list_file = os.path.join(datadir, 'psrlist_freqfilt_IPTA.txt')
with open(list_file, 'w') as fh:
    fh.write("".join("{:s}\n".format(psr) for psr in psrlist))

### split out each PTA from full `freqfilt` dataset

In [None]:
PTAs = ['NANOGrav', 'PPTA', 'EPTA']
# Per-PTA `freqfilt` output goes to data/partim_freqfilt_<PTA>, which is where
# the pulsar-list cell that follows looks.  (This was a copy-paste of the
# classic `..._test` pattern, which wrote freqfilt data into classic dirs.)
outdir_base = 'data/partim_freqfilt_{:s}'

for pta in PTAs:
    # select on the `pta` flag only; the same filter is shared by every pulsar
    filt = {'pta':[pta]}
    psrdict = {}
    for p in psrlist:
        psrdict[p] = filt

    # no additional filtering... just select by PTA
    # (`datadir` / `psrlist` come from the freqfilt pulsar-list cell above)
    dr2u.make_dataset(psrdict, indir=datadir, outdir=outdir_base.format(pta),
                      frequency_filter=False, min_toas=2, tmin=0)

In [None]:
outdir_base = 'data/partim_freqfilt_{:s}'
for pta in ['NANOGrav', 'PPTA', 'EPTA']:

    # NOTE: rebinds `datadir` to the per-PTA directory on each pass
    datadir = outdir_base.format(pta)
    parfiles = glob.glob('{:s}/*.par'.format(datadir))

    # pulsar name = par file name up to its first '.', sorted alphabetically
    basenames = [path.split('/')[-1] for path in parfiles]
    psrnames = sorted(base.split('.')[0] for base in basenames)

    # one pulsar name per line
    list_file = os.path.join(datadir, 'psrlist_freqfilt_{:s}.txt'.format(pta))
    with open(list_file, 'w') as fh:
        for psr in psrnames:
            fh.write("{:s}\n".format(psr))