# query_dl_nicer.ipynb
This notebook:
* queries the NICER observation tables for a source
* saves the meta information on observations for a source to a table
* downloads the data files from the NASA servers
* decrypts GPG files if needed
* applies the barycenter correction

You must run `heainit` in the shell that this notebook is launched from, otherwise `nicerl2` and `barycorr` will not run! If you have not done so, close the notebook, run `heainit`, then re-open the notebook.

In [None]:
import os
import subprocess
import requests  ## to get and read in the website for BeautifulSoup to parse
from bs4 import BeautifulSoup  ## to scrape and parse the website html
from astropy.table import Table, Column  ## to use astropy tables as our data storage and interaction format
from astropy import units as u
import urllib.request ## to download the data files via ftp
import sys
import numpy as np
from datetime import datetime

## Credentials and decryption passphrase

## Methods that parse the observation segment summary table website

In [None]:
def scrape_site_for_seg_table():
    """ Scrapes the NICER target segment website for the header and body
        of the big table listing all the observations.

        Authenticates with the notebook-level names `cred_user` and
        `cred_pass`, which must be defined before this is called.

        Returns
        -------
        t_header, t_body
            The <thead> and <tbody> elements of the table whose id is
            'nicer_observation_segment_summary'.

        Raises
        ------
        requests.HTTPError
            If the server answers with a 4xx/5xx status.
        RuntimeError
            If the returned page does not contain the expected table.
    """
    nicer_tab_url = "https://heasarc.gsfc.nasa.gov/docs/nicer/team_schedule/nicer_seg_team.html"
    # using the NICER team credentials
    response = requests.get(nicer_tab_url, auth=(cred_user, cred_pass))
    # Fail here with the HTTP status instead of later with an opaque
    # "'NoneType' object has no attribute 'find'".
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    t = soup.find('table', attrs={'id': 'nicer_observation_segment_summary'})
    if t is None:
        raise RuntimeError("No table with id 'nicer_observation_segment_summary' "
                           "found at %s" % nicer_tab_url)
    t_header = t.find('thead')
    t_body = t.find('tbody')
    return t_header, t_body

def get_column_labels(t_head):
    """ Collects the text of every 'th' element found in the table header,
        in the order they are found. The result is used as the 'description'
        of the columns in our output table. """
    return [header_cell.text for header_cell in t_head.find_all('th')]

def get_object_observation_table(obj_name):
    """ Gets and makes a table of all the observations for a specified object,
        and returns the information as an astropy table.

        A row of the website table is skipped when its target ID (4th cell)
        is 0, when its target name (5th cell) is not equal to obj_name, or
        when its archive cell (14th cell) contains no links.

        Parameters
        ----------
        obj_name : str
            Target name, compared for exact equality with the target-name
            cell of each row.

        Returns
        -------
        obj_tab : astropy.table.Table
            One row per kept observation, with the columns 'obs id',
            'start time', 'stop time', 'ra', 'dec', 'on-targ expo',
            'good expo', 'proc date', 'proc state', 'proc status',
            'proc ver', 'dir link' and 'tar link'.
    """
    tab_header, tab_body = scrape_site_for_seg_table()
    col_labels = get_column_labels(tab_header)
    time_fmt = "%Y-%m-%dT%H:%M:%S"
    n_fields = 13  # number of values stored per observation

    records = []
    for row in tab_body.find_all('tr'):
        cells = row.select("td")
        if int(cells[3].string) == 0:
            # the Null target name, where there's no data
            continue
        if cells[4].string != obj_name:
            continue
        # Look at the links before parsing anything else: a row without
        # archive links is discarded, so there is no point parsing it.
        archive_links = cells[13].find_all('a', href=True)
        if not archive_links:
            continue
        records.append((
            int(cells[0].string),                            # obs id
            datetime.strptime(cells[1].string, time_fmt),    # start time
            datetime.strptime(cells[2].string, time_fmt),    # stop time
            float(cells[5].string),                          # ra
            float(cells[6].string),                          # dec
            cells[7].string,                                 # on-target exposure
            cells[8].string,                                 # good exposure
            datetime.strptime(cells[9].string, time_fmt),    # processing date
            cells[10].string,                                # processing state
            cells[11].string,                                # processing status
            cells[12].string,                                # processing version
            archive_links[0]['href'],                        # first link: directory url
            archive_links[-1]['href'],                       # last link: tar bundle url
        ))

    # Transpose the per-observation records into per-column lists.
    if records:
        fields = [list(field) for field in zip(*records)]
    else:
        fields = [[] for _ in range(n_fields)]
    (obsID_list, start_time_list, stop_time_list, ra_list, dec_list,
     expo_ontarg_list, expo_good_list, proc_date_list, proc_state_list,
     proc_status_list, proc_ver_list, dir_link_list, tar_link_list) = fields

    obsID_col = Column(obsID_list, name='obs id', dtype='i', description=col_labels[0])
    start_time_col = Column(start_time_list, name='start time', dtype=datetime, unit='UTC', description=col_labels[1])
    stop_time_col = Column(stop_time_list, name='stop time', dtype=datetime, unit='UTC', description=col_labels[2])
    ra_col = Column(ra_list, name='ra', dtype='f', unit=u.deg, description=col_labels[5])
    dec_col = Column(dec_list, name='dec', dtype='f', unit=u.deg, description=col_labels[6])
    expo_ontarg_col = Column(expo_ontarg_list, name='on-targ expo', dtype='f', unit=u.s, description=col_labels[7])
    expo_good_col = Column(expo_good_list, name='good expo', dtype='f', unit=u.s, description=col_labels[8])
    proc_date_col = Column(proc_date_list, name='proc date', dtype=datetime, description=col_labels[9])
    proc_state_col = Column(proc_state_list, name='proc state', dtype='U', description=col_labels[10])
    proc_status_col = Column(proc_status_list, name='proc status', dtype='i1', description=col_labels[11])
    proc_ver_col = Column(proc_ver_list, name='proc ver', dtype='U', description=col_labels[12])
    dir_link_col = Column(dir_link_list, name='dir link', dtype='U', description='Archive link, directory')
    tar_link_col = Column(tar_link_list, name='tar link', dtype='U', description='Archive link, tarball')
    ## more on datatypes in python here: https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html

    obj_tab = Table([obsID_col, start_time_col, stop_time_col, ra_col,
                     dec_col, expo_ontarg_col, expo_good_col, proc_date_col,
                     proc_state_col, proc_status_col, proc_ver_col,
                     dir_link_col, tar_link_col])
    return obj_tab

## Get a table of all the NICER observations for a specified source

In [None]:
# obj_name is matched against the target name in the observation table;
# obj_prefix is the short form used when building local file names.
obj_name, obj_prefix = "Swift_J1728.9-3613", "SwiftJ1728"
obj_tab = get_object_observation_table(obj_name)
print(obj_tab.info)

In [None]:
# Earliest and latest observation start times for this source.
start_times = obj_tab['start time']
print(min(start_times))
print(max(start_times))
# print(obj_tab['obs id','start time', 'good expo'])

In [None]:
# Narrow the full observation table down step by step; obj_tab itself is
# left untouched until the selection is accepted in a later cell.
tmp_tab = obj_tab
## Optional: keep only certain obsIDs
# obsID_want = [1050390122, 1050390134]
# obsID_mask = [x in obsID_want for x in tmp_tab['obs id']]
# tmp_tab = tmp_tab[obsID_mask]
## Keep only rows whose value is strictly greater than the given bound:
##   * on-target exposure longer than 100 seconds
##   * obsIDs greater than one (proprietary GOF ones have negative numbers)
for col_name, lower_bound in (('on-targ expo', 100), ('obs id', 1)):
    keep_rows = [value > lower_bound for value in tmp_tab[col_name]]
    tmp_tab = tmp_tab[keep_rows]
## Optional: restrict on start time, either to a window ...
# date_mask = [(x >= datetime(2018, 6, 1)) and (x < datetime(2018, 8, 1)) for x in tmp_tab['start time']]
## ... or to everything from a given date onwards
# date_mask = [(x >= datetime(2021, 2, 1)) for x in tmp_tab['start time']]
# tmp_tab = tmp_tab[date_mask]
print(tmp_tab['obs id','start time', 'on-targ expo', 'good expo'])

If you like what you see, download those observations!

In [None]:
# Accept the filtered selection: from here on obj_tab refers to the
# filtered table, and the following cells work from it.
obj_tab = tmp_tab
print(obj_tab)

## Save obsID list

In [None]:
homedir = os.path.expanduser("~")
exe_dir = os.getcwd()  # directory the notebook is being run from
listdir = "%s/Documents/Research/%s/in" % (homedir, obj_prefix)
# Recursive mkdir; exist_ok avoids the separate check-then-create step.
os.makedirs(listdir, exist_ok=True)
# One obsID per line, written as plain integers.
obsID_file = "%s/%s_obsIDs.txt" % (listdir, obj_prefix)
np.savetxt(obsID_file, obj_tab['obs id'], fmt="%d")

## Specify local directories for downloading the data files

In [None]:
# Per-source download directory. The trailing slash is kept on purpose:
# file names are appended to data_dir by plain string concatenation.
data_dir = homedir+"/Reduced_data/"+obj_name+"/"
# Recursive mkdir, so this also works when ~/Reduced_data does not exist yet.
os.makedirs(data_dir, exist_ok=True)
dl_script = data_dir+"download.sh"
dl_log = data_dir+"download.log"

## Downloading the data files via FTP

In [None]:
# Show the tarball link of the first observation in the table.
first_tar_link = obj_tab['tar link'][0]
print(first_tar_link)

In [None]:
# Writes a shell script (dl_script) that fetches, for every observation in
# obj_tab, the files listed below with wget. Nothing is downloaded here; the
# commands to run the script are printed at the end.
tar_list = []  # only filled by the optional whole-tarball download below

# One wget call per remote file: (local obsID directory, log file, url).
wget_fmt = ("wget -r --directory-prefix=%s --append-output=%s -nv -nH "
            "--cut-dirs=6 --show-progress --progress=bar %s \n")

# File-name endings to fetch from <obsID>/auxil/ (file name is ni<obsID><ending>).
auxil_suffixes = [
    ".orb.gz",  # orbit file, used for barycentering
    # ".att.gz",
    # ".cat",
    # ".mkf.gz",
]

with open(dl_script, 'w') as f:
    # -f: do not complain when there is no previous log to remove
    f.write("rm -f %s \n" % dl_log)
    for tab_row in obj_tab:
        obsID = str(tab_row['obs id'])
        obsID_dir = os.path.join(data_dir, obsID)
        remote_dir = tab_row['dir link'] + obsID

        # The auxil and xti checks are independent of each other, so an
        # observation that only has one of the two gets the other one fetched.
        if not os.path.exists(os.path.join(obsID_dir, "auxil")):
            f.write("mkdir -p %s/auxil \n" % obsID_dir)
            for suffix in auxil_suffixes:
                ftp_aux = "%s/auxil/ni%s%s" % (remote_dir, obsID, suffix)
                f.write(wget_fmt % (obsID_dir, dl_log, ftp_aux))

        if not os.path.exists(os.path.join(obsID_dir, "xti")):
            ## Optional: housekeeping and unfiltered event files
            # f.write("mkdir -p %s/xti/hk \n" % obsID_dir)
            # f.write("mkdir -p %s/xti/event_uf \n" % obsID_dir)
            # for mpu in range(0,7):
            #     ftp_hk = "%s/xti/hk/ni%s_0mpu%d.hk.gz" % (remote_dir, obsID, mpu)
            #     f.write(wget_fmt % (obsID_dir, dl_log, ftp_hk))
            #     ftp_uf = "%s/xti/event_uf/ni%s_0mpu%d_uf.evt.gz" % (remote_dir, obsID, mpu)
            #     f.write(wget_fmt % (obsID_dir, dl_log, ftp_uf))
            ## If you only want to do quick analysis, download just the orb file (for barycentering) and this cl file
            f.write("mkdir -p %s/xti/event_cl \n" % obsID_dir)
            ftp_cl = "%s/xti/event_cl/ni%s_0mpu7_cl.evt.gz" % (remote_dir, obsID)
            f.write(wget_fmt % (obsID_dir, dl_log, ftp_cl))

        ## Downloading the whole thing -- only do this if the files are on the small side
        # tar_list.append(obsID+".tar.gz")
        # if not os.path.exists(obsID):
        #     f.write("wget -r --directory-prefix=%s --append-output=%s -nv -nH --cut-dirs=4 --show-progress --progress=bar %s \n" % \
        #             (data_dir, dl_log, tab_row['tar link']))
print("Run these things at the command line:")
print("bash")
print("cd %s" % data_dir)
print("chmod u+x %s" % os.path.basename(dl_script))
print("./%s" % os.path.basename(dl_script))

## Running nicerl2 (if you didn't download the event_cl files)
This runs nicercal, niextract-events, nimaketime, and nicer-mergeclean (with min_fpm=7) to get cleaned event lists.

In [None]:
# Writes a shell script (red_script) with one nicerl2 call per observation.
# Nothing is run here; the commands to run the script are printed at the end.
red_script = data_dir+"reduce.sh"
red_log = data_dir+"reduce.log"

with open(red_script, 'w') as f:
    # -f: do not complain when there is no previous log to remove
    # NOTE(review): none of the commands written below send output to red_log -- confirm whether it is still needed
    f.write("rm -f %s \n" % red_log)
    # indir is the bare obsID, i.e. relative to data_dir (hence the `cd` printed below)
    f.writelines("nicerl2 indir=%s clobber=yes \n" % str(obsid)
                 for obsid in obj_tab['obs id'])
print("Run these things at the command line:")
print("bash")
print("heainit")
print("cd %s" % data_dir)
print("chmod u+x %s" % os.path.basename(red_script))
print("./%s" % os.path.basename(red_script))

## If files are encrypted, decrypt here.

## Make a list of the event files (in their local directory)

In [None]:
# Collect the local cleaned-event file and orbit file of every observation.
# cl_list and orb_list are paired up by position later on, so an observation
# is only added when BOTH of its files exist; otherwise one missing file
# would shift every following pair.
cl_list = []
orb_list = []
for obsid in obj_tab['obs id']:
    obsid_str = str(obsid)
    cl_file = "%s/%s/xti/event_cl/ni%s_0mpu7_cl.evt.gz" % (data_dir, obsid_str, obsid_str)
    orb_file = "%s/%s/auxil/ni%s.orb.gz" % (data_dir, obsid_str, obsid_str)
    have_cl = os.path.exists(cl_file)
    have_orb = os.path.exists(orb_file)
    if not have_cl:
        print("CL does not exist for obsid=%s" % obsid_str)
    if not have_orb:
        print("ORB does not exist for obsid=%s" % obsid_str)
    if have_cl and have_orb:
        cl_list.append(cl_file)
        orb_list.append(orb_file)

In [None]:
# Turn the absolute file paths into paths relative to data_dir.
evt_list = []
for cl_path in cl_list:
    evt_list.append(os.path.relpath(cl_path, start=data_dir))
print(evt_list)

rel_orb_paths = []
for orb_path in orb_list:
    rel_orb_paths.append(os.path.relpath(orb_path, start=data_dir))
orb_list = rel_orb_paths
print(orb_list)

## If the files were encrypted, use these two lines (they need the lists built above)
# evt_list = [filename.replace('.gpg','') for filename in evt_list]
# orb_list = [filename.replace('.gpg','') for filename in orb_list]

# The two lists are paired up by position later on.
assert len(evt_list) == len(orb_list)

## Save event list file names to a text file

In [None]:
# listdir already ends in ".../in", so the file goes directly into it
# (same directory as the obsID list).
evt_out_file = os.path.join(listdir, obj_prefix + "_evtlists.txt")
with open(evt_out_file, 'w') as f:
    # one event-list file name per line
    f.writelines("%s\n" % evt_name for evt_name in evt_list)
print("Event list printed to: %s" % evt_out_file)

## Apply barycenter correction
`barycorr` doesn't like being run on zipped files, so it will probably give a segmentation fault if the event and orbit files are still gzipped.

In [None]:
# Writes a shell script with one barycorr call per (event file, orbit file)
# pair and records the name of each output file in bary_list. barycorr itself
# is not run here; the commands to run the script are printed.
# The script is written into data_dir, and every path in it is relative to it.
os.chdir(data_dir)
bary_script = obj_name+"_barycorr.sh"
print("chmod u+x %s" % bary_script)

bary_list = []
script_lines = []
for evt_file, orb_file in zip(evt_list, orb_list):
    # output file name: '_bary' goes in front of the '.evt' extension
    bary_file = evt_file.replace('.evt', '_bary.evt')
    bary_list.append(bary_file)
    script_lines.append(
        "barycorr infile=./%s outfile=./%s orbitfiles=./%s refframe=ICRS clobber=yes"
        % (evt_file, bary_file, orb_file) + "\n")

with open(bary_script, mode='w') as out:
    out.writelines(script_lines)

# The script is meant to be run by hand from the shell (see the printed
# commands), not launched from Python.
print("./"+bary_script)

In [None]:
# listdir already ends in ".../in", so the file goes directly into it
# (same directory as the obsID list).
bary_out_file = os.path.join(listdir, obj_prefix + "_bary_evtlists.txt")
with open(bary_out_file, 'w') as f:
    # one barycentered event-list file name per line
    f.writelines("%s\n" % bary_name for bary_name in bary_list)
print("Barycentered event list printed to: %s" % bary_out_file)

Example of a single `barycorr` command:

    barycorr infile=ni1108030106_0mpu7_cl.evt.gz outfile=ni1108030106_0mpu7_cl_bary.evt.gz orbitfiles=ni1108030106.orb.gz refframe=ICRS clobber=yes

