In [2]:
import os
from astropy.table import Table
from tqdm import tqdm_notebook
import tempfile
from math import sqrt

# Add extra code to run things in parallel.
Combining the exposure maps takes a REALLY long time, so I added this bit so I can run it all in parallel.

You don't need to run any of this if you don't want to run things in parallel. I'll try to comment out code at the bottom so you can run things in parallel or not, depending on what you want to do.

In [3]:
from concurrent.futures import ThreadPoolExecutor, as_completed

def parallel_process(array, function, n_jobs=None, use_kwargs=False, front_num=0):
    """
        A parallel version of the map function with a progress bar.

        Args:
            array (array-like): A sliceable sequence to iterate over.
            function (function): A python function to apply to the elements of array
            n_jobs (int, default=None): The number of worker threads. None lets
                ThreadPoolExecutor choose its own default; 1 disables the pool and runs serially.
            use_kwargs (boolean, default=False): Whether to consider the elements of array as
                dictionaries of keyword arguments to function
            front_num (int, default=0): The number of iterations to run serially before kicking off
                the parallel job. This can be useful for catching bugs
        Returns:
            [function(array[0]), function(array[1]), ...] in input order. When a call made on the
            thread pool raises, the exception object is stored in its slot instead of being
            re-raised, so one bad element does not abort the whole run. Calls made serially
            (the front_num calls and the n_jobs == 1 path) raise as usual.
    """
    def call(item):
        # Single place that decides between positional and keyword invocation.
        return function(**item) if use_kwargs else function(item)

    # We run the first few iterations serially to catch bugs
    front = [call(item) for item in array[:front_num]] if front_num > 0 else []
    remaining = array[front_num:]

    # If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking
    # and debugging.
    if n_jobs == 1:
        return front + [call(item) for item in tqdm_notebook(remaining)]

    # Assemble the workers
    with ThreadPoolExecutor(max_workers=n_jobs) as pool:
        futures = [pool.submit(call, item) for item in remaining]
        progress_options = {
            'total': len(futures),
            'unit': 'it',
            'unit_scale': False,
            'leave': True
        }
        # Print out the progress as tasks complete
        for _ in tqdm_notebook(as_completed(futures), **progress_options):
            pass

    # Collect in submission order (as_completed yields in completion order).
    results = []
    for future in futures:
        try:
            results.append(future.result())
        except Exception as err:
            results.append(err)
    return front + results

In [4]:
def source_detect(name, outpath, pp=None):
    ''' Write and run an ximage script that runs `detect` on the blocked images of one field.

    This uses the detect function as part of the ximage package. This tool was originally
    written to work with SWIFT data. Because SWIFT was really designed to be a point source
    observatory, this tool is pretty good at finding point sources.

    I think we have replaced this function with the second detect function, below, which uses
    a different tool, originally written to find extended sources in CHANDRA data. That seems
    to be more of what we are trying to do, and it seems to have a similar performance.

    Args:
        name: field name; files are looked up under {outpath}/{name}/ with {name}_ as prefix.
        outpath: root directory that contains the per-field directories.
        pp: optional executor-like object; when given, the shell command is handed to
            pp.submit instead of being run synchronously.

    Returns:
        name, unchanged.
    '''
    workdir = f'{outpath}/{name}'
    script = f'{workdir}/{name}_ximg_det.in'

    with open(script, 'w') as f:
        for eng in [200, 300, 400, 500, 600]:
            # The unblocked image gates the whole energy band; the blocked images
            # themselves are not checked here.
            if not os.path.isfile(f'{workdir}/{name}_img_50-{eng}.fits'):
                continue

            for blk in [4, 8]:
                stem = f'{workdir}/{name}_img_50-{eng}_bl{blk}'
                f.write(f'read {stem}.fits\n')
                f.write(f'detect/snr_threshold=3/fitsdet={{{stem}.det.fits}}\n')
                f.write(f'detect/snr_threshold=3/filedet={{{stem}.det}}\n')

                # remove old FITS catalogues if they exist (both blockings, not only bl8)
                if os.path.isfile(f'{stem}.det.fits'):
                    os.remove(f'{stem}.det.fits')

        f.write('exit\n')

    # log output
    log_file = f'{workdir}/{name}_ximg_det.log'

    # call ximage
    cmd = f'ximage < {script} > {log_file}'
    if not pp:
        os.system(cmd)
    else:
        # The previous target, call_cmd, is not defined anywhere in this notebook.
        pp.submit(os.system, cmd)

    return name

In [5]:
def detect_vtp(name, outpath, pp=None):
    """Run vtpdetect on one field's event list and exposure map.

    Args:
        name: field name; files live in {outpath}/{name}/ with {name}_ as prefix.
        outpath: root directory that contains the per-field directories.
        pp: unused; kept so the signature matches the other pipeline steps.

    Returns:
        0 when the event file or the exposure map is missing (nothing is run),
        otherwise name.
    """
    workdir = f'{outpath}/{name}'

    # set up all the non-file-specific parameters
    params = {
        'scale': 1,
        'limit': 1E-6,
        'coarse': 5,
        'maxiter': 10,
        'regfile': f'{workdir}/{name}_vtp.reg',
        'log': f'{workdir}/{name}_vtp.log',
        'clobber': 'yes',
        'verbose': 1,
    }

    evts = f'{workdir}/{name}_events.fits'
    expmap = f'{workdir}/{name}_exp.fits'
    outfits = f'{workdir}/{name}_vtp.detect'

    # Both inputs are required, so bail out if either one is missing.
    if not os.path.isfile(evts) or not os.path.isfile(expmap):
        return 0

    # build the cmd
    cmd = f'vtpdetect {evts}[pi=50:600] {expmap} {outfits} '
    cmd += ''.join(f'{param}={value} ' for param, value in params.items())

    # keep a copy of the exact command next to the outputs
    with open(f'{workdir}/{name}_vtp.in', 'w') as f:
        f.write(f'{cmd}\n')

    os.system(cmd)

    return name

In [6]:
def psf(name, outpath, pp=None):
    """Write and run an ximage script issuing one `psf` command per detected source.

    For every blocked image (block 4 and 8, upper energies 200-600) that has both a text
    detection file (.det) and a FITS detection table (.det.fits), the background level is
    taken from the first line of the .det file containing 'Back', and a psf command is
    written for each row of the table.

    Args:
        name: field name; files live in {outpath}/{name}/ with {name}_ as prefix.
        outpath: root directory that contains the per-field directories.
        pp: unused; kept so the signature matches the other pipeline steps.

    Returns:
        name, unchanged.
    """
    workdir = f'{outpath}/{name}'
    script = f'{workdir}/{name}_ximg_psf.in'

    with open(script, 'w') as f:
        # loop over the blocked images
        for blk in [4, 8]:
            # loop over the energies
            for eng in [200, 300, 400, 500, 600]:
                stem = f'{workdir}/{name}_img_50-{eng}_bl{blk}'
                if not (os.path.isfile(f'{stem}.det') and os.path.isfile(f'{stem}.det.fits')):
                    continue

                # figure out the background level; reset for every image so a value from
                # a previous image is never reused by accident
                background = None
                with open(f'{stem}.det', 'r') as f2:
                    for l in f2:
                        if 'Back' in l:
                            background = float(l.split(':')[-1])
                            break
                if background is None:
                    # no background line in the detection file: nothing usable to write
                    continue

                # read the image
                f.write(f'read {stem}.fits\n')

                # now we need to read the individual detections
                detects = Table.read(f'{stem}.det.fits')

                # write psf commands for each detection
                for i, (x, y) in enumerate(detects[['X', 'Y']]):
                    f.write(f'psf/xpix={x}/ypix={y}/back={background}/radius=4.25/noplot/'
                            f'fileplot={{{stem}_{i}.psf}}\n')

        f.write('exit\n')

    # log output
    log_file = f'{workdir}/{name}_ximg_psf.log'

    # call ximage
    cmd = f'ximage < {script} > {log_file}'

    os.system(cmd)

    return name

In [7]:
def centroid(name, outpath, pp=None):
    """Run xrtcentroid once per detected source of every blocked image of a field.

    For each blocked image (block 4 and 8, upper energies 200-600) that has both a .det
    file and a .det.fits table, the RA/DEC of every table row is passed to xrtcentroid.
    The command is saved to a per-source .in file and its output goes to a matching .log.

    Args:
        name: field name; files live in {outpath}/{name}/ with {name}_ as prefix.
        outpath: root directory that contains the per-field directories.
        pp: unused; kept so the signature matches the other pipeline steps.

    Returns:
        name, unchanged (matching source_detect, detect_vtp and psf).
    """
    workdir = f'{outpath}/{name}'

    # loop over the blocked images
    for blk in [4, 8]:
        # loop over the energies
        for eng in [200, 300, 400, 500, 600]:
            stem = f'{workdir}/{name}_img_50-{eng}_bl{blk}'
            if not (os.path.isfile(f'{stem}.det') and os.path.isfile(f'{stem}.det.fits')):
                continue

            # now we need to read the individual detections
            detects = Table.read(f'{stem}.det.fits')

            # one xrtcentroid call per detection
            for i, (ra, dec) in enumerate(detects[['RA', 'DEC']]):
                tag = f'{name}_cent_50-{eng}_bl{blk}_{i}'
                cmd = ('xrtcentroid '
                       f'infile={stem}.fits '
                       f'outfile={name}_img_50-{eng}_bl{blk}_{i}.cent '
                       f'outdir={workdir} '
                       f'boxra={ra} '
                       f'boxdec={dec} '
                       'calcpos=yes '
                       'interactive=no '
                       'boxradius=1 '
                       'clobber=yes')

                # keep a copy of the exact command next to the outputs
                with open(f'{workdir}/{tag}.in', 'w') as f:
                    f.write(cmd)

                # run it, sending the tool's output to the log
                os.system(f'{cmd} > {workdir}/{tag}.log')

    return name

In [8]:
def _dmstat_value(image, region_path, keyword, scratch_path):
    """Run dmstat on `image` inside the region file and return the number on the `keyword` line."""
    cmd = (f"dmstat \"{image}[(x,y)=region({region_path})]\" centroid- "
           f"| grep {keyword} | awk '{{print $2}}' > {scratch_path}")
    os.system(cmd)
    with open(scratch_path) as tmp:
        return float(tmp.readline().strip())


def get_radprof(name, outpath, overwrite=False):
    """Write radial surface-brightness profiles around the largest vtpdetect sources.

    The 10 sources with the largest SRC_AREA are profiled. For each one, 75 annuli of
    width 4 starting at radius 0 are measured with dmstat on the 50-200 image and the
    exposure map, with the ellipses of all other detections excluded. One text file
    {name}_vtp_{COMPONENT-1}.radprof is written per source.

    Args:
        name: field name; files live in {outpath}/{name}/ with {name}_ as prefix.
        outpath: root directory that contains the per-field directories.
        overwrite: when False, a source whose profile file already exists is skipped.

    Returns:
        None. Returns early, doing nothing, if the image, the exposure map or the
        detection table is missing.
    """
    # Swift XRT pixel scale
    # 1 pixel = 6.548089E-04 * 3600 arcsec
    pixscale = 6.548089E-04 * 3600

    # model parameters: number of annuli, inner radius, annulus width
    params = {'n': 75, 'r0': 0, 'dr': 4}

    workdir = f'{outpath}/{name}'
    evnts = f'{workdir}/{name}_img_50-200.fits'   # events image
    expmap = f'{workdir}/{name}_exp.fits'         # expo map
    srcs = f'{workdir}/{name}_vtp.detect'         # detections
    for required in (evnts, expmap, srcs):
        if not os.path.isfile(required):
            return

    # now we need to read the individual detections
    detects = Table.read(srcs, hdu=1)

    # we are going to compute profiles for the 10 biggest -- this is for speed.
    detects.sort(['SRC_AREA'], reverse=True)

    # loop over the sources -- component is the source ID number
    for i, xc, yc in detects[['COMPONENT', 'X', 'Y']][:10]:
        # The skip test must look at the very file that gets written below.
        out_path = f'{workdir}/{name}_vtp_{i-1}.radprof'
        if not overwrite and os.path.isfile(out_path):
            continue

        # Every other detection is masked out; this list is the same for all annuli.
        exclusions = [
            f"-ellipse({xc1},{yc1},{rc[0]},{rc[1]},{rotc})\n"
            for j, xc1, yc1, rc, rotc in detects[['COMPONENT', 'X', 'Y', 'R', 'ROTANG']]
            if not j == i
        ]

        # scratch file that receives the awk output; only the path is needed, so the
        # descriptor returned by mkstemp is closed right away
        fd2, path2 = tempfile.mkstemp()
        os.close(fd2)
        try:
            with open(out_path, 'w') as radprof:
                radprof.write(f"# source number and position: {name}_{i} {xc:.3f} {yc:.3f}\n")
                radprof.write(f"# profile parameters: {params['n']} {params['r0']} {params['dr']}\n")
                radprof.write("# bin r1 r2 x y w sb sb_err\n")

                for irad in range(params['n']):
                    r1 = params['r0'] + irad * params['dr']
                    r2 = params['r0'] + (irad + 1) * params['dr']

                    fd, path = tempfile.mkstemp()
                    try:
                        with os.fdopen(fd, 'w') as reg:
                            reg.write("# Region file format: CIAO version 1.0\n")
                            reg.write(f"+annulus({xc},{yc},{r1},{r2})\n")
                            reg.writelines(exclusions)

                        x = _dmstat_value(evnts, path, 'sum', path2)    # No. of counts from data image
                        y = _dmstat_value(expmap, path, 'mean', path2)  # Mean exposure
                        w = _dmstat_value(expmap, path, 'good', path2)  # No. of pixels
                    finally:
                        os.remove(path)

                    # exposure time x area, with the area converted from pixels to arcmin^2
                    norm = y * w * pixscale**2 / 3600
                    if norm > 0:
                        sb = x / norm  # cts/s/arcmin^2
                        # error term: 1 + sqrt(counts + 0.75)
                        sbe = (1. + sqrt(x + 0.75)) / norm  # cts/s/arcmin^2
                    else:
                        # annulus without exposure or without usable pixels
                        sb = sbe = float('nan')

                    radprof.write(f"{irad+1:3d} {r1:7d} {r2:7d} {x:9.1f} {y:9.1f} {w:9.1f} {sb:12.4e} {sbe:12.4e}\n")
        finally:
            os.remove(path2)

    return

In [9]:
def load_PSZcatalog(datapath='./../planckClusters/catalogs/'):
    """Read the PSZ1 and PSZ2 FITS catalogues and merge them into one pandas DataFrame.

    Args:
        datapath: directory holding PSZ1v2.1.fits and PSZ2v1.fits. Defaults to the
            location this notebook has always used.

    Returns:
        DataFrame holding the first 12 columns of the merged selection plus unified
        NAME, RA and DEC columns (PSZ2 value when present, otherwise PSZ1).
    """
    from astropy.table import Table
    from numpy import append as npappend

    ps1 = Table.read(os.path.join(datapath, 'PSZ1v2.1.fits'))
    ps2 = Table.read(os.path.join(datapath, 'PSZ2v1.fits'))

    # convert to pandas
    df1 = ps1.to_pandas()
    df2 = ps2.to_pandas()

    # clean up strings -- not required
    df1 = df1.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    df2 = df2.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)

    # merge the catalogs together
    df_m = df1.merge(df2, how='outer', left_on='INDEX', right_on='PSZ', suffixes=('_PSZ1', '_PSZ2'))

    # get the columns that we want
    # NOTE(review): these are positional indices into the merged frame, so they depend on
    # the column order of both catalogue files -- confirm whenever a catalogue version changes.
    cols = df_m.columns[[0, 1, 4, 5, 8, 29, 33, 34, 37, 38, 40, 51]]
    df_final = df_m[cols]

    # remerge to find bits that were missing
    df_final_bigger = df_final.merge(df2, how='left', left_on='INDEX_PSZ1',
                                     right_on='PSZ')

    # fill in nans (this loop used to appear twice; the second pass was a no-op)
    for col in ['NAME', 'RA', 'DEC', 'SNR', 'REDSHIFT', 'INDEX']:
        df_final_bigger[col+'_PSZ2'] = df_final_bigger[col+'_PSZ2'].fillna(df_final_bigger[col])

    # unified columns: PSZ2 value first, PSZ1 as the fallback
    for col in ['NAME', 'RA', 'DEC']:
        df_final_bigger[col] = df_final_bigger[col+'_PSZ2'].fillna(df_final_bigger[col+'_PSZ1'])

    df_final_bigger = df_final_bigger[npappend(df_final_bigger.columns[:12].values, ['NAME', 'RA', 'DEC'])]

    return df_final_bigger


In [10]:
# Load the merged PSZ catalogue and put its columns in alphabetical order.
data = load_PSZcatalog().sort_index(axis=1)

# Root directory holding one sub-directory per cluster.
outpath = './data_full'

# One keyword-argument dict per cluster; spaces in catalogue names become underscores.
arr = [
    dict(name=cluster.replace(' ', '_'), outpath=outpath, overwrite=True)
    for cluster in data['NAME']
]

# Earlier pipeline stages, left commented out so they can be re-enabled one at a time.
# NOTE: source_detect, detect_vtp, psf and centroid take `pp`, not `overwrite`, so the
# 'overwrite' key has to be dropped from the dicts before re-enabling any of them.
#parallel_process(arr, source_detect, use_kwargs=True)
#parallel_process(arr, detect_vtp, use_kwargs=True)
#parallel_process(arr, psf, use_kwargs=True)
#parallel_process(arr, centroid, use_kwargs=True)

# Radial profiles for every cluster; the trailing ';' keeps the cell from echoing the
# call's return value.
parallel_process(arr, get_radprof, use_kwargs=True, n_jobs=12);



HBox(children=(IntProgress(value=0, max=1943), HTML(value='')))






In [None]:
# Single field used to run get_radprof by hand, outside the parallel batch.
name = 'PSZ1_G057.42-10.77'

In [None]:
# Same value as the batch cell's outpath; presumably repeated so this cell can be run on its own.
outpath = './data_full'

In [None]:
# Serial run for the single field above; overwrite=True recomputes even if a profile file exists.
get_radprof(name, outpath, overwrite=True)