In [None]:
from astropy.table import Table, Column
from tqdm import tqdm_notebook
import tempfile
from math import sqrt, sin, cos, pow
import numpy as np

import os
import sys
sys.path.append(f'{os.environ["HOME"]}/Projects/planckClusters/catalogs')
from load_catalogs import load_PSZcatalog

# Add extra code to run things in parallel.
Combining the exposure maps takes a REALLY long time, so I added this bit so I can run it all in parallel.

You don't need to run any of this if you don't want to run things in parallel. I'll try to comment out code at the bottom so you can run things in parallel or not, depending on what you want to do.

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed

def parallel_process(array, function, n_jobs=None, use_kwargs=False, front_num=0):
    """
        Apply a function to every element of an array with a progress bar,
        using a pool of worker threads.

        The function is run purely for its side effects: return values are
        discarded and this routine always returns None.

        Args:
            array (array-like): A sliceable sequence to iterate over.
            function (function): A python function to apply to the elements of array
            n_jobs (int, default=None): The number of worker threads. None lets
                ThreadPoolExecutor pick its own default; 1 skips the pool entirely and
                runs everything serially (useful for benchmarking and debugging).
            use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of
                keyword arguments to function
            front_num (int, default=0): The number of iterations to run serially before kicking off the
                parallel job. This can be useful for catching bugs
        Returns:
            None

        Exceptions raised during the serial warm-up or when n_jobs == 1 propagate to
        the caller. Exceptions raised inside worker threads do not abort the other
        jobs; they are collected and printed once every job has finished.
    """
    def _call(item):
        # single place that knows how an element is turned into a call
        return function(**item) if use_kwargs else function(item)

    # We run the first few iterations serially to catch bugs
    if front_num > 0:
        for item in array[:front_num]:
            _call(item)

    remaining = array[front_num:]

    # If we set n_jobs to 1, just run a plain loop.
    if n_jobs == 1:
        for item in tqdm_notebook(remaining):
            _call(item)
        return

    failures = []
    # Assemble the workers
    with ThreadPoolExecutor(max_workers=n_jobs) as pool:
        # remember which input each future belongs to so failures can be reported
        futures = {pool.submit(_call, item): item for item in remaining}
        kwargs = {
            'total': len(futures),
            'unit': 'it',
            'unit_scale': False,
            'leave': True
        }
        # Print out the progress as tasks complete
        for future in tqdm_notebook(as_completed(futures), **kwargs):
            error = future.exception()
            if error is not None:
                failures.append((futures[future], error))

    # Without this, a job that raised would vanish without a trace.
    label = getattr(function, '__name__', repr(function))
    for item, error in failures:
        print(f'parallel_process: {label} failed for {item!r}: {error!r}')

In [None]:
def source_detect(name, outpath, pp=None):
    ''' Write an ximage script that runs ``detect`` on the blocked images, then run it.

    This uses the detect function of the ximage package. That tool was originally
    written to work with SWIFT data. Because SWIFT was really designed to be a point
    source observatory, it is pretty good at finding point sources.

    It has probably been superseded by the vtpdetect-based detection further down,
    which uses a tool originally written to find extended sources in CHANDRA data;
    that is closer to what we are trying to do and seems to perform similarly.

    For every energy band whose unblocked image exists, the script reads the
    block-4 and block-8 images and asks for a FITS and a text detection list for
    each. The script is written to ``{name}_ximg_det.in`` and ximage output is
    redirected to ``{name}_ximg_det.log``.

    name -- field name; also the sub-directory of outpath holding its files
    outpath -- root data directory
    pp -- optional executor-like object; when truthy the command is submitted
          to it instead of being run directly

    Returns name.
    '''

    workdir = f'{outpath}/{name}'
    script = f'{workdir}/{name}_ximg_det.in'

    with open(script, 'w') as f:
        for eng in (200, 300, 400, 500, 600):
            stem = f'{workdir}/{name}_img_50-{eng}'

            # the existence test is on the unblocked image, the commands use the blocked ones
            if not os.path.isfile(f'{stem}.fits'):
                continue

            for blk in (4, 8):
                blocked = f'{stem}_bl{blk}'
                f.write(f'read {blocked}.fits\n')
                f.write(f'detect/snr_threshold=3/fitsdet={{{blocked}.det.fits}}\n')
                f.write(f'detect/snr_threshold=3/filedet={{{blocked}.det}}\n')

            # remove the old block-8 FITS catalogue if it exists
            # NOTE(review): only the bl8 file is removed, never the bl4 one -- confirm intended
            stale = f'{stem}_bl8.det.fits'
            if os.path.isfile(stale):
                os.remove(stale)

        f.write('exit\n')

    # ximage reads the script on stdin; everything it prints goes to the log
    log_file = f'{workdir}/{name}_ximg_det.log'
    cmd = f'ximage < {script} > {log_file}'

    if pp:
        # NOTE(review): call_cmd is not defined in the visible part of this file -- confirm it exists
        pp.submit(call_cmd, cmd)
    else:
        os.system(cmd)

    return name

In [None]:
def pointInEllipse(xo, yo, xp, yp, d, D, angle):
    # Tests whether the point [xp, yp] lies inside (or exactly on) the ellipse
    # centred on [xo, yo] and tilted at `angle`.
    #
    # d and D enter the test as the semi-axis lengths along the rotated
    # x and y directions. `angle` goes straight into math.cos / math.sin,
    # so it has to be given in radians.
    #
    ## This is for the detect_vtp function #

    dx = xp - xo
    dy = yp - yo
    cos_t, sin_t = cos(angle), sin(angle)

    # squared offsets along the two rotated axes
    along = pow(cos_t * dx + sin_t * dy, 2)
    across = pow(sin_t * dx - cos_t * dy, 2)

    # normalised ellipse equation: <= 1 means inside or on the boundary
    return bool(along / d**2 + across / D**2 <= 1)

In [None]:
def _run_vtpdetect(evts, expmap, prefix, params, scale):
    """Run a single vtpdetect pass and keep a copy of the command line.

    The source list goes to ``{prefix}.detect``, the regions to ``{prefix}.reg``
    and the tool log to ``{prefix}.log``; the exact command that was executed is
    saved in ``{prefix}.in``.
    """
    run_params = dict(params, regfile=f'{prefix}.reg', log=f'{prefix}.log', scale=scale)

    # build the cmd -- events are restricted to pi=50:600
    cmd = f'vtpdetect {evts}[pi=50:600] {expmap} {prefix}.detect '
    cmd += ''.join(f'{param}={value} ' for param, value in run_params.items())

    with open(f'{prefix}.in', 'w') as f:
        f.write(f'{cmd}\n')

    os.system(cmd)


def detect_vtpname(name, outpath):
    """Detect sources with vtpdetect at two scales and merge the two catalogs.

    vtpdetect is run once with scale=1 ("low") and once with scale=1.8 ("high").
    For every low-scale source, each high-scale source whose centre falls inside
    the low-scale ellipse is copied to the final catalog (HIGH=1); if there is
    none, the low-scale source itself is kept (HIGH=0). Sources are renumbered
    through a new, 1-based INDEX column.

    The merged catalog is written to ``{name}_vtp.detect`` (FITS) and the matching
    DS9 regions to ``{name}_vtp.reg``.

    Args:
        name: field name; also the sub-directory of outpath holding its files.
        outpath: root data directory.

    Returns:
        The merged astropy Table; 0 if the events file or the exposure map is
        missing; None if vtpdetect did not produce both source lists.
    """
    # set up all the non-file-specific parameters
    params = {
        'limit': 1E-6,
        'coarse': 5,
        'maxiter': 10,
        'clobber': 'yes',
        'verbose': 1,
    }

    base = f'{outpath}/{name}/{name}'
    evts = f'{base}_events.fits'
    expmap = f'{base}_exp.fits'

    # vtpdetect needs BOTH inputs, so bail out if either one is missing
    if not (os.path.isfile(evts) and os.path.isfile(expmap)):
        return 0

    # We run vtpdetect twice... once with a low scale and once with a high(er) scale
    _run_vtpdetect(evts, expmap, f'{base}_vtp_low', params, scale=1)
    _run_vtpdetect(evts, expmap, f'{base}_vtp_high', params, scale=1.8)

    ###
    # Now we are building the final catalog by comparing the low and high scale catalogs.
    ###
    try:
        low = Table.read(f'{base}_vtp_low.detect', hdu=1)
        high = Table.read(f'{base}_vtp_high.detect', hdu=1)
    except FileNotFoundError:
        return

    # create a new table to store our results -- just makes an empty copy of the original
    final = Table(dtype=low.dtype)
    final.add_column(Column(name='INDEX', dtype='>i4'), index=0)
    final.add_column(Column(name='HIGH', dtype='>i4'))

    # have to add columns to the original tables to make them match
    for table in (low, high):
        table.add_column(Column(data=np.zeros(len(table)), name='INDEX', dtype='>i4'), index=0)
        table.add_column(Column(data=np.zeros(len(table)), name='HIGH', dtype='>i4'))

    index = 1 # vtp is normally 1 indexed
    for row_l in low:
        x_l, y_l, rad_l = row_l['X'], row_l['Y'], row_l['R']

        # ROTANG is handled as degrees (it is written unchanged as the DS9 ellipse
        # angle below), whereas pointInEllipse feeds its angle to math.cos/math.sin
        # and therefore needs radians.
        angle = np.radians(360 - row_l['ROTANG'])

        added_high = False # keep track if we found a high source

        for row_h in high:
            if pointInEllipse(x_l, y_l, row_h['X'], row_h['Y'], rad_l[0], rad_l[1], angle):
                # use the row itself rather than trusting COMPONENT to equal row number + 1
                final.add_row(row_h)
                final['INDEX'][-1] = index # add the index to the row
                final['HIGH'][-1] = 1 # say that the source came from high catalog
                index += 1
                added_high = True

        if not added_high:
            final.add_row(row_l)
            final['INDEX'][-1] = index # add the index to the row
            final['HIGH'][-1] = 0 # say that the source came from low catalog
            index += 1

    # TODO: remove any source where the center of the region lies inside another region

    final.write(f'{base}_vtp.detect', format='fits', overwrite=True)

    # write out the regions
    with open(f'{base}_vtp.reg', 'w') as reg:
        reg.write("# Region file format: DS9 version 4.1\n")
        reg.write('global color=cyan dashlist=8 3 width=1 font="helvetica 10 normal roman" select=1 '
                  'highlite=1 dash=0 fixed=0 edit=1 move=1 delete=1 include=1 source=1\n')
        reg.write('fk5\n')
        for j, xc, yc, rc, rotc in final[['INDEX', 'RA', 'DEC', 'R', 'ROTANG']]:
            reg.write(f'ellipse({xc},{yc},{(rc[0] * 2.36):.3f}",{(rc[1] * 2.36):.3f}",{rotc:.3f}) ')
            reg.write(f'# text={{{j}}}\n')

    return final


# The rest of the notebook refers to this routine as ``detect_vtp``; keep both names usable.
detect_vtp = detect_vtpname

In [None]:
def psf(name, outpath, pp=None):
    """Write an ximage script with one ``psf`` command per detection, then run it.

    For every blocked image (block 4 and 8) and energy band that has a text
    detection list (``*.det``), the background level is taken from the first line
    of that list containing 'Back', and the detections themselves are read from
    the matching ``*.det.fits`` table. Images whose detection list carries no
    background line are skipped with a message, since the psf command needs one.

    The script is written to ``{name}_ximg_psf.in`` and ximage output is
    redirected to ``{name}_ximg_psf.log``.

    Args:
        name: field name; also the sub-directory of outpath holding its files.
        outpath: root data directory.
        pp: unused; kept so the signature matches source_detect.

    Returns:
        name
    """
    workdir = f'{outpath}/{name}'

    with open(f'{workdir}/{name}_ximg_psf.in', 'w') as f:
        # loop over the blocked images
        for blk in [4, 8]:
            # loop over the energies
            for eng in [200, 300, 400, 500, 600]:
                stem = f'{workdir}/{name}_img_50-{eng}_bl{blk}'
                if not os.path.isfile(f'{stem}.det'):
                    continue

                # figure out the background level; reset it for every image so a
                # value from a previous image can never leak into this one
                background = None
                with open(f'{stem}.det', 'r') as f2:
                    for line in f2:
                        if 'Back' in line:
                            background = float(line.split(':')[-1])
                            break

                if background is None:
                    print(f'{name} -- no background level in {stem}.det, skipping')
                    continue

                # read the image
                f.write(f'read {stem}.fits\n')

                # now we need to read the individual detections
                detects = Table.read(f'{stem}.det.fits')

                # write psf commands for each detection
                for i, (x, y) in enumerate(detects[['X', 'Y']]):
                    f.write(f'psf/xpix={x}/ypix={y}/back={background}/radius=4.25/noplot/'
                            f'fileplot={{{stem}_{i}.psf}}\n')

        f.write('exit\n')

    # log output
    log_file = f'{workdir}/{name}_ximg_psf.log'

    # ximage reads the script on stdin
    cmd = f'ximage < {workdir}/{name}_ximg_psf.in > {log_file}'

    os.system(cmd)

    return name

In [None]:
def centroid(name, outpath):
    ''' Run xrtcentroid on every ximage detection of every blocked image.

    Legacy code -- the original author noted that it probably does not work.

    For each block factor / energy band that has a text detection list, the
    detections are read from the matching ``*.det.fits`` table and xrtcentroid
    is run once per detection with a fixed box radius of 1. The command is saved
    to ``{name}_cent_50-{eng}_bl{blk}_{i}.in`` and its output is redirected to
    the ``.log`` file of the same stem.
    '''

    workdir = f'{outpath}/{name}'

    for blk in (4, 8):
        for eng in (200, 300, 400, 500, 600):
            image = f'{name}_img_50-{eng}_bl{blk}'

            # nothing to do for images that never went through detection
            if not os.path.isfile(f'{workdir}/{image}.det'):
                continue

            detects = Table.read(f'{workdir}/{image}.det.fits')

            for i, (ra, dec) in enumerate(detects[['RA', 'DEC']]):
                stem = f'{workdir}/{name}_cent_50-{eng}_bl{blk}_{i}'

                # assemble the xrtcentroid call for this detection
                cmd = ' '.join([
                    'xrtcentroid',
                    f'infile={workdir}/{image}.fits',
                    f'outfile={image}_{i}.cent',
                    f'outdir={workdir}',
                    f'boxra={ra}',
                    f'boxdec={dec}',
                    'calcpos=yes',
                    'interactive=no',
                    'boxradius=1',
                    'clobber=yes',
                ])

                # keep a copy of the command next to the data
                with open(f'{stem}.in', 'w') as f:
                    f.write(cmd)

                # run it, sending the tool output to the log
                os.system(f'{cmd} > {stem}.log')

In [None]:
def centroid_vtp(name, outpath):
    """Re-centroid every merged VTP source with xrtcentroid and write new regions.

    Needs the 50-600 image and the merged ``{name}_vtp.detect`` catalog; returns
    without doing anything if either is missing. All products go to
    ``{outpath}/{name}/centroids``:

    * ``{name}_{INDEX}.cent`` / ``{name}_{INDEX}.log`` -- xrtcentroid result and log
    * ``{name}_newcentroid.reg`` -- DS9 regions using the new positions with the
      original ellipse axes and rotation

    A source for which xrtcentroid leaves no ``.cent`` file is reported and left
    out of the region file instead of aborting the remaining sources.

    Args:
        name: field name; also the sub-directory of outpath holding its files.
        outpath: root data directory.

    Returns:
        None
    """
    workdir = f'{outpath}/{name}'
    evts = f'{workdir}/{name}_img_50-600.fits'
    srcs = f'{workdir}/{name}_vtp.detect'

    if not os.path.isfile(evts):
        return

    # now we need to read the individual detections
    if not os.path.isfile(srcs):
        return
    detects = Table.read(srcs)

    centdir = f'{workdir}/centroids'
    os.makedirs(centdir, exist_ok=True)

    # xrtcentroid can't handle the '+' in the filename..
    # So we'll make a symlink
    relpath = os.path.relpath(evts, f'{outpath}/{name}/centroids/')
    try:
        os.symlink(relpath, f'{centdir}/xrtcentroid.fits')
    except FileExistsError:
        pass

    # write out the new regions
    with open(f'{centdir}/{name}_newcentroid.reg', 'w') as reg:
        reg.write("# Region file format: DS9 version 4.1\n")
        reg.write('global color=cyan dashlist=8 3 width=1 font="helvetica 10 normal roman" select=1 '
                  'highlite=1 dash=0 fixed=0 edit=1 move=1 delete=1 include=1 source=1\n')
        reg.write('fk5\n')

        # run xrtcentroid for each detection
        for i, ra, dec, r, rotc in detects[['INDEX', 'RA', 'DEC', 'R', 'ROTANG']]:
            cmd = 'xrtcentroid '
            cmd += f'infile={centdir}/xrtcentroid.fits '
            cmd += f'outfile={name}_{i}.cent '
            cmd += f'outdir={centdir} '
            cmd += f'boxra={ra} '
            cmd += f'boxdec={dec} '
            cmd += 'calcpos=yes '
            cmd += 'interactive=no '
            cmd += f'boxradius={(max(r) * 2.36) / 60:0.3f} '
            cmd += 'clobber=yes'

            # log output -- numbered with INDEX, exactly like the .cent file it belongs to
            log_file = f'{centdir}/{name}_{i}.log'

            os.system(f'{cmd} > {log_file}')

            # the new position sits on the 3rd (RA) and 4th (Dec) lines of the output
            try:
                with open(f'{centdir}/{name}_{i}.cent', 'r') as cent:
                    cent.readline()
                    cent.readline()
                    ra = cent.readline().split('=')[-1].rstrip('\n')
                    dec = cent.readline().split('=')[-1].rstrip('\n')
            except FileNotFoundError:
                print(f'{name}_{i} -- xrtcentroid produced no output, see {log_file}')
                continue

            reg.write(f'ellipse({ra},{dec},{(r[0] * 2.36):.3f}",{(r[1] * 2.36):.3f}",{rotc:.3f}) ')
            reg.write(f'# text={{{i}}}\n')

In [None]:
def _shell_float(cmd, scratch):
    """Run *cmd*, which redirects its output to *scratch*, and parse the first output line as a float."""
    os.system(cmd)
    with open(scratch) as tmp:
        return float(tmp.readlines()[0][:-1])


def get_radprof(name, outpath, overwrite=False):
    """Compute surface-brightness profiles around the ten largest VTP sources.

    For each of the ten sources with the largest SRC_AREA, counts, mean exposure
    and pixel count are measured with dmstat in 75 annuli of width 4 centred on
    the source, with the ellipses of all other sources excluded. One text file
    per source is written to ``{outpath}/{name}/{name}_vtp_{INDEX}.radprof``.

    Returns without doing anything for a handful of very large, nearby clusters
    (they take extremely long) and when the 50-200 image, the exposure map or the
    merged VTP catalog is missing.

    Args:
        name: field name; also the sub-directory of outpath holding its files.
        outpath: root data directory.
        overwrite: when False an existing profile is always kept. When True an
            existing profile is recomputed -- but only if it has fewer than 70
            rows. NOTE(review): so "overwrite" really means "redo incomplete
            profiles"; confirm that this is the intended meaning.

    Returns:
        None
    """
    # handle specific cases that take a LOOOOONNNGGG time
    # these are the really big, nearby clusters
    too_big = ('PSZ2_G062.94+43.69', 'PSZ2_G164.18-38.88',
               'PSZ2_G302.41+21.60', 'PSZ2_G306.77+58.61',
               'PSZ2_G061.75+88.11', 'PSZ2_G274.73-32.20',
               'PSZ2_G302.49+21.53')
    if name in too_big:
        return

    # Swift XRT pixel scale
    # 1 pixel = 6.548089E-04 * 3600 arcsec
    pixscale = 6.548089E-04 * 3600

    # model parameters: number of annuli, inner radius, annulus width
    params = {'n': 75, 'r0': 0, 'dr': 4}

    # check for files: events image, exposure map, detections
    evnts = f'{outpath}/{name}/{name}_img_50-200.fits'
    expmap = f'{outpath}/{name}/{name}_exp.fits'
    srcs = f'{outpath}/{name}/{name}_vtp.detect'
    for needed in (evnts, expmap, srcs):
        if not os.path.isfile(needed):
            return

    # now we need to read the individual detections
    detects = Table.read(srcs, hdu=1)

    # we are going to compute profiles for the 10 biggest -- this is for speed.
    detects.sort(['SRC_AREA'], reverse=True)

    # loop over the sources -- INDEX is the source ID number
    for i, xc, yc in detects[['INDEX', 'X', 'Y']][:10]:
        profile = f'{outpath}/{name}/{name}_vtp_{i}.radprof'

        # decide whether we need to redo the profile
        if not overwrite and os.path.isfile(profile):
            continue
        try:
            data = Table.read(profile, format='ascii', header_start=2)
        except FileNotFoundError:
            pass
        else:
            # an existing profile with at least 70 rows is kept as it is
            if len(data) >= 70:
                continue

        # every other source is masked out; this list is the same for all annuli
        exclusions = ''.join(
            f"-ellipse({xc1},{yc1},{rc[0]},{rc[1]},{rotc})\n"
            for j, xc1, yc1, rc, rotc in detects[['INDEX', 'X', 'Y', 'R', 'ROTANG']]
            if not j == i)

        # output file
        with open(profile, 'w') as radprof:
            radprof.write(f"# source number and position: {name}_{i} {xc:.3f} {yc:.3f}\n")
            radprof.write(f"# profile parameters: {params['n']} {params['r0']} {params['dr']}\n")
            radprof.write(f"# bin r1 r2 x y w sb sb_err\n")

            # scratch file for the awk output. Only its path is needed (the shell
            # redirection reopens it), so the descriptor is closed straight away
            # instead of being leaked once per source.
            fd2, path2 = tempfile.mkstemp()
            os.close(fd2)
            try:
                for irad in range(params['n']):
                    r1 = params['r0'] + irad * params['dr']
                    r2 = params['r0'] + (irad + 1) * params['dr']

                    fd, path = tempfile.mkstemp()
                    try:
                        with os.fdopen(fd, 'w') as reg:
                            reg.write("# Region file format: CIAO version 1.0\n")
                            reg.write(f"+annulus({xc},{yc},{r1},{r2})\n")
                            reg.write(exclusions)

                        #  No. of counts from data image
                        x = _shell_float(
                            f"dmstat '{evnts}[(x,y)=region({path})]' centroid- | grep sum | awk '{{print $2}}' > {path2}",
                            path2)

                        #  Mean exposure
                        y = _shell_float(
                            f"dmstat \"{expmap}[(x,y)=region({path})]\" centroid- | grep mean | awk '{{print $2}}' > {path2}",
                            path2)

                        #  No. of pixels
                        w = _shell_float(
                            f"dmstat \"{expmap}[(x,y)=region({path})]\" centroid- | grep good | awk '{{print $2}}' > {path2}",
                            path2)
                    finally:
                        # the region file goes away even if dmstat output could not be parsed
                        os.remove(path)

                    try:
                        sb = x / (y * w * pixscale**2 / 3600)  #  cts/s/arcmin^2
                        err_gehrels = sqrt(x + 0.75)
                        sbe = (1. + err_gehrels) / (y * w * pixscale**2 / 3600)  #  cts/s/arcmin^2
                        radprof.write(f"{irad:3d} {r1:7d} {r2:7d} {x:9.1f} {y:9.1f} {w:9.1f} {sb:12.4e} {sbe:12.4e}\n")
                    except ZeroDivisionError:
                        print(f'{name}_{i} -- Zero Division in sb calculation! -- bin number {irad:3d}')
                        print(f'more info {w:.3f} {y:.3f}')
            finally:
                os.remove(path2)

    return

In [None]:
# Driver cell: load the catalog, build one kwargs dict per entry and run the
# selected pipeline stage over all of them.

# get file data
# load_PSZcatalog is imported from the planckClusters/catalogs directory added to sys.path above
data = load_PSZcatalog()
# presumably a pandas DataFrame; this would order the columns by label -- TODO confirm
data = data.sort_index(axis=1)

# root directory holding one sub-directory per field
outpath = './data_full'

# One dict of keyword arguments per catalog entry; spaces in NAME become underscores.
# The commented-out variant additionally passes overwrite=True (a get_radprof argument).
# arr = [{'name':n.replace(' ', '_'), 'outpath':outpath, 'overwrite':True} for n in data['NAME']]
arr = [{'name':n.replace(' ', '_'), 'outpath':outpath} for n in data['NAME']]

##########
### these are legacy and don't really need to be run ###
#parallel_process(arr, source_detect, use_kwargs=True)
#parallel_process(arr, psf, use_kwargs=True)
#parallel_process(arr, centroid, use_kwargs=True)
##########


# Current stages: uncomment the one to run.
# centroid_vtp and get_radprof both require the {name}_vtp.detect catalog to exist already.
# parallel_process(arr, detect_vtp, use_kwargs=True, n_jobs=1)
parallel_process(arr, centroid_vtp, use_kwargs=True, n_jobs=6)
#parallel_process(arr, get_radprof, use_kwargs=True, n_jobs=1)

In [None]:
# Settings for running a single field by hand instead of the whole catalog.
outpath = './data_full'
name = 'PSZ2_G080.37+14.64'

In [None]:
# Re-centroid the VTP sources of the single field currently held in `name`.
centroid_vtp(name, outpath)

In [None]:
# Radial profiles for a single field.
# NOTE(review): this name is on the skip list at the top of get_radprof, so the
# call below returns immediately without computing anything -- confirm intended.
name = 'PSZ2_G306.77+58.61'
outpath = './data_full'
get_radprof(name, outpath, overwrite=True)