In [1]:
%matplotlib notebook
from astropy.table import Table, vstack, join
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import itertools
sys.path.append(f'{os.environ["HOME"]}/Projects/planckClusters/catalogs')
from load_catalogs import load_PSZcatalog
from astropy.io.fits import getheader
from utilities import redshifts_from_papers, get_immediate_subfiles

import warnings
from astropy.utils.exceptions import AstropyWarning
warnings.simplefilter('ignore', category=AstropyWarning)

  from tqdm.autonotebook import tqdm


In [2]:
def load_MCMCfits(outpath, fields):
    for field in fields:
        if os.path.exists(f'{outpath}/{field}/{field}_mcmcfits.txt'):
            try:
                fit_part = Table.read(f'{outpath}/{field}/{field}_mcmcfits.txt', format='ascii', header_start=0)
            except :
                print(field)
                raise (FileNotFoundError)
            src_part = Table.read(f'{outpath}/{field}/{field}_vtp.detect', format='fits')
            
            fit_part.rename_column('ID', 'INDEX')
            try:
                data_part = join(fit_part, src_part, keys='INDEX', join_type='inner')
            except ValueError:
                continue
        else:
            continue

        # handle the first pass when we only have one part
        try:
            data = vstack([data, data_part])
        except NameError:
            data = data_part
        
    return data

def load_regions(outpath, field, index=0):
    # get all the files
    files = get_immediate_subfiles(f'{outpath}/{field}/spec_files/')
    
    # only want region files -- specific index
    files = [f for f in files if 'bkg.reg' in f]
    
    if len(files) == 0:
        return -1.0
    
    # read the region file and return radius IN PIXELS
    with open(files[0], 'r') as f:
        lines = f.readlines()

    if 'ellipse' in lines[index + 1]:
        r1 = lines[index + 1].split(',')[-3]
        r2 = lines[index + 1].split(',')[-2]
        
        return (float(r1) + float(r2)) / 2
    
    # when making background regions we mask 1.5 * extraction radius for each source
    # here we will take that factor out, because we don't use it in the actual source 
    # extraction
    
    if index:
        return float(lines[index + 1].split(',')[-1][:-2]) / 1.5 

    return float(lines[index + 1].split(',')[-1][:-2])

In [3]:
datapath = '../data'
data = load_PSZcatalog()
data = data.sort_values('NAME')

fields = [n.replace(' ', '_') for n in data['NAME']]

# read in the MCMC fits
fits = load_MCMCfits(datapath, fields)
# read in the catalog
cat = Table.read('./catalogs/results_table_inspected_0.5_2.0.fits')
# only use non-duplicates
# cat = cat[~cat['DUPLICATE']]

fits.remove_column('R')
df_fits = fits.to_pandas()
df_fits.Field = df_fits.Field.str.replace('_', ' ')
df_cat = cat.to_pandas()
df_cat = df_cat.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)

df_merged = df_cat.merge(df_fits,                                                      
               how='outer',                                              
               left_on=['NAME', 'INDEX'],                                          
               right_on=['Field', 'INDEX'],                                           
               suffixes=('_cat', '_fit'))

In [4]:
# add a column for the extraction radius and bkg radius
df_merged['EXT_RAD'] = 0.0
df_merged['BKG_RAD'] = 0.0
pixscale = 2.36

for i, row in df_merged.iterrows():
    field = row.Field.replace(' ', '_')
    ext_rad = load_regions(datapath, field, row.INDEX) * pixscale / 60
    bkg_rad = load_regions(datapath, field, 0) * pixscale / 60
    df_merged.at[i, 'EXT_RAD'] = ext_rad
    df_merged.at[i, 'BKG_RAD'] = bkg_rad

df = df_merged[['Field', 'INDEX', 'bg_50', 'EXT_RAD', 'BKG_RAD']]

table = Table.from_pandas(df)

# table.filled(-1.0).write('./catalogs/bkgs_radii.fits', overwrite=True)

In [5]:
df.plot.scatter('bg_50', 'EXT_RAD')
plt.ylim(1,1000)
plt.loglog()

<IPython.core.display.Javascript object>

[]

In [6]:
df_merged['FLAGS'] = ''
df_merged.loc[df.EXT_RAD > df.BKG_RAD, 'FLAGS'] = 'A'

In [7]:
df_merged[df_merged.FLAGS != '']

Unnamed: 0,NAME,RA_detc,DEC_detc,REDSHIFT,INDEX,PSZ_dist_cat,EXPTIME_cat,SN,redCHI2,DOF,...,Y_ERR,RA,DEC,RA_ERR,DEC_ERR,PSZ_dist_fit,Extended,EXT_RAD,BKG_RAD,FLAGS
51,PSZ2 G011.36+49.42,230.465241,7.70556,0.0442,1,2.030819,8903.960938,112.9739,1.898,432.0,...,46.526254,230.465241,7.70556,0.037592,0.030501,2.030819,1,9.57056,9.5,A
110,PSZ2 G084.19-47.48,347.601878,7.579836,0.0424,1,4.152838,4303.338379,44.9956,1.695,179.0,...,14.503777,347.601878,7.579836,0.006798,0.009508,4.152838,1,45.397642,11.0,A
117,PSZ2 G085.82+35.44,258.842066,57.416603,0.0276,1,2.702335,3591.577881,-1.0,2.0379,193.0,...,22.960577,258.842066,57.416603,0.00964,0.015052,2.702335,1,27.727037,11.0,A
157,PSZ2 G114.83+57.25,201.458915,59.328087,0.169517,1,0.428689,4933.425781,35.4449,1.7805,120.0,...,16.63816,201.458915,59.328087,0.010994,0.010907,0.428689,1,12.174244,9.5,A
159,PSZ2 G115.25-72.07,10.462759,-9.306315,0.0555,1,2.466641,6059.879395,-1.0,1.5526,518.0,...,54.912269,10.462759,-9.306315,0.032901,0.035998,2.466641,1,123.047804,11.0,A
184,PSZ2 G136.64-25.03,28.198441,36.156433,0.0163,1,5.234638,8186.443359,-1.0,2.702,397.0,...,49.484784,28.198441,36.156433,0.02975,0.03244,5.234638,1,220.031453,11.0,A
196,PSZ2 G146.35-15.59,43.629608,41.582127,0.0172,1,2.115895,841.59198,-1.0,1.0103,197.0,...,16.78704,43.629608,41.582127,0.014761,0.011005,2.115895,1,23.365154,11.0,A
222,PSZ2 G173.76+22.92,109.371383,44.084699,-1.0,5,1.627197,7824.385742,-1.0,-1.0,-1.0,...,18.780161,109.371383,44.084699,0.006668,0.012311,1.627197,1,28.346196,9.5,A
223,PSZ2 G173.76+22.92,109.361427,44.138906,-1.0,6,4.882896,7304.895996,-1.0,-1.0,-1.0,...,8.007824,109.361427,44.138906,0.004353,0.00525,4.882896,1,21.694746,9.5,A
255,PSZ2 G210.22+15.56,115.669774,9.373928,0.062,1,2.367259,2832.924805,-1.0,1.9746,99.0,...,11.691221,115.669774,9.373928,0.006759,0.007664,2.367259,1,15.462536,11.0,A


The next part is a little harder. We need to know if there are any sources where the extraction radius of one overlaps with the with extraction radius of another. But we only need to do it in single fields. I think what we should do is load the detection catalogs for each field. Then we check.

In [8]:
current = ''
for i, row in df_merged.iterrows():
    df_tmp = df_merged.loc[df_merged.Field == row.Field]

    if len(df_tmp) >=2 and row.Field != current:
        current = row.Field
        for x, y in itertools.combinations(df_tmp.index, 2):
            x1 = df_merged.iloc[x].X
            y1 = df_merged.iloc[x].Y
            x2 = df_merged.iloc[y].X
            y2 = df_merged.iloc[y].Y
            r1 = abs(df_merged.iloc[x].EXT_RAD) * 60 / 2.36
            r2 = abs(df_merged.iloc[y].EXT_RAD) * 60 / 2.36
            
            # check to see if one extraction circle wholly inside another
            if (r1 - r2)**2 >= ( (x2-x1)**2 + (y2-y1)**2):
                print(f"{df_merged.iloc[x].Field} {df_merged.iloc[x].INDEX} and {df_merged.iloc[y].INDEX} overlap")
                if not 'B' in df_merged.iloc[x].FLAGS:
                    df_merged.at[x, 'FLAGS'] += 'B'
                if not 'B' in df_merged.iloc[y].FLAGS:
                    df_merged.at[y, 'FLAGS'] += 'B'
            # check if the two extraction circles intersect.
            elif (r1 - r2)**2 <= (x1 - x2)**2 + (y1 - y2)**2 <= (r1 + r2)**2:
                print(f"{df_merged.iloc[x].Field} {df_merged.iloc[x].INDEX} and {df_merged.iloc[y].INDEX} intersect")
                if not 'B' in df_merged.iloc[x].FLAGS:
                    df_merged.at[x, 'FLAGS'] += 'B'
                if not 'B' in df_merged.iloc[y].FLAGS:
                    df_merged.at[y, 'FLAGS'] += 'B'
            else:
                print(f"{df_merged.iloc[x].Field} {df_merged.iloc[x].INDEX} and {df_merged.iloc[y].INDEX} do not intersect")


PSZ1 G031.41+28.75 22 and 26 do not intersect
PSZ1 G031.41+28.75 22 and 33 do not intersect
PSZ1 G031.41+28.75 22 and 21 do not intersect
PSZ1 G031.41+28.75 22 and 30 do not intersect
PSZ1 G031.41+28.75 22 and 31 do not intersect
PSZ1 G031.41+28.75 22 and 34 do not intersect
PSZ1 G031.41+28.75 26 and 33 do not intersect
PSZ1 G031.41+28.75 26 and 21 do not intersect
PSZ1 G031.41+28.75 26 and 30 do not intersect
PSZ1 G031.41+28.75 26 and 31 do not intersect
PSZ1 G031.41+28.75 26 and 34 do not intersect
PSZ1 G031.41+28.75 33 and 21 do not intersect
PSZ1 G031.41+28.75 33 and 30 do not intersect
PSZ1 G031.41+28.75 33 and 31 do not intersect
PSZ1 G031.41+28.75 33 and 34 do not intersect
PSZ1 G031.41+28.75 21 and 30 do not intersect
PSZ1 G031.41+28.75 21 and 31 do not intersect
PSZ1 G031.41+28.75 21 and 34 do not intersect
PSZ1 G031.41+28.75 30 and 31 do not intersect
PSZ1 G031.41+28.75 30 and 34 do not intersect
PSZ1 G031.41+28.75 31 and 34 do not intersect
PSZ1 G083.35+76.41 1 and 2 interse

PSZ2 G234.52+82.85 1 and 2 do not intersect
PSZ2 G260.80+06.71 2 and 1 do not intersect
PSZ2 G268.34-43.64 11 and 16 do not intersect
PSZ2 G268.34-43.64 11 and 17 do not intersect
PSZ2 G268.34-43.64 11 and 21 do not intersect
PSZ2 G268.34-43.64 11 and 6 do not intersect
PSZ2 G268.34-43.64 11 and 8 do not intersect
PSZ2 G268.34-43.64 11 and 18 do not intersect
PSZ2 G268.34-43.64 11 and 19 do not intersect
PSZ2 G268.34-43.64 16 and 17 do not intersect
PSZ2 G268.34-43.64 16 and 21 do not intersect
PSZ2 G268.34-43.64 16 and 6 do not intersect
PSZ2 G268.34-43.64 16 and 8 do not intersect
PSZ2 G268.34-43.64 16 and 18 do not intersect
PSZ2 G268.34-43.64 16 and 19 do not intersect
PSZ2 G268.34-43.64 17 and 21 do not intersect
PSZ2 G268.34-43.64 17 and 6 do not intersect
PSZ2 G268.34-43.64 17 and 8 do not intersect
PSZ2 G268.34-43.64 17 and 18 do not intersect
PSZ2 G268.34-43.64 17 and 19 do not intersect
PSZ2 G268.34-43.64 21 and 6 do not intersect
PSZ2 G268.34-43.64 21 and 8 do not intersect


PSZ2 G202.61-26.26 10 and 11 do not intersect
PSZ2 G202.61-26.26 10 and 8 do not intersect
PSZ2 G202.61-26.26 10 and 12 do not intersect
PSZ2 G202.61-26.26 11 and 8 do not intersect
PSZ2 G202.61-26.26 11 and 12 intersect
PSZ2 G202.61-26.26 8 and 12 do not intersect
PSZ2 G225.56-27.27 3 and 2 intersect
PSZ2 G225.84+41.84 2 and 1 intersect
PSZ2 G226.12-19.51 2 and 1 do not intersect
PSZ2 G226.75+48.95 1 and 4 do not intersect
PSZ2 G226.75+48.95 1 and 5 do not intersect
PSZ2 G226.75+48.95 1 and 3 do not intersect
PSZ2 G226.75+48.95 1 and 6 do not intersect
PSZ2 G226.75+48.95 4 and 5 intersect
PSZ2 G226.75+48.95 4 and 3 intersect
PSZ2 G226.75+48.95 4 and 6 intersect
PSZ2 G226.75+48.95 5 and 3 intersect
PSZ2 G226.75+48.95 5 and 6 intersect
PSZ2 G226.75+48.95 3 and 6 do not intersect
PSZ2 G230.29-47.13 1 and 2 do not intersect
PSZ2 G234.52+82.85 1 and 2 do not intersect
PSZ2 G260.80+06.71 2 and 1 do not intersect
PSZ2 G268.34-43.64 11 and 16 do not intersect
PSZ2 G268.34-43.64 11 and 17 do n

In [9]:
df_cat = df_cat.merge(df_merged[['Field', 'INDEX', 'FLAGS']],                                                      
               how='left',                                              
               left_on=['NAME', 'INDEX'],                                          
               right_on=['Field', 'INDEX'],                                           
               suffixes=('_cat', '_fit'))

In [18]:
df_cat[(df_cat.FLAGS != '') & (~df_cat.DUPLICATE)][['Field', 'REDSHIFT', 'INDEX', 'FLAGS', 'DUPLICATE']]

Unnamed: 0,Field,REDSHIFT,INDEX,FLAGS,DUPLICATE
28,PSZ1 G292.00-43.64,-1.0,7,B,False
51,PSZ2 G011.36+49.42,0.0442,1,A,False
72,PSZ2 G045.20+15.63,-1.0,2,B,False
80,PSZ2 G052.35-31.98,0.264975,1,B,False
81,PSZ2 G052.35-31.98,0.264975,2,B,False
84,PSZ2 G056.79-11.60,0.122,1,B,False
103,PSZ2 G078.36-47.22,0.1696,1,B,False
104,PSZ2 G078.36-47.22,0.1696,2,B,False
110,PSZ2 G084.19-47.48,0.0424,1,A,False
114,PSZ2 G084.69+42.28,0.13,4,B,False


In [20]:
table = Table.from_pandas(df_cat)
table.filled(-1.0).write('./catalogs/results_table_inspected_flagged_0.5_2.0.fits', overwrite=True)