# Swift DeepSky over Stripe82 cleaning

We have two tables -- count rates and nufnu fluxes -- containing the combined results of the SDS pipeline over all Swift observations within the Stripe82 region.

Most probably these tables contain duplicated entries, which we want to remove.
The catalog we want is made of unique sources, usually called *primary* sources: among a group of duplicates, the primary is the one that scores best in a given property.

The criterion we will use to select the sources is their signal-to-noise ratio in the `full` band ($0.3-10\,\mathrm{keV}$), *i.e.*, over all detected events.
The object that scores best among its duplicates secures its place in the final catalog.

The cross-matching and filtering process is done by `xmatch`: we give it the coordinate and selection-criterion columns, and it outputs the primary catalog.

## Filter catalogs

Filenames:
* `table_countrates_detections_stripe82.csv`
* `table_flux_detections_stripe82.csv`

In [4]:
import warnings

# Silence every warning category for the remainder of the session.
warnings.simplefilter("ignore")

# Directory holding the Swift DeepSky tables.
data_dir = 'swift_deepsky/'


def fname(name):
    """Return `name` joined onto the module-level `data_dir`."""
    import os
    return os.path.join(data_dir, name)

### Energy flux table

In [2]:
# Peek at the first two lines (header + first row) of the flux table under data_dir.
!head -n2 $data_dir/table_flux_detections_stripe82.csv

RA;DEC;NH;ENERGY_SLOPE;ENERGY_SLOPE_ERROR;EXPOSURE_TIME;nufnu_3keV(erg.s-1.cm-2);nufnu_error_3keV(erg.s-1.cm-2);nufnu_0.5keV(erg.s-1.cm-2);nufnu_error_0.5keV(erg.s-1.cm-2);upper_limit_0.5keV(erg.s-1.cm-2);nufnu_1.5keV(erg.s-1.cm-2);nufnu_error_1.5keV(erg.s-1.cm-2);upper_limit_1.5keV(erg.s-1.cm-2);nufnu_4.5keV(erg.s-1.cm-2);nufnu_error_4.5keV(erg.s-1.cm-2);upper_limit_4.5keV(erg.s-1.cm-2)
00:56:24.480;-01:16:38.317;3.54E+20;0.8;-999/-999;4572.8;7.96961e-14;2.1177e-14;2.24733e-14;1.55274e-14;-9.990E+02;1.08826e-13;4.02727e-14;-9.990E+02;9.37007e-14;4.60196e-14;-9.990E+02


In [3]:
def run_xmatch(df, flux_column, flux_error_column, radius_arcsec=5):
    """Cross-match a detections table against itself, ranking matches by SNR.

    The frame is modified IN PLACE (callers rely on this):
      * 'RA' and 'DEC' are overwritten with decimal degrees, RA wrapped at
        180 deg;
      * an 'snr' column is added as `flux_column / flux_error_column`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'RA', 'DEC' and 'OBJID' columns plus the two flux
        columns named below. 'RA' is parsed as hour angle and 'DEC' as
        degrees.
    flux_column : str
        Name of the column holding the measurement (SNR numerator).
    flux_error_column : str
        Name of the column holding its error (SNR denominator).
    radius_arcsec : float, optional
        Matching radius in arcseconds; defaults to 5, the value that was
        previously hard-coded.

    Returns
    -------
    The object returned by `xmatch.xmatch` for `df` matched against itself.

    Notes
    -----
    NOTE(review): because 'RA' is overwritten with degrees, calling this a
    second time on the same frame would feed those degrees back in with the
    hourangle unit -- reload the table before re-running.
    """
    # Resolve every dependency up front so that a missing package fails
    # before `df` has been touched.
    from astropy import units
    from astropy.coordinates import Angle, SkyCoord
    from xmatch import xmatch

    coords = SkyCoord(df['RA'], df['DEC'], unit=(units.hourangle, units.degree))
    ra = coords.ra
    ra.wrap_angle = 180 * units.deg
    df['RA'] = ra.deg
    df['DEC'] = coords.dec.deg

    df['snr'] = df[flux_column] / df[flux_error_column]

    # Same column mapping for both sides of the self-match.
    cols = dict(ra='RA', dec='DEC', id='OBJID')
    rad = Angle(radius_arcsec, 'arcsec')

    return xmatch(df, df, cols, cols, radius=rad, snr_column='snr')

In [6]:
import pandas

detections_filename = fname('table_flux_detections_stripe82.csv')
flux_column = 'nufnu_3keV(erg.s-1.cm-2)'
flux_error_column = 'nufnu_error_3keV(erg.s-1.cm-2)'

# Read the semicolon-separated table and expose the row position as an
# explicit 'OBJID' column.
df = (
    pandas.read_csv(detections_filename, sep=';')
    .rename_axis('OBJID')
    .reset_index()
)
df.head()

Unnamed: 0,OBJID,RA,DEC,NH,ENERGY_SLOPE,ENERGY_SLOPE_ERROR,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2)
0,0,00:56:24.480,-01:16:38.317,3.54e+20,0.8,-999/-999,4572.8,7.96961e-14,2.1177e-14,2.24733e-14,1.55274e-14,-999.0,1.08826e-13,4.02727e-14,-999.0,9.37007e-14,4.60196e-14,-999.0
1,1,00:56:19.136,-01:14:58.198,3.52e+20,0.8,-999/-999,4684.2,6.27709e-14,1.97526e-14,1.27727e-14,1.25221e-14,-999.0,7.07549e-14,3.43038e-14,-999.0,1.06643e-13,5.17034e-14,-999.0
2,2,00:56:23.004,-01:13:39.516,3.51e+20,0.8,-999/-999,4649.6,5.50197e-14,1.97456e-14,2.23859e-14,1.75189e-14,-999.0,4.65563e-14,3.00594e-14,-999.0,9.35337e-14,5.22796e-14,-999.0
3,3,00:56:16.418,-01:14:05.418,3.51e+20,0.8,-999/-999,4642.0,8.17327e-14,2.25664e-14,1.15106e-14,1.18772e-14,-999.0,9.56891e-14,3.93635e-14,-999.0,1.44238e-13,5.93351e-14,-999.0
4,4,00:56:16.922,-01:13:16.401,3.5e+20,0.8,-999/-999,4587.3,7.36584e-14,2.115e-14,5.61688e-14,2.41884e-14,-999.0,4.673e-14,2.64994e-14,-999.0,9.39095e-14,4.60162e-14,-999.0


In [5]:
# Match the flux table against itself. Note that run_xmatch also rewrites
# df's RA/DEC columns in degrees and adds an 'snr' column to df.
xcat = run_xmatch(df, flux_column, flux_error_column)
xcat.head()

Unnamed: 0_level_0,A,A,A,B,B,B,AB,AB,AB
Unnamed: 0_level_1,RA,DEC,OBJID,RA,DEC,OBJID,snr,duplicates,snrs
0,14.102,-1.27731,0,14.102,-1.27731,0.0,3.763333,3309,3.76333286112
1,14.079733,-1.249499,1,14.079733,-1.249499,1.0,3.177855,3310,3.17785506718
2,14.09585,-1.227643,2,14.09585,-1.227643,2.0,2.786428,3311,2.78642836885
3,14.068408,-1.234838,3,14.068408,-1.234838,3.0,3.621876,3312,3.62187588627
4,14.070508,-1.221223,4,14.070508,-1.221223,4.0,3.482667,3313,3.48266666667


In [6]:
# OBJIDs listed in the ('B', 'OBJID') column of the match table; they come
# back as floats, so cast to int before using them as index labels.
primary_ids = xcat[('B', 'OBJID')].astype(int)

# Pull the corresponding rows out of the detections table.
df_xcat = df.set_index('OBJID').loc[primary_ids]
df_xcat.describe()

Unnamed: 0,RA,DEC,NH,ENERGY_SLOPE,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2),snr
count,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0
mean,15.019201,-0.106153,4.602366e+20,0.786529,35030.457308,1.325309e-13,1.134078e-14,1.228487e-13,-1.558351e-12,-932.8578,1.235086e-13,-4.240899e-12,-925.6292,1.478751e-13,-8.943898e-12,-897.076,6.109273
std,31.970745,0.78767,2.480186e+20,0.224127,49179.722289,1.249764e-12,1.84971e-14,1.124379e-12,8.395066e-12,248.4423,1.093083e-12,2.779925e-11,260.6507,1.397355e-12,4.650934e-11,302.4346,10.587381
min,-58.027692,-1.639452,1.81e+20,-1.723,387.5,2.04815e-15,3.48103e-16,0.0,-1.41221e-10,-999.0,0.0,-4.94421e-10,-999.0,0.0,-7.53389e-10,-999.0,2.040551
25%,-7.908507,-0.79166,2.82e+20,0.8,7126.325,1.269552e-14,2.900423e-15,6.133645e-15,2.402865e-15,-999.0,8.95041e-15,3.06276e-15,-999.0,1.2002e-14,3.845992e-15,-999.0,3.102268
50%,21.870021,-0.159245,3.6e+20,0.8,14459.75,2.496795e-14,6.13677e-15,1.504565e-14,5.617135e-15,-999.0,2.03593e-14,7.59764e-15,-999.0,2.71623e-14,9.770385e-15,-999.0,4.045499
75%,40.490854,0.516338,5.8425e+20,0.8,40978.45,5.331152e-14,1.200935e-14,3.787045e-14,1.259175e-14,-999.0,4.857688e-14,1.613043e-14,-999.0,5.982845e-14,2.05191e-14,-999.0,5.936806
max,60.102,1.606157,1.19e+21,3.32,295994.0,4.64653e-11,2.22404e-13,2.97963e-11,4.53661e-13,2.15313e-12,3.23009e-11,4.27803e-13,4.15981e-12,5.33482e-11,4.66212e-13,7.35169e-12,290.218897


In [7]:
# Preview the selected rows (they still carry the 'snr' column added by run_xmatch).
df_xcat.head()

Unnamed: 0_level_0,RA,DEC,NH,ENERGY_SLOPE,ENERGY_SLOPE_ERROR,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2),snr
OBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,14.102,-1.27731,3.54e+20,0.8,-999/-999,4572.8,7.96961e-14,2.1177e-14,2.24733e-14,1.55274e-14,-999.0,1.08826e-13,4.02727e-14,-999.0,9.37007e-14,4.60196e-14,-999.0,3.763333
1,14.079733,-1.249499,3.52e+20,0.8,-999/-999,4684.2,6.27709e-14,1.97526e-14,1.27727e-14,1.25221e-14,-999.0,7.07549e-14,3.43038e-14,-999.0,1.06643e-13,5.17034e-14,-999.0,3.177855
2,14.09585,-1.227643,3.51e+20,0.8,-999/-999,4649.6,5.50197e-14,1.97456e-14,2.23859e-14,1.75189e-14,-999.0,4.65563e-14,3.00594e-14,-999.0,9.35337e-14,5.22796e-14,-999.0,2.786428
3,14.068408,-1.234838,3.51e+20,0.8,-999/-999,4642.0,8.17327e-14,2.25664e-14,1.15106e-14,1.18772e-14,-999.0,9.56891e-14,3.93635e-14,-999.0,1.44238e-13,5.93351e-14,-999.0,3.621876
4,14.070508,-1.221223,3.5e+20,0.8,-999/-999,4587.3,7.36584e-14,2.115e-14,5.61688e-14,2.41884e-14,-999.0,4.673e-14,2.64994e-14,-999.0,9.39095e-14,4.60162e-14,-999.0,3.482667


In [8]:
# Remove the helper 'snr' column before writing the catalog.
# errors='ignore' keeps this cell re-runnable: a plain `del` raises KeyError
# the second time, once the column is already gone.
df_xcat = df_xcat.drop('snr', axis=1, errors='ignore')

In [9]:
import os

# Derive "<name>_unique<ext>" from the input filename. The suffix is added
# only once, so re-running this cell does not produce "_unique_unique", and
# the extension is split off properly instead of assuming four characters.
_root, _ext = os.path.splitext(detections_filename)
if not _root.endswith('_unique'):
    detections_filename = _root + '_unique' + _ext
df_xcat.to_csv(detections_filename, sep=';')

In [10]:
# Show the first three lines of the file named by detections_filename.
!head -n3 $detections_filename

OBJID;RA;DEC;NH;ENERGY_SLOPE;ENERGY_SLOPE_ERROR;EXPOSURE_TIME;nufnu_3keV(erg.s-1.cm-2);nufnu_error_3keV(erg.s-1.cm-2);nufnu_0.5keV(erg.s-1.cm-2);nufnu_error_0.5keV(erg.s-1.cm-2);upper_limit_0.5keV(erg.s-1.cm-2);nufnu_1.5keV(erg.s-1.cm-2);nufnu_error_1.5keV(erg.s-1.cm-2);upper_limit_1.5keV(erg.s-1.cm-2);nufnu_4.5keV(erg.s-1.cm-2);nufnu_error_4.5keV(erg.s-1.cm-2);upper_limit_4.5keV(erg.s-1.cm-2)
0;14.102000000000004;-1.2773102777777776;3.54e+20;0.8;-999/-999;4572.8;7.96961e-14;2.1177e-14;2.24733e-14;1.55274e-14;-999.0;1.08826e-13;4.0272699999999995e-14;-999.0;9.37007e-14;4.60196e-14;-999.0
1;14.079733333333337;-1.2494994444444445;3.52e+20;0.8;-999/-999;4684.2;6.277090000000001e-14;1.9752599999999997e-14;1.27727e-14;1.25221e-14;-999.0;7.07549e-14;3.43038e-14;-999.0;1.06643e-13;5.1703399999999997e-14;-999.0


### Countrates table

In [11]:
!head -n2 table_countrates_detections_stripe82.csv

RA;DEC;countrates_0.3-10keV(ph.s-1);countrates_error_0.3-10keV(ph.s-1);exposure_time(s);countrates_0.3-1keV(ph.s-1);countrates_error_0.3-1keV(ph.s-1);upper_limit_0.3-1keV(ph.s-1);countrates_1-2keV(ph.s-1);countrates_error_1-2keV(ph.s-1);upper_limit_1-2keV(ph.s-1);countrates_2-10keV(ph.s-1);countrates_error_2-10keV(ph.s-1);upper_limit_2-10keV(ph.s-1)
00:56:24.480;-01:16:38.317;5.645E-03;1.500E-03;4572.8;8.684E-04;6.000E-04;-9.990E+02;3.040E-03;1.125E-03;-9.990E+02;1.737E-03;8.531E-04;-9.990E+02


In [12]:
import pandas

detections_filename = fname('table_countrates_detections_stripe82.csv')
flux_column = 'countrates_0.3-10keV(ph.s-1)'
flux_error_column = 'countrates_error_0.3-10keV(ph.s-1)'

# Read the table and expose the row position as an explicit 'OBJID' column.
df = (
    pandas.read_csv(detections_filename, sep=';')
    .rename_axis('OBJID')
    .reset_index()
)

# Self-match; this also rewrites RA/DEC in degrees and adds 'snr' to df.
xcat = run_xmatch(df, flux_column, flux_error_column)

# Keep the rows whose OBJID appears in the ('B', 'OBJID') column of the
# match table, then discard the helper 'snr' column.
primary_ids = xcat[('B', 'OBJID')].astype(int)
df_xcat = df.set_index('OBJID').loc[primary_ids].drop('snr', axis=1)

df_xcat.describe()

Unnamed: 0,RA,DEC,countrates_0.3-10keV(ph.s-1),countrates_error_0.3-10keV(ph.s-1),exposure_time(s),countrates_0.3-1keV(ph.s-1),countrates_error_0.3-1keV(ph.s-1),upper_limit_0.3-1keV(ph.s-1),countrates_1-2keV(ph.s-1),countrates_error_1-2keV(ph.s-1),upper_limit_1-2keV(ph.s-1),countrates_2-10keV(ph.s-1),countrates_error_2-10keV(ph.s-1),upper_limit_2-10keV(ph.s-1)
count,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0,2764.0
mean,15.019201,-0.106153,0.01013,0.000803,35032.097323,0.004325,-0.057754,-933.772828,0.003467,-0.11809,-926.543975,0.00274,-0.165761,-897.990529
std,31.970745,0.78767,0.090519,0.001358,49183.157865,0.037844,0.311044,249.663021,0.0306,0.780844,261.840428,0.025492,0.862769,303.547454
min,-58.027692,-1.639452,0.000119,3.6e-05,387.5,0.0,-5.195,-2530.0,0.0,-13.99,-2530.0,0.0,-13.99,-2530.0
25%,-7.908507,-0.79166,0.000854,0.00019,7126.325,0.000219,8.2e-05,-999.0,0.000246,8.2e-05,-999.0,0.000223,7.1e-05,-999.0
50%,21.870021,-0.159245,0.001705,0.00043,14459.75,0.00055,0.00021,-999.0,0.000561,0.00021,-999.0,0.000499,0.000181,-999.0
75%,40.490854,0.516338,0.003709,0.00083,40978.45,0.001406,0.000469,-999.0,0.001337,0.000444,-999.0,0.00111,0.000385,-999.0
max,60.102,1.606157,2.716,0.019,295994.0,1.091,0.014,0.08248,0.9114,0.01229,0.1173,0.9446,0.0076,0.1364


In [13]:
# df.head()

In [14]:
# xcat.head()

In [15]:
detections_filename = detections_filename[:-4] + '_unique.csv'
df_xcat.to_csv(detections_filename, sep=';')

!head -n3 $detections_filename

OBJID;RA;DEC;countrates_0.3-10keV(ph.s-1);countrates_error_0.3-10keV(ph.s-1);exposure_time(s);countrates_0.3-1keV(ph.s-1);countrates_error_0.3-1keV(ph.s-1);upper_limit_0.3-1keV(ph.s-1);countrates_1-2keV(ph.s-1);countrates_error_1-2keV(ph.s-1);upper_limit_1-2keV(ph.s-1);countrates_2-10keV(ph.s-1);countrates_error_2-10keV(ph.s-1);upper_limit_2-10keV(ph.s-1)
0;14.102000000000004;-1.2773102777777776;0.005645000000000001;0.0015;4572.8;0.0008684;0.0006;-999.0;0.0030399999999999997;0.001125;-999.0;0.001737;0.0008531000000000001;-999.0
1;14.079733333333337;-1.2494994444444445;0.004449;0.0014;4684.2;0.0004943;0.0004846;-999.0;0.001977;0.0009585;-999.0;0.001977;0.0009585;-999.0


## Checking the results

In [16]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

output_notebook()

# Sky positions of the selected sources. df_xcat is whichever table was
# processed last (the count-rates one when the notebook is run top to bottom);
# its RA/DEC are in degrees with RA wrapped at 180 deg (see run_xmatch).
fig = figure(
    title='Unique sources after cross-match',
    x_axis_label='RA (deg)',
    y_axis_label='DEC (deg)',
)

from astropy.coordinates import Angle
# NOTE(review): `r` (the 5 arcsec match radius in degrees) is computed but not
# used below -- presumably meant as the marker radius; confirm or remove.
r = Angle(5,'arcsec').degree

x = df_xcat['RA']
y = df_xcat['DEC']

fig.circle(x, y, fill_alpha=0.3, fill_color='blue')

show(fig)