# Comparison of MLE and GC results from SDS x CS82

After cross-matching the SDStripe82 catalog with CS82 using both xmatching algorithms, `mle` and `gc`, we will now check how their results differ -- if at all.

The catalogs are stored in the following files:
* gc: `swift_deepsky/xcat_sds_cs82_gc_7arcsec.csv`
* mle: `swift_deepsky/xcat_sds_cs82_mle_MAG_AUTO.csv`

In [1]:
# Paths to the two cross-match result tables (relative to the working directory).
gc_catalog = 'swift_deepsky/xcat_sds_cs82_gc_7arcsec.csv'  # output of the `gc` algorithm
mle_catalog = 'swift_deepsky/xcat_sds_cs82_mle_MAG_AUTO.csv'  # output of the `mle` algorithm

In [2]:
import pandas
# Keep DataFrame previews short: at most 10 rows are rendered.
pandas.set_option('display.max_rows', 10)

# Both catalogs share one layout: the first CSV column is the row index and
# the first two lines form a two-level column header.
_csv_layout = dict(index_col=0, header=[0, 1])
df_gc, df_mle = (
    pandas.read_csv(_path, **_csv_layout) for _path in (gc_catalog, mle_catalog)
)

In [3]:
# Preview the `gc` cross-match table.
df_gc

Unnamed: 0_level_0,A,A,A,B,B,B,AB,AB,AB
Unnamed: 0_level_1,RA,DEC,OBJID,ALPHA_J2000,DELTA_J2000,OBJID,separation,duplicates,distances
0,14.102000,-1.277310,0,,,,,,
1,14.079733,-1.249499,1,,,,,,
2,14.095850,-1.227643,2,,,,,,
3,14.068408,-1.234838,3,,,,,,
4,14.070508,-1.221223,4,,,,,,
...,...,...,...,...,...,...,...,...,...
2759,17.460071,0.393572,6634,17.460878,0.392982,9439689.0,3.598603,9439942;9439963;9440078,5.14770756999403;5.3795387873798;6.57092331381...
2760,39.237083,0.726716,6732,39.236618,0.726102,14787476.0,2.772516,14787240;14787374,4.210032238557784;4.4539534358460795
2761,39.230850,0.613294,6733,39.232101,0.613970,14775413.0,5.117992,14775246,6.5627534729293835
2762,39.304346,0.671991,6734,39.305242,0.672597,14781889.0,3.893721,14781782;14781801,3.960413624509272;4.705623829117702


In [4]:
# Preview the `mle` cross-match table.
df_mle

Unnamed: 0_level_0,A,A,A,A,B,B,B,B,AB_MAG_AUTO,AB_MAG_AUTO,AB_MAG_AUTO,AB_MAG_AUTO,AB_MAG_AUTO
Unnamed: 0_level_1,RA,DEC,OBJID_A,pos_err,OBJID_B,ALPHA_J2000,DELTA_J2000,MAG_AUTO,Reliability,LR,duplicates,duplicates_LR,duplicates_R
OBJID_A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
0,14.102000,-1.277310,0.0,5.0,,,,,,,,,
1,14.079733,-1.249499,1.0,5.0,,,,,,,,,
2,14.095850,-1.227643,2.0,5.0,,,,,,,,,
3,14.068408,-1.234838,3.0,5.0,,,,,,,,,
4,14.070508,-1.221223,4.0,5.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6869,57.205183,-1.076907,6869.0,5.0,,,,,,,,,
6870,57.102754,-0.952921,6870.0,5.0,,,,,,,,,
6875,57.145083,-1.066406,6875.0,5.0,,,,,,,,,
6876,57.190829,-1.100567,6876.0,5.0,,,,,,,,,


## Let's check how many matching-pairs match between the catalogs

* This verification should give us a measure of how much those two methods actually differ and, as such, an estimate of the xmatching error.
* And it should point us to interesting sources or regions, where the nearest neighbor is *not* the "right" match.

In [5]:
# Reduce the `gc` table to an integer OBJID_A -> OBJID_B lookup, keeping only
# the rows where both identifiers are present.
_gc_pairs = df_gc[[('A','OBJID'),('B','OBJID')]].dropna().astype(int)
_gc_pairs.columns = ['OBJID_A','OBJID_B']
objids_gc = _gc_pairs.set_index('OBJID_A')

objids_gc.head()

Unnamed: 0_level_0,OBJID_B
OBJID_A,Unnamed: 1_level_1
14,6203717
45,10331489
46,10331779
47,10336933
48,10349649


In [6]:
# Reduce the `mle` table to an integer OBJID_A -> OBJID_B lookup, keeping only
# the rows where both identifiers are present.
_mle_pairs = df_mle[[('A','OBJID_A'),('B','OBJID_B')]].dropna().astype(int)
_mle_pairs.columns = ['OBJID_A','OBJID_B']
objids_mle = _mle_pairs.set_index('OBJID_A')

objids_mle.head()

Unnamed: 0_level_0,OBJID_B
OBJID_A,Unnamed: 1_level_1
14,6203632
45,10331489
46,10331970
47,10336874
48,10349673


In [7]:
# Both methods must have paired exactly the same set of SDS sources.
# `Index.equals` answers False on any mismatch, whereas an element-wise `==`
# raises when the two indexes differ in length.
objids_mle.index.sort_values().equals(objids_gc.index.sort_values())

True

In [8]:
# Side-by-side table of the counterpart chosen by each method, aligned on the
# OBJID_A index of both lookups.
dfx = objids_gc.merge(
    objids_mle,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=('_gc', '_mle'),
)
dfx.head()

Unnamed: 0_level_0,OBJID_B_gc,OBJID_B_mle
OBJID_A,Unnamed: 1_level_1,Unnamed: 2_level_1
14,6203717,6203632
45,10331489,10331489
46,10331779,10331970
47,10336933,10336874
48,10349649,10349673


In [9]:
# NOTE: despite its name, `df_diff` is True where both methods picked the
# SAME counterpart and False where they disagree.
df_diff = dfx['OBJID_B_gc'].eq(dfx['OBJID_B_mle'])
df_diff.head()

OBJID_A
14    False
45     True
46    False
47    False
48    False
dtype: bool

In [10]:
# Fraction of pairs for which both methods selected the same counterpart.
_n_same, _n_pairs = df_diff.sum(), len(df_diff)
equal_matches = _n_same / _n_pairs

_report = '{:.1f}% of matches coincide between "gc" and "mle"'
print(_report.format(equal_matches * 100))

70.4% of matches coincide between "gc" and "mle"


**70.4% of matches coincide between "gc" and "mle"**

## Who are the non-matching objects from 'mle'?

Let's check some properties of the ~30% of objects for which the likelihood estimator (`mle`) chose a different counterpart than `gc`.

In [11]:
# Rows of the `mle` table for the sources on which the two methods disagree.
_idx = df_diff[~df_diff].index
_mle_by_objid = df_mle.set_index(('A','OBJID_A'))
df_nonmatching = _mle_by_objid.loc[_idx]
df_nonmatching.head()

Unnamed: 0_level_0,A,A,A,B,B,B,B,AB_MAG_AUTO,AB_MAG_AUTO,AB_MAG_AUTO,AB_MAG_AUTO,AB_MAG_AUTO
Unnamed: 0_level_1,RA,DEC,pos_err,OBJID_B,ALPHA_J2000,DELTA_J2000,MAG_AUTO,Reliability,LR,duplicates,duplicates_LR,duplicates_R
OBJID_A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
14.0,-3.756921,-0.026718,5.0,6203632.0,-3.755398,-0.027458,19.368633,0.764507,0.019682,6203717;6203774,0.0024608894214439452;0.003601914698632182,0.09558660098397;0.1399066451650775
46.0,24.657625,-0.668684,5.0,10331970.0,24.65674,-0.668031,21.149553,0.446835,0.012284,10331779;10331716,0.00490188752174791;0.010305705467634313,0.17830270990683272;0.3748627858612379
47.0,24.819667,-0.621067,5.0,10336874.0,24.821171,-0.621196,21.617054,0.399341,0.007689,10336933;10336804;10336854,0.0025365640202913682;0.0037324721520687115;0....,0.131745698030017;0.19385954587316417;0.275053...
48.0,24.696104,-0.495326,5.0,10349673.0,24.695655,-0.495929,20.810715,0.598017,0.016153,10349649;10349580;10349671,0.00479203354230448;0.0028684722636638463;0.00...,0.177410883069921;0.10619671019945533;0.118375...
52.0,24.863604,-0.518196,5.0,10347523.0,24.863112,-0.518832,22.349245,0.565719,0.007371,10347561,0.005658521449655007,0.4342814700655355


In [12]:
# Sanity check: the disagreeing share must be the complement of the ~70.4% agreement printed above.
29.59 < 100*len(df_nonmatching)/len(df_diff) < 29.60

True

In [13]:
from bokeh.io import output_notebook

# Direct Bokeh output to the notebook so that `show()` renders figures inline.
output_notebook()

In [19]:
def plot_distros(data1, data2, legend1, legend2):
    """Overlay the density histograms of two samples on one Bokeh figure.

    Parameters
    ----------
    data1, data2 : pandas.Series
        Samples to compare; missing values are dropped before binning.
        `data1` is drawn in yellow, `data2` in blue.
    legend1, legend2 : str
        Legend labels for `data1` and `data2` respectively.

    Returns
    -------
    The Bokeh figure holding both histograms, legend placed top-left.
    """
    import numpy as np
    from bokeh.plotting import figure

    sample1, sample2 = data1.dropna(), data2.dropna()

    # The bin range must span BOTH samples: np.histogram ignores values that
    # fall outside the outermost edges, so a range taken from `data1` alone
    # would silently drop part of `data2`. nanmin/nanmax skip the NaN that an
    # empty sample reports as its min/max.
    lower = np.nanmin([sample1.min(), sample2.min()])
    upper = np.nanmax([sample1.max(), sample2.max()])
    bins = (lower, upper, 50)  # expanded by `histogram` into 50 edges (49 bins)

    p = figure(width=600, height=400)
    p = histogram(p, sample1, color='yellow', bins=bins, legend=legend1)
    p = histogram(p, sample2, color='blue', bins=bins, legend=legend2)
    p.legend.location = "top_left"

    return p

In [32]:
# HISTOGRAM

def histogram(fig, data, color, bins, legend):
    """Draw a density-normalised histogram of `data` onto `fig`.

    Parameters
    ----------
    fig : object exposing a ``quad`` method (a Bokeh figure in this notebook)
    data : array-like
        Values to bin.
    color : str
        Fill colour of the bars.
    bins : tuple
        ``(start, stop, num)``, expanded with ``numpy.linspace`` into ``num``
        bin edges (i.e. ``num - 1`` bins).
    legend : str or None
        Legend label for the bars; when empty or None, no legend entry is
        requested at all.

    Returns
    -------
    `fig`, with the bars added.
    """
    import numpy as np

    edges = np.linspace(*bins)
    density, edges = np.histogram(data, density=True, bins=edges)

    quad_kwargs = dict(top=density, bottom=0, left=edges[:-1], right=edges[1:],
                       fill_color=color, line_color="#033649", fill_alpha=0.5)
    # Only pass `legend_label` when there is a label: an empty string is still
    # a label and would add a blank entry (and legend box) to the figure.
    if legend:
        quad_kwargs['legend_label'] = legend

    fig.quad(**quad_kwargs)

    return fig

## .. a bigger sample from CS82

In [17]:
# Reduced CS82 photometry table; the first CSV column is used as the row index.
df_cs82_reduced = pandas.read_csv('cs82_photoauto_reduced_for_sds.csv', index_col=0)
df_cs82_reduced.head()

Unnamed: 0,OBJID_B,ALPHA_J2000,DELTA_J2000,MAG_AUTO
0,6203717,-3.756927,-0.025511,25.151508
1,10331489,24.693307,-0.672908,21.615351
2,10331779,24.656872,-0.669435,23.783859
3,10336933,24.819401,-0.620192,25.077038
4,10349649,24.696638,-0.495811,24.011


#### Plot: Background sample: 'MAG_AUTO' distribution of background sources (yellow) and counterparts (blue)

In [22]:
from bokeh.io import show

# Magnitudes of every source in the reduced CS82 table vs. those of the
# `mle` counterparts.
_col = ('B','MAG_AUTO')
_mag = _col[1]  # the reduced CS82 table uses flat (single-level) column names

data1, data2 = df_cs82_reduced[_mag].dropna(), df_mle[_col].dropna()
legend1, legend2 = (
    'background ({:d})'.format(len(data1)),
    'mle ({:d})'.format(len(data2)),
)

p = plot_distros(data1, data2, legend1, legend2)
p.xaxis.axis_label = 'Magnitude (MAG_AUTO)'
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'CS82 magnitude distribution of matched-vs-all sources'

show(p)

#### Plot: Background sample: 'MAG_AUTO' distribution of background sources (yellow) and the mle & gc (blue) sub-sample, i.e. the matches on which both methods agree

In [23]:
# Sub-sample on which both methods agree (`df_diff` is True where the `gc` and
# `mle` counterparts coincide), taken from the `mle` table.
_idx = df_diff.loc[df_diff].index
df_matching = df_mle.set_index(('A','OBJID_A')).loc[_idx]

from bokeh.io import show

_col = ('B','MAG_AUTO')

data1 = df_cs82_reduced[_col[1]].dropna()
data2 = df_matching[_col].dropna()
legend1 = 'background ({:d})'.format(len(data1))
# This sample is the intersection of the two methods, hence "mle & gc"
# (the next plot shows the complementary "mle - gc" sample).
legend2 = 'mle & gc ({:d})'.format(len(data2))

p = plot_distros(data1, data2, legend1, legend2)

p.title.text = 'CS82 magnitude distribution of matched-vs-all sources'
p.title.align = 'center'
p.xaxis.axis_label = 'Magnitude (MAG_AUTO)'
p.yaxis.axis_label = '#'

show(p)

#### Plot: Background sample: 'MAG_AUTO' distribution of background sources (yellow) and the mle-gc (blue) sub-sample

In [25]:
from bokeh.io import show

# Magnitudes of every source in the reduced CS82 table vs. those of the `mle`
# counterparts that differ from the `gc` choice.
_col = ('B','MAG_AUTO')
_mag = _col[1]  # the reduced CS82 table uses flat (single-level) column names

data1, data2 = df_cs82_reduced[_mag].dropna(), df_nonmatching[_col].dropna()
legend1, legend2 = (
    'background ({:d})'.format(len(data1)),
    'mle - gc ({:d})'.format(len(data2)),
)

p = plot_distros(data1, data2, legend1, legend2)
p.xaxis.axis_label = 'Magnitude (MAG_AUTO)'
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'CS82 magnitude distribution of matched-vs-all sources'

show(p)

#### Plot: Cross-matched sample: 'MAG_AUTO' distribution of all sources (yellow) and the mle-only (blue) sub-sample

In [26]:
from bokeh.io import show

# All `mle` counterparts vs. the subset where `mle` disagrees with `gc`.
# Missing magnitudes are not removed here; `plot_distros` drops them before
# binning.
_col = ('B','MAG_AUTO')
legend1, legend2 = 'all matched', 'mle only (mle-gc)'
data1, data2 = df_mle[_col], df_nonmatching[_col]

p = plot_distros(data1, data2, legend1, legend2)
show(p)

## The GC catalog

In [27]:
# CS82 rows of the counterparts selected by `gc`, looked up by OBJID_B.
_cs82_by_objid = df_cs82_reduced.set_index('OBJID_B')
df_cs82_gc = _cs82_by_objid.loc[objids_gc['OBJID_B']]
df_cs82_gc.head()

Unnamed: 0_level_0,ALPHA_J2000,DELTA_J2000,MAG_AUTO
OBJID_B,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6203717,-3.756927,-0.025511,25.151508
10331489,24.693307,-0.672908,21.615351
10331779,24.656872,-0.669435,23.783859
10336933,24.819401,-0.620192,25.077038
10349649,24.696638,-0.495811,24.011


## Separation (GC) between pairs

In [30]:
def plot_distro(data):
    """Return a new 600x400 Bokeh figure with the blue histogram of one sample.

    The bin range runs from the sample minimum to its maximum; missing values
    are dropped before binning and no legend label is supplied.
    """
    from bokeh.plotting import figure

    value_range = (data.min(), data.max(), 50)
    return histogram(
        figure(width=600, height=400),
        data.dropna(),
        color='blue',
        bins=value_range,
        legend=None,
    )

In [34]:
# Distribution of the separation column of the `gc` pairs.
_separation = df_gc[('AB','separation')]

p = plot_distro(_separation)
p.xaxis.axis_label = 'Separation (arcsec)'
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'Distance separation between matching sources in SDS-x-CS82'

show(p)

In [35]:
# Summary statistics of the `gc` pair separations.
df_gc[('AB','separation')].describe()

count    1105.000000
mean        2.901299
std         1.569761
min         0.052965
25%         1.700351
50%         2.643995
75%         3.864710
max         6.971090
Name: (AB, separation), dtype: float64

In [36]:
from bokeh.io import show

# Magnitudes of every source in the reduced CS82 table vs. those of the
# counterparts selected by `gc`. Both tables use flat column names, so only
# the second element of `_col` is needed.
_col = ('B','MAG_AUTO')

data1, data2 = (
    _table[_col[1]].dropna() for _table in (df_cs82_reduced, df_cs82_gc)
)
legend1, legend2 = (
    'background ({:d})'.format(len(data1)),
    'gc ({:d})'.format(len(data2)),
)

p = plot_distros(data1, data2, legend1, legend2)
p.xaxis.axis_label = 'Magnitude (MAG_AUTO)'
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'CS82 magnitude distribution of matched-vs-all sources'

show(p)

# Let's check the x-ray flux behaviour

In [39]:
# NOTE(review): this path climbs to the parent directory ('../swift_deepsky/...'), while the
# cross-match catalogs at the top of the notebook use 'swift_deepsky/...' -- confirm both resolve.
sds_file = '../swift_deepsky/table_flux_detections_stripe82_unique.csv'

In [40]:
import pandas

# The SDS flux table is read as semicolon-separated values.
df_sds = pandas.read_csv(sds_file, sep=';')
df_sds.head()

Unnamed: 0,OBJID,RA,DEC,NH,ENERGY_SLOPE,ENERGY_SLOPE_ERROR,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2)
0,0,14.102,-1.27731,3.54e+20,0.8,-999/-999,4572.8,7.96961e-14,2.1177e-14,2.24733e-14,1.55274e-14,-999.0,1.08826e-13,4.02727e-14,-999.0,9.37007e-14,4.60196e-14,-999.0
1,1,14.079733,-1.249499,3.52e+20,0.8,-999/-999,4684.2,6.27709e-14,1.97526e-14,1.27727e-14,1.25221e-14,-999.0,7.07549e-14,3.43038e-14,-999.0,1.06643e-13,5.17034e-14,-999.0
2,2,14.09585,-1.227643,3.51e+20,0.8,-999/-999,4649.6,5.50197e-14,1.97456e-14,2.23859e-14,1.75189e-14,-999.0,4.65563e-14,3.00594e-14,-999.0,9.35337e-14,5.22796e-14,-999.0
3,3,14.068408,-1.234838,3.51e+20,0.8,-999/-999,4642.0,8.17327e-14,2.25664e-14,1.15106e-14,1.18772e-14,-999.0,9.56891e-14,3.93635e-14,-999.0,1.44238e-13,5.93351e-14,-999.0
4,4,14.070508,-1.221223,3.5e+20,0.8,-999/-999,4587.3,7.36584e-14,2.115e-14,5.61688e-14,2.41884e-14,-999.0,4.673e-14,2.64994e-14,-999.0,9.39095e-14,4.60162e-14,-999.0


In [41]:
# Index the SDS table by OBJID. The guard keeps this cell re-runnable: once
# OBJID has become the index it is no longer a column, and a second
# `set_index('OBJID')` would raise a KeyError.
if 'OBJID' in df_sds.columns:
    df_sds = df_sds.set_index('OBJID')
df_sds.head()

Unnamed: 0_level_0,RA,DEC,NH,ENERGY_SLOPE,ENERGY_SLOPE_ERROR,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2)
OBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,14.102,-1.27731,3.54e+20,0.8,-999/-999,4572.8,7.96961e-14,2.1177e-14,2.24733e-14,1.55274e-14,-999.0,1.08826e-13,4.02727e-14,-999.0,9.37007e-14,4.60196e-14,-999.0
1,14.079733,-1.249499,3.52e+20,0.8,-999/-999,4684.2,6.27709e-14,1.97526e-14,1.27727e-14,1.25221e-14,-999.0,7.07549e-14,3.43038e-14,-999.0,1.06643e-13,5.17034e-14,-999.0
2,14.09585,-1.227643,3.51e+20,0.8,-999/-999,4649.6,5.50197e-14,1.97456e-14,2.23859e-14,1.75189e-14,-999.0,4.65563e-14,3.00594e-14,-999.0,9.35337e-14,5.22796e-14,-999.0
3,14.068408,-1.234838,3.51e+20,0.8,-999/-999,4642.0,8.17327e-14,2.25664e-14,1.15106e-14,1.18772e-14,-999.0,9.56891e-14,3.93635e-14,-999.0,1.44238e-13,5.93351e-14,-999.0
4,14.070508,-1.221223,3.5e+20,0.8,-999/-999,4587.3,7.36584e-14,2.115e-14,5.61688e-14,2.41884e-14,-999.0,4.673e-14,2.64994e-14,-999.0,9.39095e-14,4.60162e-14,-999.0


In [42]:
# SDS rows of the sources that received a `gc` counterpart (label-based lookup on the OBJID index).
df_sds_matched = df_sds.loc[objids_gc.index]
df_sds_matched.head()

Unnamed: 0_level_0,RA,DEC,NH,ENERGY_SLOPE,ENERGY_SLOPE_ERROR,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2)
OBJID_A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
14,-3.756921,-0.026718,3.32e+20,0.8,-999/-999,4826.6,5.2588e-14,1.4016e-14,7.64039e-14,2.36338e-14,-999.0,2.67966e-14,1.65778e-14,-999.0,8.09414e-14,-5.38711e-11,2.67738e-13
45,24.693637,-0.672514,2.65e+20,0.8,-999/-999,17176.0,9.71122e-15,3.69873e-15,4.28292e-15,3.26295e-15,-999.0,9.41844e-15,5.90822e-15,-999.0,1.43235e-14,8.98518e-15,-999.0
46,24.657625,-0.668684,2.65e+20,0.8,-999/-999,17138.0,1.43976e-14,4.38368e-15,4.61647e-15,3.28712e-15,-999.0,1.69171e-14,7.65094e-15,-999.0,2.0583e-14,1.03427e-14,-999.0
47,24.819667,-0.621067,2.67e+20,0.8,-999/-999,15367.0,1.32104e-14,4.24948e-15,9.99991e-15,5.00359e-15,-999.0,1.95108e-14,8.46753e-15,-999.0,1.88707e-14,-1.66835e-11,4.91133e-14
48,24.696104,-0.495326,2.62e+20,0.8,-999/-999,15398.6,2.04713e-14,5.33676e-15,1.20276e-14,5.39408e-15,-999.0,1.76615e-14,7.92077e-15,-999.0,2.68679e-14,1.20496e-14,-999.0


In [43]:
# Complement of the matched set: SDS rows whose OBJID is absent from the `gc` lookup.
_has_counterpart = df_sds.index.isin(objids_gc.index)
df_sds_nonmatched = df_sds.loc[~_has_counterpart]
df_sds_nonmatched.head()

Unnamed: 0_level_0,RA,DEC,NH,ENERGY_SLOPE,ENERGY_SLOPE_ERROR,EXPOSURE_TIME,nufnu_3keV(erg.s-1.cm-2),nufnu_error_3keV(erg.s-1.cm-2),nufnu_0.5keV(erg.s-1.cm-2),nufnu_error_0.5keV(erg.s-1.cm-2),upper_limit_0.5keV(erg.s-1.cm-2),nufnu_1.5keV(erg.s-1.cm-2),nufnu_error_1.5keV(erg.s-1.cm-2),upper_limit_1.5keV(erg.s-1.cm-2),nufnu_4.5keV(erg.s-1.cm-2),nufnu_error_4.5keV(erg.s-1.cm-2),upper_limit_4.5keV(erg.s-1.cm-2)
OBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,14.102,-1.27731,3.54e+20,0.8,-999/-999,4572.8,7.96961e-14,2.1177e-14,2.24733e-14,1.55274e-14,-999.0,1.08826e-13,4.02727e-14,-999.0,9.37007e-14,4.60196e-14,-999.0
1,14.079733,-1.249499,3.52e+20,0.8,-999/-999,4684.2,6.27709e-14,1.97526e-14,1.27727e-14,1.25221e-14,-999.0,7.07549e-14,3.43038e-14,-999.0,1.06643e-13,5.17034e-14,-999.0
2,14.09585,-1.227643,3.51e+20,0.8,-999/-999,4649.6,5.50197e-14,1.97456e-14,2.23859e-14,1.75189e-14,-999.0,4.65563e-14,3.00594e-14,-999.0,9.35337e-14,5.22796e-14,-999.0
3,14.068408,-1.234838,3.51e+20,0.8,-999/-999,4642.0,8.17327e-14,2.25664e-14,1.15106e-14,1.18772e-14,-999.0,9.56891e-14,3.93635e-14,-999.0,1.44238e-13,5.93351e-14,-999.0
4,14.070508,-1.221223,3.5e+20,0.8,-999/-999,4587.3,7.36584e-14,2.115e-14,5.61688e-14,2.41884e-14,-999.0,4.673e-14,2.64994e-14,-999.0,9.39095e-14,4.60162e-14,-999.0


In [44]:
# Sizes of the matched and non-matched SDS samples, one per line.
for _sample in (df_sds_matched, df_sds_nonmatched):
    print(len(_sample))

1105
1659


In [45]:
from bokeh.io import show
from numpy import log10 as log

_col = 'nufnu_3keV(erg.s-1.cm-2)'

# Keep strictly positive fluxes only: log10 is undefined for zero or negative
# values, and the 0.5 keV / 1.5 keV cells apply the same cut before counting
# and plotting.
data1 = df_sds_nonmatched[_col].dropna()
data1 = data1.loc[data1>0]
data2 = df_sds_matched[_col].dropna()
data2 = data2.loc[data2>0]

legend1 = 'non-matched ({:d})'.format(len(data1))
legend2 = 'matched ({:d})'.format(len(data2))

p = plot_distros(log(data1), log(data2), legend1, legend2)

_cnm = _col.split('(')[0]
p.title.text = 'SDS x-ray ({}) flux distribution'.format(_cnm)
p.title.align = 'center'
p.xaxis.axis_label = 'log( nuFnu_3keV [erg.s-1.cm-2] )'
p.yaxis.axis_label = '#'

# Override the top-left default set by `plot_distros`.
p.legend.location = 'top_right'

show(p)

In [46]:
from bokeh.io import show
from numpy import log10 as log

# NH distribution of the SDS sources without / with a `gc` counterpart.
_col = 'NH'

data1, data2 = (
    _frame[_col].dropna() for _frame in (df_sds_nonmatched, df_sds_matched)
)
legend1, legend2 = (
    'non-matched ({:d})'.format(len(data1)),
    'matched ({:d})'.format(len(data2)),
)

p = plot_distros(data1, data2, legend1, legend2)
p.legend.location = 'top_right'  # overrides the top-left default of `plot_distros`

p.xaxis.axis_label = '{}'.format(_col)
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'SDS NH distribution'

show(p)

In [47]:
from bokeh.io import show
from numpy import log10 as log

# Exposure-time distribution of the SDS sources without / with a `gc` counterpart.
_col = 'EXPOSURE_TIME'

data1, data2 = (
    _frame[_col].dropna() for _frame in (df_sds_nonmatched, df_sds_matched)
)
legend1, legend2 = (
    'non-matched ({:d})'.format(len(data1)),
    'matched ({:d})'.format(len(data2)),
)

p = plot_distros(data1, data2, legend1, legend2)
p.legend.location = 'top_right'  # overrides the top-left default of `plot_distros`

p.xaxis.axis_label = '{}'.format(_col)
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'SDS exposure-time distribution'

show(p)

In [48]:
from bokeh.io import show
from numpy import log10 as log

# 0.5 keV flux of the SDS sources without / with a `gc` counterpart, in log10.
_col = 'nufnu_0.5keV(erg.s-1.cm-2)'
_cnm = _col.split('(')[0]

# Missing values are dropped first, then only strictly positive fluxes are
# kept so that the logarithm is defined.
data1, data2 = (
    _flux[_flux > 0]
    for _flux in (df_sds_nonmatched[_col].dropna(), df_sds_matched[_col].dropna())
)
legend1, legend2 = (
    'non-matched ({:d})'.format(len(data1)),
    'matched ({:d})'.format(len(data2)),
)

p = plot_distros(log(data1), log(data2), legend1, legend2)
p.legend.location = 'top_right'  # overrides the top-left default of `plot_distros`

p.xaxis.axis_label = 'log( {} )'.format(_col)
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'SDS x-ray ({}) flux distribution'.format(_cnm)

show(p)

In [49]:
from bokeh.io import show
from numpy import log10 as log

# 1.5 keV flux of the SDS sources without / with a `gc` counterpart, in log10.
_col = 'nufnu_1.5keV(erg.s-1.cm-2)'
_cnm = _col.split('(')[0]

# Missing values are dropped first, then only strictly positive fluxes are
# kept so that the logarithm is defined.
data1, data2 = (
    _flux[_flux > 0]
    for _flux in (df_sds_nonmatched[_col].dropna(), df_sds_matched[_col].dropna())
)
legend1, legend2 = (
    'non-matched ({:d})'.format(len(data1)),
    'matched ({:d})'.format(len(data2)),
)

p = plot_distros(log(data1), log(data2), legend1, legend2)
p.legend.location = 'top_right'  # overrides the top-left default of `plot_distros`

p.xaxis.axis_label = 'log( {} )'.format(_col)
p.yaxis.axis_label = '#'
p.title.align = 'center'
p.title.text = 'SDS x-ray ({}) flux distribution'.format(_cnm)

show(p)

In [50]:
def plot_distro_flux(title, data1, data2, colname, legend1, legend2):
    """Show the log10 distributions of two flux samples and return the figure.

    Parameters
    ----------
    title : str
        Figure title.
    data1, data2 : pandas.Series
        Flux samples; only strictly positive values are kept, since the
        logarithm is undefined otherwise.
    colname : str
        Column name, used to build the x-axis label ``log( <colname> )``.
    legend1, legend2 : str
        Legend labels for `data1` and `data2`.

    Returns
    -------
    The Bokeh figure, after it has been displayed with ``show``. Returning it
    lets callers that assign the result (``p = plot_distro_flux(...)``) get
    the figure instead of None.
    """
    from numpy import log10 as log
    # Imported locally so the function does not depend on a notebook-level
    # `show` having been imported before it is called.
    from bokeh.io import show

    data1 = data1.loc[data1>0]
    data2 = data2.loc[data2>0]

    p = plot_distros(log(data1), log(data2), legend1, legend2)

    p.title.text = title
    p.title.align = 'center'
    p.xaxis.axis_label = 'log( {} )'.format(colname)
    p.yaxis.axis_label = '#'

    # Override the top-left default set by `plot_distros`.
    p.legend.location = 'top_right'

    show(p)

    return p

_col = 'nufnu_4.5keV(erg.s-1.cm-2)'
_cnm = _col.split('(')[0]
title = 'SDS x-ray ({}) flux distribution'.format(_cnm)

# Apply the positive-flux cut here, before building the legends, so that the
# counts shown match the samples that are actually plotted
# (`plot_distro_flux` discards non-positive values before taking the log).
data1 = df_sds_nonmatched[_col].dropna()
data1 = data1.loc[data1>0]
data2 = df_sds_matched[_col].dropna()
data2 = data2.loc[data2>0]
legend1 = 'non-matched ({:d})'.format(len(data1))
legend2 = 'matched ({:d})'.format(len(data2))

from bokeh.io import show
p = plot_distro_flux(title, data1, data2, _col, legend1, legend2)