# Similar objects in simbad, GEV and TEV catalogs
First, we import all the packages that we will need.

In [1]:
import warnings
import pandas as pd
import numpy as np
from astropy.io.votable import parse_single_table, parse
from astropy.io.votable import from_table
import astropy.units as u
from astropy.coordinates import SkyCoord
from gevtev import compare_gev_tev_data 

warnings.filterwarnings("ignore")

This constant determines a threshold for the similarity between the objects:
$$\frac{|glat_{1} - glat_{2}|}{|glat_{1}|} <= \epsilon$$
$$\epsilon = 5*10^{2}$$

In [2]:
_epsilon = 5e-2

The cell above loads data manually downloaded from SIMBAD catalog by requests 'be' and 'plsr'. It creates a DataFrame with a respective column class. 

Now this dataFrame contains only three columns:
- RA_d
- DEC_d
- MAIN_ID
- class

Other columns with physical sense will be added later.

In [3]:
s = "data/simbad-plsr.xml"
votable1 = parse(s)
votable = parse_single_table(s)
table = votable.to_table()
table_plsr = table['RA_d','DEC_d', 'MAIN_ID']
pd_plsr = table_plsr.to_pandas() 
pd_plsr['class'] = 'plsr'

s = "data/simbad-be.xml"
votable1 = parse(s)
votable = parse_single_table(s)
table = votable.to_table()
table_be = table['RA_d','DEC_d', 'MAIN_ID']
pd_be = table_be.to_pandas()
pd_be['class'] = 'be'

frames_simbda = [pd_be, pd_plsr]
pd_simbad = pd.concat(frames_simbda)

This cell adds "s_" to columns of simbad data to makes them easily distinguishable from data of other catalogs.

In [4]:
plsr_match_names = {}
for i in pd_simbad.columns:
    plsr_match_names.update({i : "s_" + i})
pd_simbad = pd_simbad.rename(columns = plsr_match_names)

In [5]:
pd_simbad.head()

Unnamed: 0,s_RA_d,s_DEC_d,s_MAIN_ID,s_class
0,315.783489,38.99171,b'EM* MWC 1036',be
1,306.792553,38.900434,b'BD+38 4093',be
2,305.765073,39.4972,b'EM* MWC 342',be
3,308.363021,41.59957,b'[D75b] Em* 20+118',be
4,311.70535,43.427917,b'EM* LkHA 133',be


We load data common for GEV and TEV catalogs.

In [6]:
common_data, only_tev_data, only_gev_data = compare_gev_tev_data()

The following cells compare distances from a common catalog of GEV and TEV and SIMBAD catalog by translating SIMBAD coordinates to glan and glot due to astropy module. If their relative distance is smaller than $\epsilon$ they are associated to each another.

In [7]:
def create_vectors_common_gevtev_simbad(gevtev, simbad, epsilon):
    """This function looks for the same objects in GeV and TeV and Simbad catalogs 
    
    Return: two vectors where each index of TEV(GEV) is associated with its indev in GEV(TEV).
    If such objects does not existe, the corresponding vector coordinate is equal to -1
    
    gevtev - a pandas dataFrame of common GeV and TeV 
    simbad - a pandas dataFrame of Simbad 
    epsilon(double) - distance accepted as equivalence
    """
    d = {}
    classes = {}
    glat_gevtev = gevtev['tev_glat']
    glon_gevtev = gevtev['tev_glon']
    pos_ra_simbad = pd_simbad.as_matrix(columns=['s_RA_d'])
    pos_dec_simbad = pd_simbad.as_matrix(columns=['s_DEC_d'])
    c_icrs = SkyCoord(ra=pos_ra_simbad*u.degree, dec=pos_dec_simbad*u.degree, frame='icrs')
    glat_simbad = c_icrs.galactic.b.deg
    glon_simbad = c_icrs.galactic.l.deg
    class_simbad = pd_simbad.as_matrix(columns=['s_class'])
    class_gevtev = gevtev['gev_CLASS1']
    
    
    C_associations_gevtev = -1.0 * np.ones((len(glat_gevtev)))
    C_associations_simbad = -1.0 * np.ones((len(glat_simbad)))
    
    for i in range(len(glat_gevtev)):
        for j in range(len(pos_ra_simbad)):            
            if ((np.abs(glat_gevtev[i] - glat_simbad[j])/np.abs(glat_simbad[j]) < epsilon) 
                and (np.abs(glon_gevtev[i] - glon_simbad[j])/np.abs(glon_simbad[j]) < epsilon)) :
                    C_associations_gevtev[i] = j
                    C_associations_simbad[j] = i
    return C_associations_gevtev, C_associations_simbad

In [8]:
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(common_data, pd_simbad, _epsilon)

In [9]:
def create_common_data(data_gevtev, data_simbad, C_associations_gevtev, C_associations_simbad):
    """The fonction adds objects found both in GeV, TeV and simbad.
    """
    data_gevtev['join'] = C_associations_gevtev
    pd_common_sgevtev = pd.merge(data_simbad, data_gevtev, left_index=True, right_on='join', how='inner')
    del data_gevtev['join']

    data_simbad['join'] = C_associations_simbad
    pd_common_sgevtev0 = pd.merge(data_simbad, data_gevtev, right_index=True, left_on='join', how='inner')
    
    pd_common_sgevtev = pd_common_sgevtev.append(pd_common_sgevtev0)
    
    array_non_duplicate = ['gev_GLAT', 'gev_GLON', 's_RA_d','s_DEC_d']
    pd_common_sgevtev = pd_common_sgevtev.drop_duplicates(array_non_duplicate)
    pd_common_sgevtev = pd_common_sgevtev.reset_index()
    
    del pd_common_sgevtev['join']
    del pd_common_sgevtev['join_x']
    del pd_common_sgevtev['join_y']
    del pd_common_sgevtev['index']
    #df_common = pd.DataFrame(data = data, columns = namefinal)
    return pd_common_sgevtev

In [10]:
pd_common_gevtevsimbad = create_common_data(common_data, pd_simbad, C_associations_gevtev, C_associations_simbad)
print("There are " + str(len(pd_common_gevtevsimbad)) + " common objects in the three catalogs.")

There are 41 common objects in the three catalogs.


In [11]:
pd_common_gevtevsimbad.head()

Unnamed: 0,level_0,gev_ASSOC_TEV,gev_CLASS1,gev_DEJ2000,gev_Flux1000,gev_Flux10000_100000,gev_Flux1000_3000,gev_Flux100_300,gev_Flux3000_10000,gev_Flux300_1000,...,tev_sed_dnde,tev_sed_dnde_err,tev_sed_e_ref,tev_spec_dnde_1TeV,tev_spec_dnde_1TeV_err,tev_spec_eflux_1TeV_10TeV,tev_spec_eflux_1TeV_10TeV_err,tev_spec_flux_1TeV,tev_spec_flux_1TeV_crab,tev_spec_flux_1TeV_crab_err
0,45,b'HESS J1616-508 ',b'PWN ',-50.91,1.390171e-08,9.276251e-10,1.036945e-08,1.936115e-07,2.540761e-09,5.640028e-08,...,"[6.08884e-11, 1.76839e-11, 4.3537e-12, 1.47045...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.378819, 0.667119, 1.21383, 2.12929, 3.80595...",6.655983e-12,4.635425e-13,1.685885e-11,2.042771e-12,4.930357e-12,23.767134,2.544139
1,45,b'HESS J1616-508 ',b'PWN ',-50.91,1.390171e-08,9.276251e-10,1.036945e-08,1.936115e-07,2.540761e-09,5.640028e-08,...,"[6.08884e-11, 1.76839e-11, 4.3537e-12, 1.47045...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.378819, 0.667119, 1.21383, 2.12929, 3.80595...",6.655983e-12,4.635425e-13,1.685885e-11,2.042771e-12,4.930357e-12,23.767134,2.544139
2,54,b' ',b'PSR ',-58.844601,1.541394e-08,1.517636e-10,1.266997e-08,2.966954e-08,2.701655e-09,3.666902e-08,...,"[4.47255e-11, 2.04312e-11, 1.60652e-11, 1.2089...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.281317, 0.341012, 0.415124, 0.503835, 0.610...",2.64755e-12,1.145111e-13,6.911154e-12,5.918045e-13,2.00572e-12,9.668713,0.701973
3,54,b' ',b'PSR ',-58.844601,1.541394e-08,1.517636e-10,1.266997e-08,2.966954e-08,2.701655e-09,3.666902e-08,...,"[4.47255e-11, 2.04312e-11, 1.60652e-11, 1.2089...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.281317, 0.341012, 0.415124, 0.503835, 0.610...",2.64755e-12,1.145111e-13,6.911154e-12,5.918045e-13,2.00572e-12,9.668713,0.701973
4,30,b'SNR G292.2-00.5 ',b'PSR ',-61.466202,6.672389e-09,1.386517e-10,5.567922e-09,7.386492e-08,1.058768e-09,2.716641e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,


The next cell chooses several columns from a dataFrame to make sure that the output is correct(objects have the same coordinates etc.)

In [12]:
columns_to_compare = ['s_RA_d',
                     's_DEC_d',
                     'gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     's_class',
                     'gev_CLASS1',
                     'tev_classes']
pd_common_gevtevsimbad[columns_to_compare].head()

Unnamed: 0,s_RA_d,s_DEC_d,gev_ASSOC_TEV,tev_glat,gev_GLAT,tev_glon,gev_GLON,s_class,gev_CLASS1,tev_classes
0,258.575005,-38.983235,b'HESS J1616-508 ',-0.130943,-0.131106,332.365143,332.365601,be,b'PWN ',b'pwn'
1,159.289024,-12.831429,b'HESS J1616-508 ',-0.130943,-0.131106,332.365143,332.365601,plsr,b'PWN ',b'pwn'
2,260.231405,-38.000102,b' ',-0.62219,-0.617694,353.565063,319.975891,be,b'PSR ',b'snr'
3,162.124682,-14.136619,b' ',-0.62219,-0.617694,353.565063,319.975891,plsr,b'PSR ',b'snr'
4,162.53721,-59.88886,b'SNR G292.2-00.5 ',-0.536943,-0.541801,292.151306,292.145721,be,b'PSR ',b'pwn'
