# Similar objects in simbad, GEV and TEV catalogs
First, we import all the packages that we will need.

In [1]:
import warnings
import pandas as pd
import numpy as np
from astropy.io.votable import parse_single_table, parse
from astropy.io.votable import from_table
import astropy.units as u
from astropy.coordinates import SkyCoord
from gevtev import compare_gev_tev_data 

warnings.filterwarnings("ignore")

This constant determines a threshold for the similarity between the objects:
$$|glat_{1} - glat_{2}| <= \epsilon$$
$$\epsilon = 1*10^{-1}$$

In [2]:
_epsilon = 1e-1

The cell above loads data manually downloaded from SIMBAD catalog by requests 'be' and 'plsr'. It creates a DataFrame with a respective column class. 

Now this dataFrame contains only three columns:
- RA_d
- DEC_d
- MAIN_ID
- class

Other columns with physical sense will be added later.

In [3]:
s = "data/simbad-plsr.xml"
votable1 = parse(s)
votable = parse_single_table(s)
table = votable.to_table()
table_plsr = table['RA_d','DEC_d', 'MAIN_ID']
pd_plsr = table_plsr.to_pandas() 
pd_plsr['class'] = 'plsr'

s = "data/simbad-be.xml"
votable1 = parse(s)
votable = parse_single_table(s)
table = votable.to_table()
table_be = table['RA_d','DEC_d', 'MAIN_ID']
pd_be = table_be.to_pandas()
pd_be['class'] = 'be'

frames_simbda = [pd_be, pd_plsr]
pd_simbad = pd.concat(frames_simbda)

This cell adds "s_" to columns of simbad data to makes them easily distinguishable from data of other catalogs.

In [4]:
plsr_match_names = {}
for i in pd_simbad.columns:
    plsr_match_names.update({i : "s_" + i})
pd_simbad = pd_simbad.rename(columns = plsr_match_names)

In [5]:
pd_simbad.head()

Unnamed: 0,s_RA_d,s_DEC_d,s_MAIN_ID,s_class
0,315.783489,38.99171,b'EM* MWC 1036',be
1,306.792553,38.900434,b'BD+38 4093',be
2,305.765073,39.4972,b'EM* MWC 342',be
3,308.363021,41.59957,b'[D75b] Em* 20+118',be
4,311.70535,43.427917,b'EM* LkHA 133',be


We load data common for GEV and TEV catalogs.

In [6]:
common_data, only_tev_data, only_gev_data = compare_gev_tev_data(_epsilon)

The following cells compare distances from a common catalog of GEV and TEV and SIMBAD catalog by translating SIMBAD coordinates to glan and glot due to astropy module. If their relative distance is smaller than $\epsilon$ they are associated to each another.

In [33]:
def create_vectors_common_gevtev_simbad(gevtev, simbad, epsilon, catalog="tev"):
    """This function looks for the same objects in GeV and TeV and Simbad catalogs 
    
    Return: two vectors where each index of TEV(GEV) is associated with its indev in GEV(TEV).
    If such objects does not existe, the corresponding vector coordinate is equal to -1
    
    gevtev - a pandas dataFrame of common GeV and TeV 
    simbad - a pandas dataFrame of Simbad 
    epsilon(double) - distance accepted as equivalence
    """
    d = {}
    classes = {}
    if (catalog.find("tev") != -1):
        glat_gevtev = gevtev[catalog+'_glat']
        glon_gevtev = gevtev[catalog+'_glon']
    else:
        glat_gevtev = gevtev[catalog+'_GLAT']
        glon_gevtev = gevtev[catalog+'_GLON']        
    pos_ra_simbad = pd_simbad.as_matrix(columns=['s_RA_d'])
    pos_dec_simbad = pd_simbad.as_matrix(columns=['s_DEC_d'])
    c_icrs = SkyCoord(ra=pos_ra_simbad*u.degree, dec=pos_dec_simbad*u.degree, frame='icrs')
    glat_simbad = c_icrs.galactic.b.deg
    glon_simbad = c_icrs.galactic.l.deg
    
    
    C_associations_gevtev = -1.0 * np.ones((len(glat_gevtev)))
    C_associations_simbad = -1.0 * np.ones((len(glat_simbad)))
    
    for i in range(len(glat_gevtev)):
        for j in range(len(pos_ra_simbad)): 
            if ((np.abs(glat_gevtev[i] - glat_simbad[j]) < epsilon) 
                and (np.abs(glon_gevtev[i] - glon_simbad[j]) < epsilon)) :
                    C_associations_gevtev[i] = j
                    C_associations_simbad[j] = i
    return C_associations_gevtev, C_associations_simbad

In [34]:
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(common_data, pd_simbad, _epsilon)

In [35]:
def create_common_data(data_gevtev, data_simbad, C_associations_gevtev, C_associations_simbad, catalog="tev"):
    """The fonction adds objects found both in GeV, TeV and simbad.
    """
    data_gevtev['join'] = C_associations_gevtev
    pd_common_sgevtev = pd.merge(data_simbad, data_gevtev, left_index=True, right_on='join', how='inner')
    del data_gevtev['join']

    data_simbad['join'] = C_associations_simbad
    pd_common_sgevtev0 = pd.merge(data_simbad, data_gevtev, right_index=True, left_on='join', how='inner')
    
    pd_common_sgevtev = pd_common_sgevtev.append(pd_common_sgevtev0)
    if (catalog.find("tev") != -1):    
        array_non_duplicate = [catalog+'_glat', catalog+'_glon', 's_RA_d','s_DEC_d']
    else:
        array_non_duplicate = [catalog+'_GLAT', catalog+'_GLON', 's_RA_d','s_DEC_d']
    pd_common_sgevtev = pd_common_sgevtev.drop_duplicates(array_non_duplicate)
    pd_common_sgevtev = pd_common_sgevtev.reset_index()
    
    del pd_common_sgevtev['join']
    del pd_common_sgevtev['join_x']
    del pd_common_sgevtev['join_y']
    del pd_common_sgevtev['index']
    #df_common = pd.DataFrame(data = data, columns = namefinal)
    return pd_common_sgevtev

In [10]:
pd_common_gevtevsimbad = create_common_data(common_data, pd_simbad, C_associations_gevtev, C_associations_simbad)
print("There are " + str(len(pd_common_gevtevsimbad)) + " common objects in the three catalogs.")

There are 2 common objects in the three catalogs.


In [11]:
pd_common_gevtevsimbad.head()

Unnamed: 0,level_0,gev_ASSOC_TEV,gev_CLASS1,gev_DEJ2000,gev_Flux1000,gev_Flux10000_100000,gev_Flux1000_3000,gev_Flux100_300,gev_Flux3000_10000,gev_Flux300_1000,...,tev_sed_dnde,tev_sed_dnde_err,tev_sed_e_ref,tev_spec_dnde_1TeV,tev_spec_dnde_1TeV_err,tev_spec_eflux_1TeV_10TeV,tev_spec_eflux_1TeV_10TeV_err,tev_spec_flux_1TeV,tev_spec_flux_1TeV_crab,tev_spec_flux_1TeV_crab_err
0,19,b' ',b' ',-29.063,3.874533e-08,3.055154e-10,3.184363e-08,1.613918e-08,5.155299e-09,9.678488e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
1,19,b' ',b' ',-29.063,3.874533e-08,3.055154e-10,3.184363e-08,1.613918e-08,5.155299e-09,9.678488e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,


The next cell chooses several columns from a dataFrame to make sure that the output is correct(objects have the same coordinates etc.)

In [12]:
columns_to_compare = ['s_RA_d',
                     's_DEC_d',
                     'gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     's_class',
                     'gev_CLASS1',
                     'tev_classes']
pd_common_gevtevsimbad[columns_to_compare].head()

Unnamed: 0,s_RA_d,s_DEC_d,gev_ASSOC_TEV,tev_glat,gev_GLAT,tev_glon,gev_GLON,s_class,gev_CLASS1,tev_classes
0,266.40753,-28.9545,b' ',-0.046162,-0.020071,359.944244,359.863953,be,b' ',b'unid'
1,244.711241,-50.470085,b' ',-0.046162,-0.020071,359.944244,359.863953,plsr,b' ',b'unid'


In [13]:
pd_common_gevtevsimbad[columns_to_compare].to_csv("gevtevsimbad.txt", sep='\t')

In [14]:
columns_to_compare_gevtev = ['gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     'gev_CLASS1',
                     'tev_classes']

In [15]:
common_data[columns_to_compare_gevtev].to_csv("gevtev.txt", sep='\t')

The next cells find common values in simbad-tev

In [27]:
C_associations_tev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_tev_data, pd_simbad, _epsilon)

In [30]:
pd_common_onlytevsimbad = create_common_data(only_tev_data, pd_simbad, C_associations_tev, C_associations_simbad, catalog="tev")

In [31]:
pd_common_onlytevsimbad.head()

Unnamed: 0,level_0,s_DEC_d,s_MAIN_ID,s_RA_d,s_class,tev_classes,tev_glat,tev_glon,tev_morph_pa,tev_pos_dec,...,tev_sed_dnde,tev_sed_dnde_err,tev_sed_e_ref,tev_spec_dnde_1TeV,tev_spec_dnde_1TeV_err,tev_spec_eflux_1TeV_10TeV,tev_spec_eflux_1TeV_10TeV_err,tev_spec_flux_1TeV,tev_spec_flux_1TeV_crab,tev_spec_flux_1TeV_crab_err
0,19,-28.9545,b'SSTGC 519103',266.40753,be,b'unid',-0.046162,359.944244,,-29.006111,...,"[1.42756e-10, 7.84682e-11, 4.92253e-11, 3.1732...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.165368, 0.204729, 0.251321, 0.307177, 0.374...",2.32248e-12,5.452722e-14,5.835075e-12,3.56109e-13,1.703836e-12,8.21346,0.381336
1,19,-50.470085,b'HD 146461',244.711241,plsr,b'unid',-0.046162,359.944244,,-29.006111,...,"[1.42756e-10, 7.84682e-11, 4.92253e-11, 3.1732...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.165368, 0.204729, 0.251321, 0.307177, 0.374...",2.32248e-12,5.452722e-14,5.835075e-12,3.56109e-13,1.703836e-12,8.21346,0.381336


The next cells find common values in simbad-gev

In [36]:
C_associations_gev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_gev_data, pd_simbad, _epsilon, catalog="gev")

In [38]:
pd_common_onlygevsimbad = create_common_data(only_gev_data, pd_simbad, C_associations_gev, C_associations_simbad, catalog="gev")

In [39]:
pd_common_onlygevsimbad.head()

Unnamed: 0,level_0,gev_ASSOC_TEV,gev_CLASS1,gev_DEJ2000,gev_Flux1000,gev_Flux10000_100000,gev_Flux1000_3000,gev_Flux100_300,gev_Flux3000_10000,gev_Flux300_1000,gev_Flux30_100,gev_GLAT,gev_GLON,gev_RAJ2000,gev_Variability_Index,s_DEC_d,s_MAIN_ID,s_RA_d,s_class
