# Similar objects in simbad, GEV and TEV catalogs
First, we import all the packages that we will need.

In [1]:
import warnings
import pandas as pd
import numpy as np
from astropy.io.votable import parse_single_table, parse
from astropy.io.votable import from_table
import astropy.units as u
from astropy.coordinates import SkyCoord
from gevtev import compare_gev_tev_data 

warnings.filterwarnings("ignore")

This constant determines a threshold for the similarity between the objects:
$$|glat_{1} - glat_{2}| <= \epsilon$$
$$\epsilon = 1*10^{-1}$$

In [2]:
_epsilon = 1e-1

The cell above loads data from SIMBAD catalog obtained by requests 'Be', 'O', 'B' and 'plsr'. If the file "data/symbad.txt" does not exist, run getsimbad.py.

In [3]:
pd_simbad = pd.read_csv("data/symbad.txt", sep='\t', encoding='utf-8')
del pd_simbad['Unnamed: 0']

In [4]:
pd_simbad.head()

Unnamed: 0,s_MAIN_ID,s_RA,s_DEC,s_RA_PREC,s_DEC_PREC,s_COO_ERR_MAJA,s_COO_ERR_MINA,s_COO_ERR_ANGLE,s_COO_QUAL,s_COO_WAVELENGTH,s_COO_BIBCODE,s_class,s_glat,s_glon
0,b'HD 201433A',21 08 38.894,+30 12 20.27,7.0,7.0,26.0,26.0,90.0,B,O,b'2000A&A...355L..27H',plsr,-11.774311,76.107743
1,b'* nu. For',02 04 29.4462,-29 17 48.491,14.0,14.0,0.1834,0.1942,90.0,A,O,b'2018yCat.1345....0G',plsr,-73.777993,224.887601
2,b'* phi Her',16 08 46.1668,+44 56 05.732,14.0,14.0,0.371,0.3754,90.0,A,O,b'2018yCat.1345....0G',plsr,47.1074,70.847825
3,b'V* CG And',00 00 43.6345,+45 15 12.002,14.0,14.0,0.055,0.0422,90.0,A,O,b'2018yCat.1345....0G',plsr,-16.702455,113.649358
4,b'HD 133029',15 00 38.7184,+47 16 38.793,14.0,14.0,0.0472,0.0421,90.0,A,O,b'2018yCat.1345....0G',plsr,57.710074,80.191942


We load data common for GEV and TEV catalogs.

In [5]:
common_data, only_tev_data, only_gev_data = compare_gev_tev_data(_epsilon)

The following cells compare distances from a common catalog of GEV and TEV and SIMBAD catalog by translating SIMBAD coordinates to glan and glot due to astropy module. If their relative distance is smaller than $\epsilon$ they are associated to each another.

In [6]:
def create_vectors_common_gevtev_simbad(gevtev, simbad, epsilon, catalog="tev"):
    """This function looks for the same objects in GeV and TeV and Simbad catalogs 
    
    Return: two vectors where each index of TEV(GEV) is associated with its indev in GEV(TEV).
    If such objects does not existe, the corresponding vector coordinate is equal to -1
    
    gevtev - a pandas dataFrame of common GeV and TeV 
    simbad - a pandas dataFrame of Simbad 
    epsilon(double) - distance accepted as equivalence
    """
    d = {}
    classes = {}
    if (catalog.find("tev") != -1):
        glat_gevtev = gevtev[catalog+'_glat']
        glon_gevtev = gevtev[catalog+'_glon']
    else:
        glat_gevtev = gevtev[catalog+'_GLAT']
        glon_gevtev = gevtev[catalog+'_GLON']        

    glat_simbad = simbad['s_glat']
    glon_simbad = simbad['s_glon']
    
    C_associations_gevtev = -1.0 * np.ones((len(glat_gevtev)))
    C_associations_simbad = -1.0 * np.ones((len(glat_simbad)))
    
    for i in range(len(glat_gevtev)):
        for j in range(len(glat_simbad)):
            if ((np.abs(glat_gevtev[i] - glat_simbad[j]) < epsilon) 
                and (np.abs(glon_gevtev[i] - glon_simbad[j]) < epsilon)) :
                    C_associations_gevtev[i] = j
                    C_associations_simbad[j] = i
    return C_associations_gevtev, C_associations_simbad

In [7]:
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(common_data, pd_simbad, _epsilon)

In [8]:
def create_common_data(data_gevtev, data_simbad, C_associations_gevtev, C_associations_simbad, catalog="tev"):
    """The fonction adds objects found both in GeV, TeV and simbad.
    """
    data_gevtev['join'] = C_associations_gevtev

    pd_common_sgevtev = pd.merge(data_simbad, data_gevtev, left_index=True, right_on='join', how='inner')
    del data_gevtev['join']
    
    try:
        del pd_common_sgevtev['join_y']
    except:
        pass
    
    data_simbad['join'] = C_associations_simbad
    pd_common_sgevtev0 = pd.merge(data_simbad, data_gevtev, right_index=True, left_on='join', how='inner')
    
    pd_common_sgevtev = pd_common_sgevtev.append(pd_common_sgevtev0)
    if (catalog.find("tev") != -1):    
        array_non_duplicate = [catalog+'_glat', catalog+'_glon', 's_glat','s_glon']
    else:
        array_non_duplicate = [catalog+'_GLAT', catalog+'_GLON', 's_glat','s_glon']
    pd_common_sgevtev = pd_common_sgevtev.drop_duplicates(array_non_duplicate)
    pd_common_sgevtev = pd_common_sgevtev.reset_index()
    try:
        del pd_common_sgevtev['join_x']
    except KeyError:
        pass
    try:
        del pd_common_sgevtev['join']
    except:
        pass
    try:
        del pd_common_sgevtev['index']
    except:
        pass
    #df_common = pd.DataFrame(data = data, columns = namefinal)
    return pd_common_sgevtev

In [9]:
pd_common_gevtevsimbad = create_common_data(common_data, pd_simbad, C_associations_gevtev, C_associations_simbad)
print("There are " + str(len(pd_common_gevtevsimbad)) + " common objects in the three catalogs.")

There are 7 common objects in the three catalogs.


In [10]:
pd_common_gevtevsimbad.head()

Unnamed: 0,level_0,gev_ASSOC_TEV,gev_CLASS1,gev_DEJ2000,gev_Flux1000,gev_Flux10000_100000,gev_Flux1000_3000,gev_Flux100_300,gev_Flux3000_10000,gev_Flux300_1000,...,tev_sed_dnde,tev_sed_dnde_err,tev_sed_e_ref,tev_spec_dnde_1TeV,tev_spec_dnde_1TeV_err,tev_spec_eflux_1TeV_10TeV,tev_spec_eflux_1TeV_10TeV_err,tev_spec_flux_1TeV,tev_spec_flux_1TeV_crab,tev_spec_flux_1TeV_crab_err
0,23,b'HESS J1841-055 ',b'PWN ',-5.55,1.18466e-08,1.087441e-09,8.55955e-09,5.170882e-09,3.236763e-09,1.076956e-08,...,"[1.8321e-11, 8.4412e-12, 1.89955e-12, 3.35585e...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.738068, 1.29739, 2.29365, 4.04547, 7.1601, ...",1.28e-11,1.3e-12,3.055946e-11,3.910089e-12,9.078014e-12,43.761204,5.091009
1,19,b' ',b' ',-29.063,3.874533e-08,3.055154e-10,3.184363e-08,1.613918e-08,5.155299e-09,9.678488e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
2,20,b' ',b'pwn ',-28.8627,2.752177e-08,4.572024e-10,2.235459e-08,1.85101e-09,4.686388e-09,4.536945e-08,...,"[3.233e-12, 4.407e-13, 2.95e-13, 4.42e-14, 6.1...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.3979, 0.5622, 0.7943, 1.122, 1.585, 2.24, 3...",1.8e-13,5e-14,5.321444e-13,1.861589e-13,1.5e-13,0.723085,0.234238
3,29,b' ',b'pwn ',-28.8627,2.752177e-08,4.572024e-10,2.235459e-08,1.85101e-09,4.686388e-09,4.536945e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
4,251,b' ',b' ',-29.063,3.874533e-08,3.055154e-10,3.184363e-08,1.613918e-08,5.155299e-09,9.678488e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,


The next cell chooses several columns from a dataFrame to make sure that the output is correct(objects have the same coordinates etc.)

In [11]:
columns_to_compare = ['s_glat',
                     's_glon',
                     'gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     's_class',
                     'gev_CLASS1',
                     'tev_classes']
pd_common_gevtevsimbad[columns_to_compare].head()

Unnamed: 0,s_glat,s_glon,gev_ASSOC_TEV,tev_glat,gev_GLAT,tev_glon,gev_GLON,s_class,gev_CLASS1,tev_classes
0,-0.198384,26.857782,b'HESS J1841-055 ',-0.197604,-0.19859,26.795113,26.795933,B,b'PWN ',b'unid'
1,-0.042219,359.951421,b' ',-0.046162,-0.020071,359.944244,359.863953,O,b' ',b'unid'
2,-0.068187,0.059459,b' ',-0.113206,-0.103157,0.140617,0.149201,O,b'pwn ',b'unid'
3,-0.068187,0.059459,b' ',-0.147977,-0.103157,0.05499,0.149201,O,b'pwn ',b'unid'
4,-0.011453,359.985525,b' ',-0.046162,-0.020071,359.944244,359.863953,Be,b' ',b'unid'


In [12]:
columns_to_compare_gevtev = ['gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     'gev_CLASS1',
                     'tev_classes']

In [13]:
common_data[columns_to_compare_gevtev].to_csv("gevtev.txt", sep='\t')

The next cells find common values in simbad-tev

In [14]:
C_associations_tev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_tev_data, pd_simbad, _epsilon)

In [15]:
pd_common_onlytevsimbad = create_common_data(only_tev_data, pd_simbad, C_associations_tev, C_associations_simbad, catalog="tev")

In [26]:
print("There are " + str(len(pd_common_onlytevsimbad)) + " common objects in the tev and simbad catalogs.")

There are 23 common objects in the tev and simbad catalogs.


In [17]:
pd_common_onlytevsimbad['s_MAIN_ID']

0                        b'[KKM2013] 7'
1            b'MSX6C G331.5487-00.5364'
2                    b'LS  IV -05   11'
3                       b'CPD-69   416'
4                           b'GCIRS  8'
5                       b'[MCD2010] 13'
6                       b'[MCD2010] 13'
7                           b'VFTS 318'
8                   b'UCAC4 105-013663'
9          b'[AAA97b] J053648.7-691700'
10                      b'[ST92] 2-62B'
11                       b'[ST92] 2-62'
12                b'[M2002] LMC 164553'
13                b'[M2002] LMC 161594'
14                      b'SSTGC 519103'
15                         b'GCIRS  1W'
16                           b'RMC 141'
17                            b'HTR 13'
18       b'Cl* NGC 2070    SMB     283'
19    b'SSTISAGE1C J053736.75-690633.4'
20       b'Cl* NGC 2070    SMB     455'
21       b'Cl* NGC 2070    SMB     163'
22                      b'[MCD2010] 18'
Name: s_MAIN_ID, dtype: object

The next cells find common values in simbad-gev

This cell can take up to 40 minutes.

In [18]:
C_associations_gev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_gev_data, pd_simbad, _epsilon, catalog="gev")

In [19]:
pd_common_onlygevsimbad = create_common_data(only_gev_data, pd_simbad, C_associations_gev, C_associations_simbad, catalog="gev")

In [20]:
pd_common_onlygevsimbad['s_MAIN_ID']

0                  b'LS   I +56   58'
1           b'GALEX J175340.5-500741'
2                   b'[KRL2007b] 194'
3                  b'LS  IV -03    6'
4                     b'EM* GGR  129'
5         b'SDSS J085023.10+485826.3'
6                  b'LS  IV -14   70'
7                       b'Hen 3-1611'
8                  b'LS III +50    6'
9                          b'LIN 341'
10                 b'LS  IV -05   11'
11        b'SDSS J134029.74+441346.8'
12        b'SDSS J232339.56-031507.7'
13                 b'TYC 8958-3477-1'
14                       b'MAC 1-531'
15                       b'HD  92406'
16                 b'LS III +60   19'
17                       b'HD 295998'
18                     b'PG 0042+211'
19                      b'LLNS  2720'
20                    b'CPD-58  2614'
21                    b'[MCF2015] 22'
22                        b'GCIRS  8'
23                        b'GCIRS  8'
24                    b'[MCD2010] 13'
25                  b'TYC 3156-998-1'
26          

In [25]:
print("There are " + str(len(pd_common_onlygevsimbad)) + " common objects in the gev and simbad catalogs.")

There are 35 common objects in the gev and simbad catalogs.


In [22]:
pd_common_gevtevsimbad[columns_to_compare].to_csv("gevtevsimbad.txt", sep='\t')

In [23]:
pd_common_onlytevsimbad.to_csv("tevsimbad.txt", sep='\t')

In [24]:
pd_common_onlygevsimbad.to_csv("gevsimbad.txt", sep='\t')