# Similar objects in simbad, GEV and TEV catalogs
First, we import all the packages that we will need.

In [1]:
import warnings
import pandas as pd
import numpy as np
from getsimbad import get_simbad_data
from gevtev import compare_gev_tev_data

warnings.filterwarnings("ignore")

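This constant sets the similarity threshold. In the matching functions below, two objects are considered the same when both of their galactic coordinates differ by less than $\epsilon$:
$$|glat_{1} - glat_{2}| < \epsilon \quad \text{and} \quad |glon_{1} - glon_{2}| < \epsilon$$
$$\epsilon = 1.2 \times 10^{-1}$$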

In [2]:
# Matching tolerance: the functions below pair two objects when the absolute
# differences of both glat and glon are strictly smaller than this value.
# NOTE(review): presumably expressed in degrees, like the catalog coordinates - confirm.
_epsilon = 1.2e-1

The cell below loads the data from the SIMBAD catalog obtained with the requests 'Be', 'O', 'B' and 'plsr'. If the file "data/simbad.txt" does not exist, the data are fetched with `get_simbad_data` from getsimbad.py instead.

In [3]:
# Load the cached SIMBAD table; when the cache file is missing, build the
# table with get_simbad_data() instead.
try:
    pd_simbad = pd.read_csv("data/simbad.txt", sep='\t', encoding='utf-8')
    # A cache written by DataFrame.to_csv stores the index as an unnamed first
    # column. Drop it when present, but do not fail with KeyError on a file
    # that was saved without an index.
    pd_simbad = pd_simbad.drop(columns='Unnamed: 0', errors='ignore')
except FileNotFoundError:
    pd_simbad = get_simbad_data()


In [4]:
# Preview the first rows of the SIMBAD table.
pd_simbad.head()

Unnamed: 0,s_MAIN_ID,s_RA,s_DEC,s_RA_PREC,s_DEC_PREC,s_COO_ERR_MAJA,s_COO_ERR_MINA,s_COO_ERR_ANGLE,s_COO_QUAL,s_COO_WAVELENGTH,...,s_FLUX_H,s_FLUX_K,s_FLUX_u,s_FLUX_g,s_FLUX_r,s_FLUX_i,s_FLUX_z,s_class,s_glat,s_glon
0,b'HD 201433A',21 08 38.894,+30 12 20.27,7.0,7.0,26.0,26.0,90.0,B,O,...,,,,,,,,plsr,-11.774311,76.107743
1,b'* nu. For',02 04 29.4462,-29 17 48.491,14.0,14.0,0.1834,0.1942,90.0,A,O,...,5.048,5.015,,,,,,plsr,-73.777993,224.887601
2,b'* phi Her',16 08 46.1668,+44 56 05.732,14.0,14.0,0.371,0.3754,90.0,A,O,...,4.33,4.32,,,,,,plsr,47.1074,70.847825
3,b'V* CG And',00 00 43.6345,+45 15 12.002,14.0,14.0,0.055,0.0422,90.0,A,O,...,6.506,6.511,,,,,,plsr,-16.702455,113.649358
4,b'HD 133029',15 00 38.7184,+47 16 38.793,14.0,14.0,0.0472,0.0421,90.0,A,O,...,6.585,6.57,,,,,,plsr,57.710074,80.191942


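We load the sources that are common to the GeV and TeV catalogs, together with the sources found only in the TeV catalog and only in the GeV catalog.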

In [5]:
# compare_gev_tev_data returns three tables; going by the names they are
# unpacked into, these are the sources present in both catalogs, the TeV-only
# sources and the GeV-only sources (presumably - see gevtev.py to confirm).
common_data, only_tev_data, only_gev_data = compare_gev_tev_data(_epsilon)

The following cells compare the positions of the objects in the common GeV/TeV catalog with those in the SIMBAD catalog. The SIMBAD coordinates are translated to galactic latitude and longitude (glat and glon) using the astropy module. If the differences in both coordinates are smaller than $\epsilon$, the two objects are associated with each other.

In [8]:
def create_vectors_common_gevtev_simbad_old(gevtev, simbad, epsilon, catalog="tev"):
    """Match gamma-ray catalog sources with SIMBAD objects using a plain double loop.

    This is the slow reference implementation. A source and a SIMBAD object are
    paired when the absolute differences of both their glat and their glon are
    strictly smaller than epsilon.

    Return: two float vectors (C_associations_gevtev, C_associations_simbad).
    C_associations_gevtev[i] is the row position in simbad matched to row i of
    gevtev, and C_associations_simbad[j] is the row position in gevtev matched
    to row j of simbad. If no such object exists, the entry is equal to -1.
    When several candidates match, the one with the largest row position wins.

    gevtev - a pandas DataFrame with the gamma-ray sources; the coordinates are
             read from <catalog>_glat / <catalog>_glon when catalog contains
             "tev" and from <catalog>_GLAT / <catalog>_GLON otherwise
    simbad - a pandas DataFrame of Simbad with the columns s_glat and s_glon
    epsilon(double) - distance accepted as equivalence
    catalog(str) - column prefix selecting the coordinate columns of gevtev

    NOTE(review): longitude differences are not wrapped around, so two objects
    lying on either side of the glon 0/360 boundary are never paired - confirm
    that this is acceptable.
    """
    if "tev" in catalog:
        lat_column, lon_column = catalog + '_glat', catalog + '_glon'
    else:
        lat_column, lon_column = catalog + '_GLAT', catalog + '_GLON'

    # Work on plain arrays so that [i] / [j] below are positional. Indexing a
    # pandas Series with an integer is label-based and fails (or picks the
    # wrong row) when the DataFrame does not carry a default 0..n-1 index.
    glat_gevtev = np.asarray(gevtev[lat_column])
    glon_gevtev = np.asarray(gevtev[lon_column])
    glat_simbad = np.asarray(simbad['s_glat'])
    glon_simbad = np.asarray(simbad['s_glon'])

    C_associations_gevtev = -1.0 * np.ones((len(glat_gevtev)))
    C_associations_simbad = -1.0 * np.ones((len(glat_simbad)))

    # In this implementation, if several simbad objects are close enough to
    # object i of gevtev, it ends up bound to the last one (and vice versa),
    # because later matches overwrite earlier ones.
    for i in range(len(glat_gevtev)):
        for j in range(len(glat_simbad)):
            if ((np.abs(glat_gevtev[i] - glat_simbad[j]) < epsilon)
                    and (np.abs(glon_gevtev[i] - glon_simbad[j]) < epsilon)):
                C_associations_gevtev[i] = j
                C_associations_simbad[j] = i
    return C_associations_gevtev, C_associations_simbad

In [15]:
def _last_true_per_row(mask):
    """Return, for each row of a 2-D boolean array, the largest column index
    holding True, as a float vector with -1.0 for rows without any True."""
    last = np.full(mask.shape[0], -1.0)
    if mask.size == 0:
        # Nothing to search (argmax would raise on a zero-length axis).
        return last
    has_match = mask.any(axis=1)
    # argmax on the column-reversed mask gives the first True counted from the
    # right; convert that back to an index counted from the left.
    from_right = np.argmax(mask[:, ::-1], axis=1)
    last[has_match] = (mask.shape[1] - 1 - from_right)[has_match]
    return last


def create_vectors_common_gevtev_simbad(gevtev, simbad, epsilon, catalog="tev"):
    """Match gamma-ray catalog sources with SIMBAD objects (vectorised version).

    A source and a SIMBAD object are paired when the absolute differences of
    both their glat and their glon are strictly smaller than epsilon.

    Return: two float vectors (C_associations_gevtev, C_associations_simbad).
    C_associations_gevtev[i] is the row position in simbad matched to row i of
    gevtev, and C_associations_simbad[j] is the row position in gevtev matched
    to row j of simbad. If no such object exists, the entry is equal to -1.
    When several candidates match, the one with the largest row position wins.

    gevtev - a pandas DataFrame with the gamma-ray sources; the coordinates are
             read from <catalog>_glat / <catalog>_glon when catalog contains
             "tev" and from <catalog>_GLAT / <catalog>_GLON otherwise
    simbad - a pandas DataFrame of Simbad with the columns s_glat and s_glon
    epsilon(double) - distance accepted as equivalence
    catalog(str) - column prefix selecting the coordinate columns of gevtev

    The full len(gevtev) x len(simbad) comparison matrix is held in memory.

    NOTE(review): longitude differences are not wrapped around, so two objects
    lying on either side of the glon 0/360 boundary are never paired - confirm
    that this is acceptable.
    """
    if "tev" in catalog:
        lat_column, lon_column = catalog + '_glat', catalog + '_glon'
    else:
        lat_column, lon_column = catalog + '_GLAT', catalog + '_GLON'

    glat_gevtev = np.asarray(gevtev[lat_column])
    glon_gevtev = np.asarray(gevtev[lon_column])
    glat_simbad = np.asarray(simbad['s_glat'])
    glon_simbad = np.asarray(simbad['s_glon'])

    # Broadcasting a column vector against a row vector yields every pairwise
    # difference: entry [i, j] compares gevtev row i with simbad row j.
    glat_dif_matrix = glat_gevtev[:, np.newaxis] - glat_simbad[np.newaxis, :]
    glon_dif_matrix = glon_gevtev[:, np.newaxis] - glon_simbad[np.newaxis, :]
    pairs_matrix = np.logical_and(np.abs(glat_dif_matrix) < epsilon,
                                  np.abs(glon_dif_matrix) < epsilon)

    # Rows of pairs_matrix are gevtev sources; rows of its transpose are
    # simbad objects.
    C_associations_gevtev = _last_true_per_row(pairs_matrix)
    C_associations_simbad = _last_true_per_row(pairs_matrix.T)

    return C_associations_gevtev, C_associations_simbad

Comparison: 

In [10]:
import time

In [11]:
# Time the loop-based reference against the vectorised implementation on the
# common GeV/TeV table and check that both return identical vectors.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
C_associations_gevtev_old, C_associations_simbad_old = create_vectors_common_gevtev_simbad_old(common_data, pd_simbad, _epsilon)
print(time.perf_counter() - start_time)
start_time = time.perf_counter()
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(common_data, pd_simbad, _epsilon)
print(time.perf_counter() - start_time)
print((C_associations_gevtev_old == C_associations_gevtev).all(), (C_associations_simbad_old == C_associations_simbad).all())

27.991779804229736
0.16498470306396484
True True


In [12]:
# Same comparison on the TeV-only table.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
C_associations_gevtev_old, C_associations_simbad_old = create_vectors_common_gevtev_simbad_old(only_tev_data, pd_simbad, _epsilon)
print(time.perf_counter() - start_time)
start_time = time.perf_counter()
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_tev_data, pd_simbad, _epsilon)
print(time.perf_counter() - start_time)
print((C_associations_gevtev_old == C_associations_gevtev).all(), (C_associations_simbad_old == C_associations_simbad).all())

43.52845478057861
0.18444347381591797
True True


In [14]:
# Same comparison on the GeV-only table (coordinates come from the upper-case
# gev_GLAT / gev_GLON columns, hence catalog="gev").
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump when the system clock is adjusted.
start_time = time.perf_counter()
C_associations_gevtev_old, C_associations_simbad_old = create_vectors_common_gevtev_simbad_old(only_gev_data, pd_simbad, _epsilon, catalog="gev")
print(time.perf_counter() - start_time)
start_time = time.perf_counter()
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_gev_data, pd_simbad, _epsilon, catalog="gev")
print(time.perf_counter() - start_time)
print((C_associations_gevtev_old == C_associations_gevtev).all(), (C_associations_simbad_old == C_associations_simbad).all())

1430.0458295345306
9.62966251373291
True True


In [16]:
# Recompute the association vectors for the common GeV/TeV table: the timing
# cells reuse these variable names for the other tables.
C_associations_gevtev, C_associations_simbad = create_vectors_common_gevtev_simbad(common_data, pd_simbad, _epsilon)

In [17]:
def create_common_data(data_gevtev, data_simbad, C_associations_gevtev, C_associations_simbad, catalog="tev"):
    """Build the table of objects found both in a gamma-ray table and in Simbad.

    The rows of data_gevtev and data_simbad are joined twice, once through
    each association vector, and the two results are stacked. Rows repeating
    the same pair of positions (gamma-ray glat/glon and s_glat/s_glon) are
    then dropped, keeping the first occurrence.

    data_gevtev - a pandas DataFrame with the gamma-ray sources
    data_simbad - a pandas DataFrame of Simbad
    C_associations_gevtev - for each row of data_gevtev, the index label of
                            the matched data_simbad row, or -1 for no match
    C_associations_simbad - for each row of data_simbad, the index label of
                            the matched data_gevtev row, or -1 for no match
    catalog(str) - column prefix; the coordinate columns are
                   <catalog>_glat / <catalog>_glon when catalog contains "tev"
                   and <catalog>_GLAT / <catalog>_GLON otherwise

    Return: a new DataFrame with a fresh 0..n-1 index. The input DataFrames
    are left untouched.
    """
    # assign() returns a copy, so the helper 'join' column never appears on
    # the caller's frames, even if a merge below raises.
    gevtev_keyed = data_gevtev.assign(join=C_associations_gevtev)
    pd_common_sgevtev = pd.merge(data_simbad, gevtev_keyed,
                                 left_index=True, right_on='join', how='inner')
    # 'join_y' only exists when data_simbad already carried a 'join' column.
    pd_common_sgevtev = pd_common_sgevtev.drop(columns=['join_y'], errors='ignore')

    simbad_keyed = data_simbad.assign(join=C_associations_simbad)
    pd_common_sgevtev0 = pd.merge(simbad_keyed, data_gevtev,
                                  right_index=True, left_on='join', how='inner')

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    pd_common_sgevtev = pd.concat([pd_common_sgevtev, pd_common_sgevtev0])

    if "tev" in catalog:
        array_non_duplicate = [catalog+'_glat', catalog+'_glon', 's_glat', 's_glon']
    else:
        array_non_duplicate = [catalog+'_GLAT', catalog+'_GLON', 's_glat', 's_glon']
    pd_common_sgevtev = pd_common_sgevtev.drop_duplicates(subset=array_non_duplicate)

    # reset_index() stores the old index in a new 'index' column, which is
    # removed again below together with the helper join columns.
    # NOTE(review): when the inputs already contain an 'index' column, pandas
    # names the new column 'level_0' instead and it stays in the result, while
    # the pre-existing 'index' column is the one dropped - confirm intended.
    pd_common_sgevtev = pd_common_sgevtev.reset_index()
    pd_common_sgevtev = pd_common_sgevtev.drop(columns=['join_x', 'join', 'index'],
                                               errors='ignore')
    return pd_common_sgevtev

In [18]:
# Build the table of objects present in the GeV, TeV and SIMBAD catalogs and
# report how many rows it contains.
pd_common_gevtevsimbad = create_common_data(common_data, pd_simbad, C_associations_gevtev, C_associations_simbad)
print("There are " + str(len(pd_common_gevtevsimbad)) + " common objects in the three catalogs.")

There are 68 common objects in the three catalogs.


In [19]:
# Preview the first rows of the three-catalog table.
pd_common_gevtevsimbad.head()

Unnamed: 0,level_0,gev_0FGL_Name,gev_1FGL_Name,gev_1FHL_Name,gev_2FGL_Name,gev_ASSOC1,gev_ASSOC2,gev_ASSOC_GAM1,gev_ASSOC_GAM2,gev_ASSOC_GAM3,...,tev_spec_pl_norm_err,tev_spec_pl_norm_err_sys,tev_spec_theta,tev_spec_type,tev_tevcat2_id,tev_tevcat_id,tev_tevcat_name,tev_tgevcat_id,tev_tgevcat_name,tev_where
0,79,b' ',b' ',b' ',b' ',b' ',b' ',b' ',b' ',b' ',...,,,,b'none',b'PehQOM',121,b'TeV J1745-290d',107,b'TeV J1745-2900',b'gal'
1,81,b' ',b'1FGL J1746.4-2849c',b'1FHL J1746.3-2851 ',b'2FGL J1746.6-2851c',b'PWN G0.13-0.11 ',b' ',b' ',b'3EG J1746-2851',b' ',...,5e-14,,0.09,b'pl',b'LsZtAg',277,b'TeV J1746-289',-9223372036854775808,b'',b'gal'
2,108,b' ',b'1FGL J1746.4-2849c',b'1FHL J1746.3-2851 ',b'2FGL J1746.6-2851c',b'PWN G0.13-0.11 ',b' ',b' ',b'3EG J1746-2851',b' ',...,,,,b'none',b'LsZtAg',269,b'TeV J1746-289',-9223372036854775808,b'',b'gal'
3,97,b'0FGL J2032.2+4122',b'1FGL J2032.2+4127 ',b'1FHL J2032.1+4125 ',b'2FGL J2032.2+4126 ',b'LAT PSR J2032+4127 ',b' ',b'1AGL J2032+4102',b' ',b' ',...,1.6e-13,2.2e-13,,b'pl',b'3IY6Gd',87,b'TeV J2032+415',146,b'TeV J2031+4133',b'gal'
4,37,b'0FGL J1018.2-5858',b'1FGL J1018.6-5856 ',b'1FHL J1018.9-5855 ',b'2FGL J1019.0-5856 ',b'1FGL J1018.6-5856 ',b' ',b' ',b' ',b' ',...,4e-14,5.8e-14,,b'pl',b'5FlzLA',237,b'TeV J1018-589',45,b'TeV J1018-5856',b'gal'


The next cell selects several columns of the DataFrame to check that the output is correct (matched objects have nearly the same coordinates, etc.).

In [20]:
# Coordinates, cross-association and class columns of the three catalogs,
# shown side by side for a visual sanity check of the matches.
columns_to_compare = ['s_glat',
                     's_glon',
                     'gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     's_class',
                     'gev_CLASS1',
                     'tev_classes']
pd_common_gevtevsimbad[columns_to_compare].head()

Unnamed: 0,s_glat,s_glon,gev_ASSOC_TEV,tev_glat,gev_GLAT,tev_glon,gev_GLON,s_class,gev_CLASS1,tev_classes
0,-0.042219,359.951421,b' ',-0.046162,-0.020071,359.944244,359.863953,O,b' ',b'unid'
1,-0.068187,0.059459,b' ',-0.113206,-0.103157,0.140617,0.149201,O,b'pwn ',b'unid'
2,-0.068187,0.059459,b' ',-0.147977,-0.103157,0.05499,0.149201,O,b'pwn ',b'unid'
3,1.005632,80.270553,b'TeV J2032+4130 ',1.111117,1.018827,80.299477,80.218597,B0Ve,b'PSR ',b'unid'
4,-1.678443,284.361358,b'HESS J1018-589 ',-1.779517,-1.690106,284.314087,284.351227,B0Ve,b'HMB ',b'bin'


In [21]:
# Same selection restricted to the GeV and TeV columns (no SIMBAD columns).
# NOTE(review): this list is not used in any of the cells visible here.
columns_to_compare_gevtev = ['gev_ASSOC_TEV',
                     'tev_glat', 
                     'gev_GLAT',
                     'tev_glon',
                     'gev_GLON',
                     'gev_CLASS1',
                     'tev_classes']

In [22]:
# Save the common GeV/TeV table as a tab-separated file.
# NOTE(review): this file is written to the working directory, whereas the
# other tables in this notebook are written under data/ - confirm the path.
common_data.to_csv("gevtev.txt", sep='\t')

The next cells find common values in simbad-tev

In [23]:
# Associate the TeV-only sources with SIMBAD. C_associations_simbad is
# overwritten here and now refers to rows of only_tev_data.
C_associations_tev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_tev_data, pd_simbad, _epsilon)

In [24]:
# Build the table of objects present in both the TeV-only table and SIMBAD.
pd_common_onlytevsimbad = create_common_data(only_tev_data, pd_simbad, C_associations_tev, C_associations_simbad, catalog="tev")

In [25]:
# Report the number of TeV/SIMBAD matches.
print("There are " + str(len(pd_common_onlytevsimbad)) + " common objects in the tev and simbad catalogs.")

There are 123 common objects in the tev and simbad catalogs.


The next cells find common values in simbad-gev

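With the original loop-based function this cell could take up to 40 minutes; with the vectorised function it takes about ten seconds (see the timings in the comparison above).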

In [26]:
# Associate the GeV-only sources with SIMBAD. C_associations_simbad is
# overwritten here and now refers to rows of only_gev_data.
C_associations_gev, C_associations_simbad = create_vectors_common_gevtev_simbad(only_gev_data, pd_simbad, _epsilon, catalog="gev")

In [27]:
# Build the table of objects present in both the GeV-only table and SIMBAD.
pd_common_onlygevsimbad = create_common_data(only_gev_data, pd_simbad, C_associations_gev, C_associations_simbad, catalog="gev")

In [28]:
# Report the number of GeV/SIMBAD matches.
print("There are " + str(len(pd_common_onlygevsimbad)) + " common objects in the gev and simbad catalogs.")

There are 154 common objects in the gev and simbad catalogs.


In [29]:
# Save the three matched tables as tab-separated files (the DataFrame index is
# written as the first, unnamed column).
pd_common_gevtevsimbad.to_csv("data/gevtevsimbad.txt", sep='\t')
pd_common_onlygevsimbad.to_csv("data/gevsimbad.txt", sep='\t')
pd_common_onlytevsimbad.to_csv("data/tevsimbad.txt", sep='\t')

In [30]:
# Save the input tables as well. data/simbad.txt is the same file that the
# loading cell at the top of the notebook reads, so this refreshes that cache.
only_gev_data.to_csv("data/gev.txt", sep='\t')
only_tev_data.to_csv("data/tev.txt", sep='\t')
pd_simbad.to_csv("data/simbad.txt", sep='\t')