# Find similar objects in XMM for objects in simbad, GEV and TEV catalogs
First, we import all the packages that we will need.

In [1]:
import warnings
import pandas as pd
import numpy as np
from getsimbad import get_simbad_data
from gevtev import compare_gev_tev_data
from astropy.io import fits

warnings.filterwarnings("ignore")

This constant determines a threshold for the similarity between the objects:
$$|glat_{1} - glat_{2}| <= \epsilon$$
$$\epsilon = 1,2*10^{-1}$$

In [2]:
_epsilon = 1.2e-1

The cell above loads data from SIMBAD catalog obtained by requests 'Be', 'O', 'B' and 'plsr'. If the file "data/symbad.txt" does not exist, run getsimbad.py.

In [3]:
try:
    pd_simbad = pd.read_csv("data/simbad.txt", sep='\t', encoding='utf-8')
    del pd_simbad['Unnamed: 0']
except FileNotFoundError:
    pd_simbad = get_simbad_data()


In [4]:
pd_simbad.head()

Unnamed: 0,s_MAIN_ID,s_RA,s_DEC,s_RA_PREC,s_DEC_PREC,s_COO_ERR_MAJA,s_COO_ERR_MINA,s_COO_ERR_ANGLE,s_COO_QUAL,s_COO_WAVELENGTH,...,s_FLUX_H,s_FLUX_K,s_FLUX_u,s_FLUX_g,s_FLUX_r,s_FLUX_i,s_FLUX_z,s_class,s_glat,s_glon
0,b'HD 201433A',21 08 38.894,+30 12 20.27,7.0,7.0,26.0,26.0,90.0,B,O,...,,,,,,,,plsr,-11.774311,76.107743
1,b'* nu. For',02 04 29.4462,-29 17 48.491,14.0,14.0,0.1834,0.1942,90.0,A,O,...,8.953651e-06,9.660512e-06,,,,,,plsr,-73.777993,224.887601
2,b'* phi Her',16 08 46.1668,+44 56 05.732,14.0,14.0,0.371,0.3754,90.0,A,O,...,4.677352e-05,4.786299e-05,,,,,,plsr,47.1074,70.847825
3,b'V* CG And',00 00 43.6345,+45 15 12.002,14.0,14.0,0.055,0.0422,90.0,A,O,...,3.118889e-07,3.083187e-07,,,,,,plsr,-16.702455,113.649358
4,b'HD 133029',15 00 38.7184,+47 16 38.793,14.0,14.0,0.0472,0.0421,90.0,A,O,...,2.600159e-07,2.691534e-07,,,,,,plsr,57.710074,80.191942


In [5]:
s_spectrum_columns = [
    's_FLUX_U', 
    's_FLUX_B', 
    's_FLUX_V', 
    's_FLUX_R', 
    's_FLUX_I',
    's_FLUX_G', 
    's_FLUX_J', 
    's_FLUX_H',
    's_FLUX_K', 
    's_FLUX_u',
    's_FLUX_g', 
    's_FLUX_r',
    's_FLUX_i',    
    's_FLUX_z',
    ]

In [6]:
_path_xmm = 'data/3XMM_DR7cat_slim_v1.0.fits.gz'
_names_xmm = [
    "SC_RA",
    "SC_DEC",
    "SC_POSERR",
    "SC_EP_1_FLUX",
    "SC_EP_2_FLUX",
    "SC_EP_3_FLUX",
    "SC_EP_4_FLUX",
    "SC_EP_5_FLUX",
    "SC_EP_1_FLUX_ERR",
    "SC_EP_2_FLUX_ERR",
    "SC_EP_3_FLUX_ERR",
    "SC_EP_4_FLUX_ERR",
    "SC_EP_5_FLUX_ERR",
    ] 

In [7]:
hdul_xmm = fits.open(_path_xmm)
cat_xmm = hdul_xmm[1].data

In [8]:
def create_pandas_frames(cat, cat_name):   
    """
    Creates pandas dataframes with the same values as in cat, adding to columns cat_name.
    
    Arguments:
    cat -- rec array with catalog data
    cat_name -- short catalog name
    
    Returns:
    data -- pandas DataFrame with catalog data
    """
    
    data = pd.DataFrame.from_records(cat.tolist(), columns=cat.dtype.names)
    match_names = {}
    for i in data.columns:
        match_names.update({i : cat_name + i})
    data = data.rename(columns = match_names)
    
    return data

In [9]:
data_xmm = create_pandas_frames(cat_xmm, "xmm_")
data_xmm.head()

Unnamed: 0,xmm_SRCID,xmm_IAUNAME,xmm_SC_RA,xmm_SC_DEC,xmm_SC_POSERR,xmm_SC_DET_ML,xmm_SC_EP_1_FLUX,xmm_SC_EP_1_FLUX_ERR,xmm_SC_EP_2_FLUX,xmm_SC_EP_2_FLUX_ERR,...,xmm_SC_SUM_FLAG,xmm_SC_EP_8_FMIN,xmm_SC_EP_8_FMIN_ERR,xmm_SC_EP_8_FMAX,xmm_SC_EP_8_FMAX_ERR,xmm_MJD_FIRST,xmm_MJD_LAST,xmm_N_DETECTIONS,xmm_CONFUSED,xmm_WEBPAGE_URL
0,206931901010113,b'3XMM J000000.0-552108',0.000395,-55.352442,2.31261,11.9614,5.97519e-17,1.08093e-16,7.73947e-16,2.52081e-16,...,0,6.01835e-15,5.19133e-15,6.01835e-15,5.19133e-15,56086.742951,56087.588194,1,70,b'http://xmm-catalog.irap.omp.eu/source/206931...
1,204033901010030,b'3XMM J000000.2+623122',0.001138,62.523022,1.81998,24.577,2.0843800000000003e-17,6.75412e-17,1.4230100000000003e-17,9.893530000000001e-17,...,0,7.4434e-15,1.95237e-15,7.4434e-15,1.95237e-15,54134.485278,54135.0436,1,70,b'http://xmm-catalog.irap.omp.eu/source/204033...
2,201253101010082,b'3XMM J000000.3-250629',0.001307,-25.108284,2.15064,10.5716,1.94644e-16,2.2424e-16,1.11621e-15,3.94971e-16,...,0,5.88798e-15,3.77133e-15,5.88798e-15,3.77133e-15,51696.44088,51696.78809,1,70,b'http://xmm-catalog.irap.omp.eu/source/201253...
3,206931901010049,b'3XMM J000000.3-552139',0.001344,-55.361047,1.86001,48.571301,7.58234e-16,2.27697e-16,1.87544e-15,3.4754e-16,...,0,1.66937e-14,6.45224e-15,1.66937e-14,6.45224e-15,56086.742951,56087.588194,1,70,b'http://xmm-catalog.irap.omp.eu/source/206931...
4,203025805010073,b'3XMM J000000.7-321352',0.003264,-32.231256,1.94055,14.0141,2.41177e-16,1.39701e-16,7.00008e-16,2.40875e-16,...,0,6.31608e-15,3.75948e-15,6.31608e-15,3.75948e-15,53534.288912,53534.827627,1,70,b'http://xmm-catalog.irap.omp.eu/source/203025...


Load data from gevtevsimbad, gevsimbad and tevsimbad:

In [10]:
common_data, only_tev_data, only_gev_data = compare_gev_tev_data(_epsilon)

In [11]:
def create_matrix_common(cat1, cat2, epsilon, cat1_col1, cat1_col2, cat2_col1, cat2_col2):
    """This function looks for the same objects in GeV and TeV and Simbad catalogs 
    
    Return: a matrix with association: m[i][j] = True if object i from cat1
            is close enough to object j from cat2
    
    cat1, cat2 - a pandas dataFrames 
    epsilon(double) - distance accepted as equivalence
    """
    d = {}
    classes = {}
        
    
    C_associations_1 = -1.0 * np.ones((len(cat1[cat1_col1])))
    C_associations_2 = -1.0 * np.ones((len(cat2[cat2_col1])))
    
    col1_dif_matrix = np.dot(np.vstack((cat1[cat1_col1], -np.ones_like(cat1[cat1_col1]))).T,
                            np.vstack((np.ones_like(cat2[cat2_col1]), cat2[cat2_col1])))
    col2_dif_matrix = np.dot(np.vstack((cat1[cat1_col2], -np.ones_like(cat1[cat1_col2]))).T,
                            np.vstack((np.ones_like(cat2[cat2_col2]), cat2[cat2_col2])))
    pairs_matrix = np.logical_and(np.abs(col1_dif_matrix) < epsilon, 
                                np.abs(col2_dif_matrix) < epsilon)

    return pairs_matrix

In [12]:
from tqdm import tqdm

In [13]:
def create_common_data(data1, data2, pairs_matrix, cat1_col1, cat1_col2, cat2_col1, cat2_col2):
    """The function adds objects found in both catalogs.
    """
    vector_association = np.where(np.sum(pairs_matrix, axis=0) > 0)[0]
    pd_common = pd.DataFrame()
    for i in tqdm(vector_association):
        data1_join = (pairs_matrix[:, i] > 0)*(i + 1) - 1
        data1["join"] = data1_join
        pd_common0 = pd.merge(data1, data2, right_index=True, left_on='join', how='inner')
        if (len(pd_common)):
            pd_common = pd_common.append(pd_common0)
            del pd_common["join"]
            del data1["join"]
        else:
            pd_common = pd_common0.copy()
    
      
    array_non_duplicate = [cat1_col1, cat1_col2, cat2_col1, cat2_col2]
    pd_common = pd_common.drop_duplicates(array_non_duplicate)
#     pd_common = pd_common.reset_index()

    return pd_common

In [14]:
matrix = create_matrix_common(common_data, pd_simbad, _epsilon, 'tev_glat', 'tev_glon', "s_glat", "s_glon")
pd_common_gevtevsimbad = create_common_data(common_data, pd_simbad, matrix, 'tev_glat', 'tev_glon', "s_glat", "s_glon")
print("There are " + str(len(pd_common_gevtevsimbad)) + " common objects in the three catalogs.")

100%|██████████| 69/69 [00:02<00:00, 25.94it/s]


There are 68 common objects in the three catalogs.


In [15]:
matrix = create_matrix_common(pd_common_gevtevsimbad, data_xmm, _epsilon, "gev_RAJ2000", "gev_DEJ2000", "xmm_SC_RA", "xmm_SC_DEC")

In [16]:
pd_common_gevtevsimbadxmm = create_common_data(pd_common_gevtevsimbad, data_xmm, matrix, "gev_RAJ2000", "gev_DEJ2000", "s_glat", "s_glon")
print("There are " + str(len(pd_common_gevtevsimbadxmm)) + " common objects in the three catalogs with xmm.")

100%|██████████| 1135/1135 [03:39<00:00,  5.17it/s]


There are 67 common objects in the three catalogs with xmm.


In [17]:
pd_common_gevtevsimbadxmm.head()

Unnamed: 0,gev_0FGL_Name,gev_1FGL_Name,gev_1FHL_Name,gev_2FGL_Name,gev_ASSOC1,gev_ASSOC2,gev_ASSOC_GAM1,gev_ASSOC_GAM2,gev_ASSOC_GAM3,gev_ASSOC_TEV,...,xmm_SC_HR3,xmm_SC_HR3_ERR,xmm_SC_HR4,xmm_SC_HR4_ERR,xmm_SC_POSERR,xmm_SC_RA,xmm_SC_SUM_FLAG,xmm_SC_VAR_FLAG,xmm_SRCID,xmm_WEBPAGE_URL
11,b'0FGL J0240.3+6113',b'1FGL J0240.5+6113 ',b'1FHL J0240.2+6113 ',b'2FGL J0240.5+6113 ',b'LS I+61 303 ',b' ',b'1AGL J0242+6111',b' ',b'EGR J0240+6112 ',b'LS I +61 303 ',...,-1.0,0.621338,1.0,0.583818,2.281,40.022122,0,0,205059811010050,b'http://xmm-catalog.irap.omp.eu/source/205059...
35,b' ',b'1FGL J0956.5+6938 ',b' ',b'2FGL J0955.9+6936 ',b'M 82 ',b' ',b' ',b' ',b' ',b'M 82 ',...,-0.025561,0.03054,-0.291307,0.039613,0.268383,148.765898,3,70,201122902010022,b'http://xmm-catalog.irap.omp.eu/source/201122...
35,b' ',b'1FGL J0956.5+6938 ',b' ',b'2FGL J0955.9+6936 ',b'M 82 ',b' ',b' ',b' ',b' ',b'M 82 ',...,-0.025561,0.03054,-0.291307,0.039613,0.268383,148.765898,3,70,201122902010022,b'http://xmm-catalog.irap.omp.eu/source/201122...
35,b' ',b'1FGL J0956.5+6938 ',b' ',b'2FGL J0955.9+6936 ',b'M 82 ',b' ',b' ',b' ',b' ',b'M 82 ',...,-0.025561,0.03054,-0.291307,0.039613,0.268383,148.765898,3,70,201122902010022,b'http://xmm-catalog.irap.omp.eu/source/201122...
35,b' ',b'1FGL J0956.5+6938 ',b' ',b'2FGL J0955.9+6936 ',b'M 82 ',b' ',b' ',b' ',b' ',b'M 82 ',...,-0.025561,0.03054,-0.291307,0.039613,0.268383,148.765898,3,70,201122902010022,b'http://xmm-catalog.irap.omp.eu/source/201122...


The next cells find common values in simbad-tev

In [18]:
matrix = create_matrix_common(only_tev_data, pd_simbad, _epsilon, 'tev_glat', 'tev_glon', "s_glat", "s_glon")
pd_common_onlytevsimbad = create_common_data(only_tev_data, pd_simbad, matrix, 'tev_glat', 'tev_glon', "s_glat", "s_glon")
print("There are " + str(len(pd_common_onlytevsimbad)) + " common objects in the tev-simbad catalogs.")

100%|██████████| 126/126 [00:03<00:00, 33.74it/s]


There are 123 common objects in the tev-simbad catalogs.


In [19]:
matrix = create_matrix_common(pd_common_onlytevsimbad, data_xmm, _epsilon, "tev_ra", "tev_dec", "xmm_SC_RA", "xmm_SC_DEC")
matrix.shape

(123, 499266)

In [20]:
pd_common_onlytevsimbadxmm = create_common_data(pd_common_onlytevsimbad, data_xmm, matrix, "tev_ra", "tev_dec", "s_glat", "s_glon")
print("There are " + str(len(pd_common_onlytevsimbadxmm)) + " common objects in the tev-simbad catalog with xmm.")

100%|██████████| 1761/1761 [06:11<00:00,  4.75it/s]


There are 123 common objects in the tev-simbad catalog with xmm.


The next cells find common values in simbad-gev

In [21]:
matrix = create_matrix_common(only_gev_data, pd_simbad, _epsilon, 'gev_GLAT', 'gev_GLON', "s_glat", "s_glon")
pd_common_onlygevsimbad = create_common_data(only_gev_data, pd_simbad, matrix, 'gev_GLAT', 'gev_GLON', "s_glat", "s_glon")
print("There are " + str(len(pd_common_onlygevsimbad)) + " common objects in the gev-simbad catalogs.")

100%|██████████| 159/159 [00:05<00:00, 27.21it/s]


There are 155 common objects in the gev-simbad catalogs.


In [22]:
matrix = create_matrix_common(pd_common_onlygevsimbad, data_xmm, _epsilon, "gev_RAJ2000", "gev_DEJ2000", "xmm_SC_RA", "xmm_SC_DEC")
matrix.shape

(155, 499266)

In [23]:
pd_common_onlygevsimbadxmm = create_common_data(pd_common_onlygevsimbad, data_xmm, matrix, "gev_RAJ2000", "gev_DEJ2000", "s_glat", "s_glon")
print("There are " + str(len(pd_common_onlygevsimbadxmm)) + " common objects in the gev-simbad catalog with xmm.")

100%|██████████| 2441/2441 [07:26<00:00,  5.47it/s]


There are 109 common objects in the gev-simbad catalog with xmm.


In [24]:
pd_common_onlygevsimbadxmm.head()

Unnamed: 0,gev_0FGL_Name,gev_1FGL_Name,gev_1FHL_Name,gev_2FGL_Name,gev_ASSOC1,gev_ASSOC2,gev_ASSOC_GAM1,gev_ASSOC_GAM2,gev_ASSOC_GAM3,gev_ASSOC_TEV,...,xmm_SC_HR3,xmm_SC_HR3_ERR,xmm_SC_HR4,xmm_SC_HR4_ERR,xmm_SC_POSERR,xmm_SC_RA,xmm_SC_SUM_FLAG,xmm_SC_VAR_FLAG,xmm_SRCID,xmm_WEBPAGE_URL
86,b' ',b' ',b'1FHL J0040.3+4049 ',b' ',b'B3 0037+405 ',b' ',b' ',b' ',b' ',b' ',...,-0.153511,0.178042,0.293061,0.199234,1.20314,9.96372,1,0,204025605010130,b'http://xmm-catalog.irap.omp.eu/source/204025...
86,b' ',b' ',b'1FHL J0040.3+4049 ',b' ',b'B3 0037+405 ',b' ',b' ',b' ',b' ',b' ',...,-0.153511,0.178042,0.293061,0.199234,1.20314,9.96372,1,0,204025605010130,b'http://xmm-catalog.irap.omp.eu/source/204025...
86,b' ',b' ',b'1FHL J0040.3+4049 ',b' ',b'B3 0037+405 ',b' ',b' ',b' ',b' ',b' ',...,-0.153511,0.178042,0.293061,0.199234,1.20314,9.96372,1,0,204025605010130,b'http://xmm-catalog.irap.omp.eu/source/204025...
86,b' ',b' ',b'1FHL J0040.3+4049 ',b' ',b'B3 0037+405 ',b' ',b' ',b' ',b' ',b' ',...,-0.153511,0.178042,0.293061,0.199234,1.20314,9.96372,1,0,204025605010130,b'http://xmm-catalog.irap.omp.eu/source/204025...
90,b' ',b' ',b' ',b'2FGL J0042.5+4114 ',b'M31 ',b' ',b' ',b' ',b' ',b' ',...,0.001301,0.098734,-0.365255,0.155236,0.744193,10.51067,1,0,201092701010170,b'http://xmm-catalog.irap.omp.eu/source/201092...


In [25]:
pd_common_gevtevsimbadxmm.to_csv("data/gevtevsimbadxmm.txt", sep='\t')
pd_common_onlygevsimbadxmm.to_csv("data/gevsimbadxmm.txt", sep='\t')
pd_common_onlytevsimbadxmm.to_csv("data/tevsimbadxmm.txt", sep='\t')

In [26]:
gevtev_simbadclasses = pd.read_csv("data/gevtev_simbadclasses.txt", sep='\t', encoding='utf-8')

In [27]:
matrix = create_matrix_common(gevtev_simbadclasses, data_xmm, _epsilon, "gev_RAJ2000", "gev_DEJ2000", "xmm_SC_RA", "xmm_SC_DEC")

In [28]:
pd_common_gevtevxmm = create_common_data(gevtev_simbadclasses, data_xmm, matrix, "gev_RAJ2000", "gev_DEJ2000", "xmm_SC_RA", "xmm_SC_DEC")

100%|██████████| 4554/4554 [14:17<00:00,  5.31it/s]


In [29]:
print("There are " + str(len(pd_common_gevtevxmm)) + " common objects in the gev-tev catalog with xmm.")

There are 5067 common objects in the gev-tev catalog with xmm.


In [30]:
pd_common_gevtevxmm.to_csv("data/gevtev_simbadclasses_xmm.txt", sep='\t')

In [31]:
pd_common_gevtevxmm = pd.read_csv("data/gevtev_simbadclasses_xmm.txt", sep='\t')
pd_common_gevtevxmm = pd_common_gevtevxmm.loc[:, ~pd_common_gevtevxmm.columns.str.contains('^Unnamed')]
pd_common_gevtevxmm = pd_common_gevtevxmm.loc[:, ~pd_common_gevtevxmm.columns.str.contains('^marked')]
pd_common_gevtevxmm.to_csv('data/gevtevxmm_s.txt',sep = '\t')

In [32]:
print(len(pd_common_gevtevxmm['gev_RAJ2000'].unique()))
print(len(gevtev_simbadclasses['gev_RAJ2000'].unique()))

71
105
