# Similar objects in GEV and TEV catalogs

In [1]:
import numpy as np
import pandas as pd

from astropy.io import fits

#fits_image_filename = fits.util.get_testdata_filepath()

In [2]:
_epsilon = 1e-2

In [3]:
_path_gev = 'data/gll_psc_v16.fit'
_path_tev = 'data/gammacat.fits.gz'

In [4]:
_names_gev = [
    'CLASS1',    
    'RAJ2000',
    'DEJ2000', 
    'GLON', 
    'GLAT', 
    'Variability_Index', 
    'Flux1000', 
    'Flux10000_100000', 
    'Flux1000_3000', 
    'Flux100_300', 
    'Flux3000_10000', 
    'Flux300_1000',  
    'Flux30_100',
    'ASSOC_TEV'
    ] 

In [5]:
_names_tev = [
    'classes', 
    'glat', 
    'glon', 
    'morph_pa', 
    'pos_ra',
    'pos_dec',
    'sed_dnde', 
    'sed_dnde_err', 
    'sed_e_ref', 
    'spec_dnde_1TeV', 
    'spec_dnde_1TeV_err', 
    'spec_eflux_1TeV_10TeV', 
    'spec_eflux_1TeV_10TeV_err', 
    'spec_flux_1TeV', 
    'spec_flux_1TeV_crab', 
    'spec_flux_1TeV_crab_err'
    ]

In [6]:
_names_common = [
    'glat',
    'glon',
    'morph_pa',
    'pos_ra',
    'pos_dec',
    'sed_dnde',
    'sed_dnde_err',
    'sed_e_ref', 
    'spec_dnde_1TeV', 
    'spec_dnde_1TeV_err', 
    'spec_eflux_1TeV_10TeV', 
    'spec_eflux_1TeV_10TeV_err', 
    'spec_flux_1TeV', 
    'spec_flux_1TeV_crab', 
    'spec_flux_1TeV_crab_err', 
    'ASSOC_TEV', 
    'Variability_Index', 
    'Flux1000', 
    'Flux10000_100000', 
    'Flux1000_3000', 
    'Flux100_300', 
    'Flux3000_10000', 
    'Flux300_1000', 
    'Flux30_100', 
    'Flux1000', 
    'Flux10000_100000', 
    'Flux1000_3000', 
    'Flux100_300', 
    'Flux3000_10000', 
    'Flux300_1000', 
    'Flux30_100', 
    'CLASS1'
    ]

In [7]:
_gevToTev = {'BLL': 'blazar', 
            'FRSQ': 'frsq', 
            'HMB': 'bin' , 
            'BIN': 'bin', 
            'GAL': 'galaxy', 
            'PSR': 'psr', 
            'PWN': 'pwn', 
            'SNR': 'snr', 
            '': 'unid'}
_tevToGev = {v:k for k, v in _gevToTev.items()}

We take into account only these classes:

In [8]:
_interesting_types = {'BLL', 'blazar', 
            'FRSQ', 'frsq', 
            'HMB', 'bin' , 
            'BIN', 'bin', 
            'GAL', 'galaxy', 
            'PSR', 'psr', 
            'PWN', 'pwn', 
            'SNR', 'snr', 
            '', 'unid'}

In [9]:
def cat_gev_tev(path_gev, path_tev):    
    hdul_tev = fits.open(path_tev)
    cat_tev = hdul_tev[1].data
    hdul_gev = fits.open(path_gev)
    cat_gev = hdul_gev[1].data
    return cat_gev, cat_tev

In [10]:
def create_common(cat_gev, cat_tev, epsilon):
    """
    Returns 2 vectors for gev and tev respectively.
    C_associations_gev coordinate is equal 
    
    -1, if no similar objects in TEV catalog found
    i, where i is a corresponding index of a similar object from TEV
        to the object from GEV with an index equal to number 
        of the coordinate.
    
    Arguments:
    cat_gev -- rec array with GEV data
    cat_tev -- rec array with TEV data
    epsilon - threshold for similarity
    
    Returns:
    C_associations_gev - numpy array (n,)
    C_associations_tev - numpy array (m, )
    
    n - number of examples in GEV
    m - number of examples in TEV
    """
    
    class_gev = cat_gev['CLASS1']
    class_tev = cat_tev['classes']
    glat_gev = cat_gev['GLAT']
    glat_tev = cat_tev['glat']
    glon_gev = cat_gev['GLON']
    glon_tev = cat_tev['glon']
    
    C_associations_gev = -1.0 * np.ones((len(glat_gev)))
    C_associations_tev = -1.0 * np.ones((len(glat_tev)))
    
    for i in range(len(glat_gev)):
        for j in range(len(glat_tev)):
            classGeV = class_gev[i]
            classTeV = class_tev[j]
            if (classGeV in _interesting_types and classTeV in _interesting_types):
                if ((np.abs(glat_gev[i] - glat_tev[j])/np.abs(glat_gev[i]) < epsilon) and (np.abs(glon_gev[i] - glon_tev[j])//np.abs(glon_gev[j]) < epsilon)) :
                    C_associations_gev[i] = j
                    C_associations_tev[j] = i
    return C_associations_gev, C_associations_tev

In [11]:
def create_pandas_frames(cat_gev, cat_tev):   
    """
    Creates pandas dataframes with the same values as in cat_gev 
    and cat_tev, adding to columns names "gev_" and "tev_" 
    respectively.
    
    Arguments:
    cat_gev -- rec array with GEV data
    cat_tev -- rec array with TEV data
    
    Returns:
    data_gev -- pandas DataFrame with GEV data
    data_tev -- pandas DataFrame with TEV data
    """
    data_gev = pd.DataFrame.from_records(cat_gev.tolist(), columns=cat_gev.dtype.names)[_names_gev]
    gev_match_names = {}
    for i in data_gev.columns:
        gev_match_names.update({i : "gev_" + i})
    data_gev = data_gev.rename(columns = gev_match_names)

    data_tev = pd.DataFrame.from_records(cat_tev.tolist(), columns=cat_tev.dtype.names)[_names_tev]
    tev_match_names = {}
    for i in data_tev.columns:
        tev_match_names.update({i : "tev_" + i})
    data_tev = data_tev.rename(columns = tev_match_names)
    return data_gev, data_tev

In [12]:
def create_common_data(data_gev, data_tev, C_associations_gev, C_associations_tev):
    """
    The fonction adds objects found both in GeV and TeV.
    
    Arguments:
    data_gev -- pandas DataFrame with GEV data
    data_tev -- pandas DataFrame with TEV data
    C_associations_gev - numpy array (n,)
    C_associations_tev - numpy array (m, )
    
    n - number of examples in GEV
    m - number of examples in TEV
    
    Returns:
    pd_common_gevtev - pandas DataFrame with all chosen columns 
    from GEV and TEV
    """
    data_gev['join'] = C_associations_gev
    pd_common_gevtev = pd.merge(data_tev, data_gev, left_index=True, right_on='join', how='inner')
    data_tev['join'] = C_associations_tev
    pd_common_gevtev0 = pd.merge(data_tev, data_gev, right_index=True, left_on='join', how='inner')
    
    pd_common_gevtev = pd_common_gevtev.append(pd_common_gevtev0)
    array_non_duplicate = ['tev_glon', 'gev_GLAT', 'gev_GLON', 'tev_glat', 'gev_CLASS1', 'tev_classes']
    pd_common_gevtev = pd_common_gevtev.drop_duplicates(array_non_duplicate)
    del pd_common_gevtev['join']
    pd_common_gevtev = pd_common_gevtev.reset_index()
    #df_common = pd.DataFrame(data = data, columns = namefinal)
    return pd_common_gevtev

In [13]:
def create_only_tev_data(data_tev, C_associations_tev):
    """The fonction adds objects found only in TeV.
    """
    data_tev['join'] = C_associations_tev
    data_only_tev = data_tev[data_tev['join'] >= 0]
    del data_only_tev['join']
    return data_only_tev

def create_only_gev_data(data_gev, C_associations_gev):
    """The fonction adds objects found only in GeV.
    """
    data_gev['join'] = C_associations_gev
    data_only_gev = data_gev[data_gev['join'] >= 0]
    del data_only_gev['join']
    return data_only_gev

In [14]:
def compare_gev_tev_data():
    """
    The fonction returns common objects for GEV and TEV,
    only GEV and only TEV objects.
    
    Returns:
    common_data - pandas DataFrame with common for GEV and TEV objects 
    only_tev_data - pandas DataFrame of only TEV objects 
    only_gev_data - pandas DataFrame of only GEV objects 
    """
    cat_gev, cat_tev = cat_gev_tev(_path_gev, _path_tev)
    C_associations_gev, C_associations_tev = create_common(cat_gev, cat_tev, _epsilon)
    data_gev, data_tev = create_pandas_frames(cat_gev, cat_tev)
    common_data = create_common_data(data_gev, data_tev, C_associations_gev, C_associations_tev)
    only_tev_data = create_only_tev_data(data_tev, C_associations_tev)
    only_gev_data = create_only_gev_data(data_gev, C_associations_gev)
    return common_data, only_tev_data, only_gev_data

In [15]:
common_data, only_tev_data, only_gev_data = compare_gev_tev_data()

In [16]:
common_data.head()

Unnamed: 0,index,gev_ASSOC_TEV,gev_CLASS1,gev_DEJ2000,gev_Flux1000,gev_Flux10000_100000,gev_Flux1000_3000,gev_Flux100_300,gev_Flux3000_10000,gev_Flux300_1000,...,tev_sed_dnde,tev_sed_dnde_err,tev_sed_e_ref,tev_spec_dnde_1TeV,tev_spec_dnde_1TeV_err,tev_spec_eflux_1TeV_10TeV,tev_spec_eflux_1TeV_10TeV_err,tev_spec_flux_1TeV,tev_spec_flux_1TeV_crab,tev_spec_flux_1TeV_crab_err
0,241,b'3C 58 ',b'PSR ',64.815002,5.681968e-09,3.454686e-11,4.941771e-09,7.102287e-08,7.153675e-10,2.267299e-08,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",2e-13,4e-14,4.821693e-13,1.348196e-13,1.428571e-13,0.688653,0.169258
1,2352,b' ',b' ',-6.3395,1.8706e-09,4.493892e-11,1.022248e-09,4.386777e-08,6.768377e-10,3.591784e-09,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",2e-13,4e-14,4.821693e-13,1.348196e-13,1.428571e-13,0.688653,0.169258
2,74,b' ',b' ',39.2131,2.375056e-10,1.397146e-11,1.368538e-10,1.33847e-08,4.289723e-11,6.370846e-10,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
3,2907,b' ',b' ',33.141998,4.156732e-10,4.373509e-16,4.395126e-10,9.417974e-10,4.708702e-11,2.443668e-09,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
4,3029,b' ',b' ',38.453899,3.808875e-10,3.489049e-11,2.979559e-10,2.920173e-12,7.908256e-11,1.072957e-09,...,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,


In [17]:
only_tev_data.head()

Unnamed: 0,tev_classes,tev_glat,tev_glon,tev_morph_pa,tev_pos_ra,tev_pos_dec,tev_sed_dnde,tev_sed_dnde_err,tev_sed_e_ref,tev_spec_dnde_1TeV,tev_spec_dnde_1TeV_err,tev_spec_eflux_1TeV_10TeV,tev_spec_eflux_1TeV_10TeV_err,tev_spec_flux_1TeV,tev_spec_flux_1TeV_crab,tev_spec_flux_1TeV_crab_err
8,b'pwn',3.083848,130.717819,,31.379168,64.849998,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",2e-13,4e-14,4.821693e-13,1.348196e-13,1.428571e-13,0.688653,0.169258
9,b'blazar',-23.486858,142.601715,,,,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
11,b'unid',-16.766897,140.14296,,,,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,
13,b'bin',1.086135,135.675278,,,,"[3.2234e-12, 9.27542e-13, 2.32469e-13, 1.6299e...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.598415, 0.878288, 1.28897, 1.89222, 2.7774,...",4.8e-12,4e-13,1.203735e-11,1.751546e-12,3.529412e-12,17.013777,2.064908
24,b'psr',-5.784366,184.557465,,,,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,,,,,,


In [18]:
only_gev_data.head()

Unnamed: 0,gev_CLASS1,gev_RAJ2000,gev_DEJ2000,gev_GLON,gev_GLAT,gev_Variability_Index,gev_Flux1000,gev_Flux10000_100000,gev_Flux1000_3000,gev_Flux100_300,gev_Flux3000_10000,gev_Flux300_1000,gev_Flux30_100,gev_ASSOC_TEV
74,b' ',8.1464,39.2131,118.951416,-23.512846,35.193081,2.375056e-10,1.397146e-11,1.368538e-10,1.33847e-08,4.289723e-11,6.370846e-10,,b' '
241,b'PSR ',31.3957,64.815002,130.718506,3.070137,37.425854,5.681968e-09,3.454686e-11,4.941771e-09,7.102287e-08,7.153675e-10,2.267299e-08,,b'3C 58 '
278,b'BLL ',35.6735,43.032902,140.150467,-16.766998,885.043457,1.92784e-08,1.656387e-09,1.250112e-08,8.469462e-08,4.820046e-09,3.956647e-08,,b'3C 66A '
282,b' ',35.906101,62.0811,133.496384,1.115149,41.772064,5.665227e-09,6.462911e-15,4.542899e-09,4.935485e-08,6.58946e-10,1.139064e-08,,b' '
289,b' ',36.472198,61.987,133.77803,1.120675,46.688057,3.305842e-09,2.326516e-12,2.725758e-09,5.865269e-11,2.874661e-10,1.401843e-08,,b' '
