# MLE matching verification

Previously done, the cross-matching using Minimum Likelihood Estimator should now be verified. We cross-matched the x-ray sources of LaMassa 2016+ with UKIDSS-dr8 and stored the results in a csv file named `lamassa_ukidss_MatchMLE.csv`

There are two ways of checking the results. One is to look straight to the cross-match (with UKIDSS data) previously done by LaMassa 2016+ and see if the sources are the same. The other way of doing it is actually a comparison of methods, where we would evaluate the discrepancy of MLE with a well known technique, Nearest Neighbors; that would *not* evaluate the method itself, but the overall quality of the results spoting something absurd if there is.

In [8]:
from booq import table

In [11]:
tab_lm = table.ATable.read('lamassa/cds/xmmao13.dat',readme='lamassa/cds/ReadMe',format='cds')
tab_lm.colnames



['Seq',
 'ObsID',
 'RAdeg',
 'DEdeg',
 'e_Pos',
 'DistNN',
 'ExtFlag',
 'InXMM',
 'InChandra',
 'FSoft',
 'e_FSoft',
 'CtSoft',
 'SoftDetml',
 'FHard',
 'e_FHard',
 'CtHard',
 'HardDetml',
 'FFull',
 'e_FFull',
 'CtFull',
 'FullDetml',
 'logLSoft',
 'logLHard',
 'logLFull',
 'RejS',
 'SDSS',
 'RASdeg',
 'DESdeg',
 'RelS',
 'rS',
 'CoaddS',
 'umag',
 'e_umag',
 'gmag',
 'e_gmag',
 'rmag',
 'e_rmag',
 'imag',
 'e_imag',
 'zmag',
 'e_zmag',
 'SpecID',
 'Class',
 'zsp',
 'r_zsp',
 'WISE',
 'RAWdeg',
 'DEWdeg',
 'e_RAWdeg',
 'e_DEWdeg',
 'RelW',
 'rW',
 'W1mag',
 'e_W1mag',
 'W1SNR',
 'W2mag',
 'e_W2mag',
 'W2SNR',
 'W3mag',
 'e_W3mag',
 'W3SNR',
 'W4mag',
 'e_W4mag',
 'W4SNR',
 'ExtW',
 'RejW',
 'UKIDSS',
 'RAUdeg',
 'DEUdeg',
 'RelU',
 'rU',
 'Ymag',
 'e_Ymag',
 'Jmag',
 'e_Jmag',
 'Hmag',
 'e_Hmag',
 'Kmag',
 'e_Kmag',
 'RejU',
 'VHS',
 'RAVdeg',
 'DEVdeg',
 'RelV',
 'rV',
 'JVmag',
 'e_JVmag',
 'HVmag',
 'e_HVmag',
 'KVmag',
 'e_KVmag',
 'RejV',
 'GALEX',
 'RAGdeg',
 'DEGdeg',
 'e_NUVPo

In [30]:
columns = ['Seq',
 'ObsID',
 'RAdeg',
 'DEdeg',
 'e_Pos',
 'DistNN',
 'UKIDSS',
 'RAUdeg',
 'DEUdeg',
 'RelU',
 'rU',
 'Ymag',
 'e_Ymag',
 'Jmag',
 'e_Jmag',
 'Hmag',
 'e_Hmag',
 'Kmag',
 'e_Kmag',
 'RejU']

ID_column = 'Seq'

df_lm = tab_lm[columns].to_dataframe()

df_lm[ID_column] = df_lm[ID_column].astype(int).astype(str)
df_lm.set_index(ID_column,inplace=True)
df_lm

Unnamed: 0_level_0,ObsID,RAdeg,DEdeg,e_Pos,DistNN,UKIDSS,RAUdeg,DEUdeg,RelU,rU,Ymag,e_Ymag,Jmag,e_Jmag,Hmag,e_Hmag,Kmag,e_Kmag,RejU
Seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2359,742830101.0,14.097,0.166,4.7,515.8,,,,,,,,,,,,,,no
2360,742830101.0,14.115,-0.353,3.5,171.7,433834423123,14.115,-0.352,0.936,3.51,17.92,0.02,17.25,0.02,16.42,0.03,15.64,0.02,no
2361,742830101.0,14.115,-0.160,5.8,291.7,433834423565,14.115,-0.160,0.826,1.08,,,19.49,0.15,18.64,0.20,17.88,0.15,no
2362,742830101.0,14.142,-0.442,3.4,49.1,,,,,,,,,,,,,,no
2363,742830101.0,14.154,-0.448,2.1,49.1,433836362494,14.155,-0.447,0.977,1.81,18.22,0.04,17.87,0.04,17.17,0.05,16.25,0.03,no
2364,742830101.0,14.162,-0.357,5.2,145.3,,,,,,,,,,,,,,no
2365,742830101.0,14.162,0.038,4.0,224.1,433832563084,14.162,0.040,0.843,6.21,18.88,0.05,18.50,0.06,17.78,0.10,17.45,0.10,no
2366,742830101.0,14.163,-0.400,3.8,29.8,433836363120,14.164,-0.399,0.877,2.61,,,,,,,18.11,0.19,no
2367,742830101.0,14.170,-0.244,2.7,283.6,,,,,,,,,,,,,,no
2368,742830101.0,14.172,-0.400,4.3,29.8,433836362527,14.172,-0.398,0.764,6.36,20.28,0.24,19.44,0.17,18.43,0.15,17.25,0.09,no


In [29]:
import pandas
df_mle = pandas.read_csv('lamassa_ukidss_MatchMLE.csv')

df_mle[ID_column] = df_mle[ID_column].astype(int).astype(str)
df_mle.set_index(ID_column, inplace=True)
df_mle

Unnamed: 0_level_0,SOURCEID,ObsID,RAdeg,DEdeg,e_Pos,DistNN,ExtFlag,InXMM,InChandra,FSoft,...,HAPERMAG3,HAPERMAG3ERR,KHALLMAG,KHALLMAGERR,KPETROMAG,KPETROMAGERR,KPSFMAG,KPSFMAGERR,KAPERMAG3,KAPERMAG3ERR
Seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2360,433834423123,742830101.0,14.115,-0.353,3.5,171.7,0.0,no,no,1.68,...,16.420605,0.027337,15.266109,0.031406,15.165094,0.047093,,,15.644726,0.019929
2361,433834423565,742830101.0,14.115,-0.160,5.8,291.7,0.0,no,no,2.02,...,18.644693,0.198955,17.770628,0.234180,17.763853,0.258605,,,17.884663,0.145093
2362,433836362467,742830101.0,14.142,-0.442,3.4,49.1,0.0,no,no,0.95,...,17.760912,0.083793,16.968500,0.112740,16.943735,0.177616,,,17.142780,0.078272
2363,433836362494,742830101.0,14.154,-0.448,2.1,49.1,0.0,no,no,7.25,...,17.173470,0.049189,16.122658,0.049313,15.984794,0.066120,,,16.248377,0.034981
2365,433832563084,742830101.0,14.162,0.038,4.0,224.1,0.0,no,no,1.64,...,17.777243,0.096601,17.553614,0.169763,17.494886,0.185832,,,17.446341,0.103341
2366,433836363120,742830101.0,14.163,-0.400,3.8,29.8,0.0,no,no,1.24,...,,,17.838139,0.317437,17.724998,0.303445,,,18.112112,0.189446
2368,433836362527,742830101.0,14.172,-0.400,4.3,29.8,0.0,no,no,0.51,...,18.432409,0.153373,17.352652,0.137550,17.437912,0.219109,,,17.248894,0.086354
2369,433836363145,742830101.0,14.186,-0.389,4.4,62.9,0.0,no,no,0.00,...,,,17.910995,0.299177,17.910995,0.299177,,,18.370745,0.240838
2370,433834437127,742830101.0,14.196,-0.166,3.6,226.1,0.0,no,no,0.59,...,,,18.955881,0.764128,18.777044,0.610249,,,18.318068,0.225548
2371,433836363155,742830101.0,14.197,-0.577,3.4,328.9,0.0,no,no,1.67,...,,,17.414028,0.201721,17.780397,0.396348,,,17.672789,0.126915


First of all, let's check how many matches we have in each table.

In [21]:
# Lamassa
sum(~df_lm['UKIDSS'].isnull())

1784

In [19]:
# Mine
len(df_mle)

1492

Of those matches in LaMassa's, how many are the same matching pair

In [57]:
df_pairs = df_mle[['SOURCEID']]
df_pairs = pandas.merge(df_pairs, df_lm[['UKIDSS']], right_index=True, left_index=True)
df_pairs.head()

Unnamed: 0_level_0,SOURCEID,UKIDSS
Seq,Unnamed: 1_level_1,Unnamed: 2_level_1
2360,433834423123,433834423123.0
2361,433834423565,433834423565.0
2362,433836362467,
2363,433836362494,433836362494.0
2365,433832563084,433832563084.0


In [58]:
for col in df_pairs.columns:
    df_pairs[col] = df_pairs[col].astype(str)
    
df_pairs.head()

Unnamed: 0_level_0,SOURCEID,UKIDSS
Seq,Unnamed: 1_level_1,Unnamed: 2_level_1
2360,433834423123,433834423123.0
2361,433834423565,433834423565.0
2362,433836362467,
2363,433836362494,433836362494.0
2365,433832563084,433832563084.0


In [59]:
df_pairs.loc[df_pairs['UKIDSS']=='nan','UKIDSS'] = None
df_pairs.head(20)

Unnamed: 0_level_0,SOURCEID,UKIDSS
Seq,Unnamed: 1_level_1,Unnamed: 2_level_1
2360,433834423123,433834423123.0
2361,433834423565,433834423565.0
2362,433836362467,
2363,433836362494,433836362494.0
2365,433832563084,433832563084.0
2366,433836363120,433836363120.0
2368,433836362527,433836362527.0
2369,433836363145,433836363145.0
2370,433834437127,433834437127.0
2371,433836363155,433836363155.0


In [62]:
df_comp = df_pairs.dropna()
sum(df_comp['SOURCEID']==df_comp['UKIDSS'])

1324

In [67]:
non_match_portion = sum(df_comp['SOURCEID']!=df_comp['UKIDSS'])/len(df_comp)
print("Non matching portion: {:.2f}%".format(non_match_portion*100))

Non matching portion: 4.95%
