# 1.- Crossmatch Sample Initial, Gaia, and Control

In [1]:
import numpy as np
import warnings
import os, glob, getpass, sys

from astropy.table            import Table, join, vstack, hstack, Column, MaskedColumn, unique
from astropy.utils.exceptions import AstropyWarning
from astropy                  import units as u
import collections

# Root directory of the project; the input catalogues are read from its sub-folders.
path = '/Users/hcanovas/Astrofisica/papers/2019_clustering_oph_final/'
# Put the project directory on the module search path.
sys.path.append(path)
# Login name of whoever is running the notebook.
user = getpass.getuser()

In [2]:
# Read Samples & Apply Cosmetics ===============
# Silence Astropy warnings; the filter is appended before the VOTables are read.
warnings.filterwarnings('ignore', category=AstropyWarning, append=True)

# Initial sample: group the rows by 2MASS ID and keep only the ID and the
# reference column, the latter renamed to 'Ref.'.
cat_ini = Table.read(path + 'sample_initial/sample_ini.vot')
cat_ini = cat_ini.group_by('col2mass')['col2mass', 'refs_2']
cat_ini.rename_column('refs_2', 'Ref.')

# Initial sample x Gaia: keep the two ID columns, drop the rows whose Gaia
# source_id is masked, and leave a single row per source_id.
cat_igaia = Table.read(path + 'sample_initial/sample_initial_oph_x_gaia-result.vot')
cat_igaia.convert_bytestring_to_unicode()
cat_igaia = cat_igaia['source_id', 'col2mass']
cat_igaia = unique(cat_igaia[~cat_igaia['source_id'].mask], keys='source_id')

# Control sample: keep the two ID columns and flag every row with 'Y'.
cat_control = Table.read(path + 'sample_control/OPH___control_sample.vot')
cat_control.convert_bytestring_to_unicode()
cat_control = cat_control['source_id', 'col2mass']
cat_control['Control'] = ['Y'] * len(cat_control)

# Summary of the catalogue sizes.
print()
print(f'Elements in Sample Ini:      {len(cat_ini):10.0f}')
print(f'Elements in Sample Ini*Gaia: {len(cat_igaia):10.0f}')
print(f'Elements in Sample Control:  {len(cat_control):10.0f}')
print()


Elements in Sample Ini:             465
Elements in Sample Ini*Gaia:        304
Elements in Sample Control:         188



In [3]:
# Combine Initial & Control samples ============
# Left join on the 2MASS ID: every row of the initial sample is kept, and the
# rows without a counterpart in the control sample get a masked 'Control'.
# NOTE(review): the original note here read "better start with Control, as the
# Control sample is astrometrically clean", yet the join starts from cat_ini.
merged = join(cat_ini, cat_control, keys='col2mass', join_type='left')

# Masked 'Control' entries are the sources absent from the control sample.
not_in_control = merged['Control'].mask.copy()
merged['Control'][not_in_control] = 'N'

print(f'Lenght Merged:  {len(merged):10.0f}')
print(f'Control in Merged: {len(merged[merged["Control"] == "Y"]):7.0f}')
merged[:3]

Lenght Merged:         465
Control in Merged:     188


col2mass,Ref.,source_id,Control
str17,str7,int64,str1
16211093-2343287,3,--,N
16211848-2254578,3,6050297206990387840,Y
16211918-2342287,3,6050172068822858624,Y


In [4]:
# Combine Initial*Control & Gaia*Ini  samples =
# Left join on the 2MASS ID. Both inputs carry a 'source_id' column, so the
# result holds 'source_id_1' (from merged, i.e. the control sample) and
# 'source_id_2' (from cat_igaia).
merged_2 = join(merged, cat_igaia, keys='col2mass', join_type='left')

# Correct for Control duplicates by join ======
# A 2MASS ID matched to several Gaia sources is repeated by the join, and every
# copy inherits the same 'Control' flag. Keep 'Y' only on the rows whose own
# Gaia ID is a control source_id; rows without a Gaia ID (masked) become 'N'.
# The mask is handled explicitly and the membership test is vectorised instead
# of scanning the control column once per row.
control_ids = np.ma.compressed(cat_control['source_id'])
gaia_ids    = merged_2['source_id_2']
has_gaia_id = ~np.ma.getmaskarray(gaia_ids)
# The fill value is irrelevant: filled (masked) rows are excluded by has_gaia_id.
in_control  = has_gaia_id & np.isin(np.asarray(np.ma.filled(gaia_ids, 0)), control_ids)
merged_2['Control'][~in_control] = 'N'

# Find duplicate values =======================
# Count the rows per 2MASS ID after the join; any ID present more than once
# was matched to several Gaia sources.
coll   = collections.Counter(merged_2['col2mass'])
ids    = list(coll)
ntimes = list(coll.values())
nreps  = Table([ids, ntimes], names=['col2mass', 'Reps'])
# '> 1' rather than '== 2' so that IDs repeated three or more times are caught.
duplex = nreps[nreps['Reps'] > 1]
n_duplicated = len(duplex)
# Attach the reference, control source_id and control flag of each duplicated ID.
duplex = join(duplex, merged, keys='col2mass')

print(f'Lenght Merged: {len(merged_2)}')
# The count is computed from the data instead of being hard-coded.
print(f'Note: there are {n_duplicated} duplicated Gaia IDs for 2MASS IDs below:')
duplex

Lenght Merged: 470
Note: there are 5 duplicated Gaia IDs for 2MASS IDs below:


col2mass,Reps,Ref.,source_id,Control
str17,int64,str7,int64,str1
16222099-2304025,2,3,--,N
16233609-2402209,2,3,6049399966845155840,Y
16253958-2426349,2,"1, 2, 3",--,N
16275565-2444509,2,1,6049090974013703424,Y
16282373-2441412,2,1,6049081284567483136,Y


In [5]:
# Sanity check -- keep this cell ===============
# Show every merged_2 row that belongs to the second 2MASS ID listed in duplex.
check_id = duplex['col2mass'][1]
merged_2[merged_2['col2mass'] == check_id]

col2mass,Ref.,source_id_1,Control,source_id_2
str17,str7,int64,str1,int64
16233609-2402209,3,6049399966845155840,Y,6049399966845155840
16233609-2402209,3,6049399966845155840,N,6049399971141149312


In [6]:
# Construct final merged table ================
# Work on a copy of merged_2 in which the Gaia ID that came from cat_igaia
# ('source_id_2') becomes the only 'source_id' column.
merged_3 = merged_2.copy()
merged_3.rename_column('source_id_2', 'source_id')
merged_3.remove_columns(['source_id_1'])

# Size summary: all rows, rows after collapsing repeated 2MASS IDs, rows with
# a Gaia ID, and rows flagged as control.
print()
print(f'Total Elements in merged: {len(merged_3):46.0f}')
print(f'Total Elements in merged after duplicate 2MASS-ID correction: {len(unique(merged_3, keys="col2mass")):10.0f}')
print(f'Total Gaia Elements in merged: {len(merged_3[merged_3["source_id"].mask == False]):41.0f}')
print(f'Control Elements in merged: {len(merged_3[merged_3["Control"] == "Y"]):44.0f}')


merged_3[:3]


Total Elements in merged:                                            470
Total Elements in merged after duplicate 2MASS-ID correction:        465
Total Gaia Elements in merged:                                       304
Control Elements in merged:                                          188


col2mass,Ref.,Control,source_id
str17,str7,str1,int64
16211093-2343287,3,N,6050182204945677568
16211848-2254578,3,Y,6050297206990387840
16211918-2342287,3,Y,6050172068822858624


In [7]:
# Sanity check -- keep this cell ===============
# Show every merged_3 row that belongs to the fifth 2MASS ID listed in duplex.
check_id = duplex['col2mass'][4]
merged_3[merged_3['col2mass'] == check_id]

col2mass,Ref.,Control,source_id
str17,str7,str1,int64
16282373-2441412,1,N,6049081284565928192
16282373-2441412,1,Y,6049081284567483136


In [8]:
# Save Table =======================
merged_3.rename_column('col2mass',    '2MASS')
merged_3.rename_column('source_id', 'DR2 Source ID')
merged_3.write('sample_ini_cross.vot', format = 'votable', overwrite = True)
merged_3[120:125].write('sample_ini_cross.tex', format = 'ascii.latex', overwrite = True)
!open 'sample_ini_cross.tex'

merged_3[120:125]

2MASS,Ref.,Control,DR2 Source ID
str17,str7,str1,int64
16261949-2437275,"1, 2, 3",Y,6049122310095142656
16262083-2428395,"1, 3",N,--
16262096-2408468,3,Y,6049357429490158336
16262097-2408518,"1, 2",Y,6049357433785068672
16262138-2423040,"1, 3",N,--
