## Compare new-common sample to literature (SIMBAD)

In [1]:
import os, glob, getpass, sys
import warnings

import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table, join, vstack, hstack, Column, MaskedColumn, unique
from astropy.utils.exceptions import AstropyWarning
from astropy import units as u
from collections import Counter

# Extend the import search path with two extra locations, in this order:
#   1. the current user's Dropbox "my_python_packages" folder (macOS-style /Users/<name> home),
#   2. the parent directory of this notebook.
# Presumably these are where the `gaia` and `extra_codes` packages imported
# just below live -- TODO confirm which package comes from which location.
user = getpass.getuser()
path = '../'
sys.path.extend(['/Users/' + user + '/Dropbox/my_python_packages', path])

from gaia.cluster_comparison_n import Comparison_n as Comparison
from extra_codes import sample_initial as samp_ini, sample_comp as samp_comp

In [2]:
# Load the common sample and its SIMBAD cross-match ==
# Astropy warnings are silenced from this point on (the filter is process-wide).
warnings.simplefilter('ignore', AstropyWarning)

# SIMBAD columns retained for the comparison.
cols = ['TYPED_ID', 'MAIN_ID', 'OTYPE_S', 'RA_d', 'DEC_d', 'FLUX_BIBCODE_J', 'NB_REF']

# Common sample; `new` keeps only the rows whose 'Control' flag equals 'N'
# (labelled "Common-NO-Control" in the summary printed below).
common = Table.read('common_sample_case_0.vot', format='votable')
new    = common[common['Control'] == 'N']

# SIMBAD table, reduced to the columns above and ordered by right ascension.
simbad = Table.read('simbad.xml', format='votable')[cols]
simbad.sort('RA_d')

# Summary: rows present in the SIMBAD table, and the remainder of `new`
# (counted as len(new) - len(simbad), i.e. assuming every SIMBAD row
# corresponds to one row of `new` -- TODO confirm).
print(f'Common-NO-Control in Simbad:             {len(simbad):>7.0f} Elements')
print(f'Common-NO-Control NOT in Simbad (NEW!!): {len(new) - len(simbad):>7.0f} Elements')

# Preview the first three SIMBAD rows.
simbad[:3]

Common-NO-Control in Simbad:                  77 Elements
Common-NO-Control NOT in Simbad (NEW!!):     166 Elements


TYPED_ID,MAIN_ID,OTYPE_S,RA_d,DEC_d,FLUX_BIBCODE_J,NB_REF
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,deg,deg,Unnamed: 5_level_1,Unnamed: 6_level_1
object,object,object,float64,float64,object,int32
Gaia dr2 6050333112920811776,2MASS J16155926-2329363,brownD*,243.99693062,-23.4934321,2003yCat.2246....0C,8
Gaia dr2 6050119498424114432,2MASS J16164396-2351257,brownD*,244.18318366,-23.85715638,2007MNRAS.374..372L,1
Gaia dr2 6050345104469958400,2MASS J16170148-2329058,brownD*,244.25618444,-23.48493246,2007MNRAS.374..372L,1


In [3]:
# Examine the most famous (best-known) objects ===
# Reorder `simbad` by descending NB_REF (the reversed ascending argsort)
# and preview the five rows with the largest NB_REF values.
indexes = np.flip(simbad.argsort(keys='NB_REF'))
simbad  = simbad[indexes]
simbad[:5]

TYPED_ID,MAIN_ID,OTYPE_S,RA_d,DEC_d,FLUX_BIBCODE_J,NB_REF
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,deg,deg,Unnamed: 5_level_1,Unnamed: 6_level_1
object,object,object,float64,float64,object,int32
Gaia dr2 6049011053265501056,HD 147701,Star,246.08882697,-25.02540247,2003yCat.2246....0C,101
Gaia dr2 6050945747052676352,* rho Oph C,Star,246.39618174,-23.4052191,2003yCat.2246....0C,96
Gaia dr2 6047588594453743360,ROXs 43A,TTau*,247.83382793,-24.50140548,2003yCat.2246....0C,46
Gaia dr2 6050352659313505536,HD 146706,V*,244.61773753,-23.27430241,2003yCat.2246....0C,46
Gaia dr2 6050951072812273664,Haro 1-8,TTau*,246.69761733,-23.24783702,2003yCat.2246....0C,44


In [4]:
# Prepare File for SIMBAD Script Query =========
# This is to find who cites the objects in the common-control-no sample
simbad_inp  = ['query id ' + inp.decode('utf-8') for inp in simbad['TYPED_ID']]
Table([simbad_inp]).write('common_sample_case_0_no_control_known_simbad_script.txt', format = 'ascii.fast_no_header', overwrite = True)

!open common_sample_case_0_no_control_known_simbad_script.txt

In [5]:
# Reminder of the manual step: blank line, first message, blank line, second message.
print(
    '',
    '"common_sample_case_0_no_control_known_simbad_script.txt" IS CROSSED AGAINST SIMBAD-SCRIPT SERVICE TO find SIMBAD BIBCODES',
    '',
    'THE OUTPUT IS SAVED IN THIS DIRECTORY AS simbad_bibcodes.txt',
    sep='\n',
)


"common_sample_case_0_no_control_known_simbad_script.txt" IS CROSSED AGAINST SIMBAD-SCRIPT SERVICE TO find SIMBAD BIBCODES

THE OUTPUT IS SAVED IN THIS DIRECTORY AS simbad_bibcodes.txt


In [6]:
# Upload "common_sample_case_0_no_control_known_simbad_script.txt" to the SIMBAD script service
# (http://simbad.u-strasbg.fr/simbad/sim-fscript) and download the output as "simbad_bibcodes.txt".
# Script examples:

#format object form1 "%IDLIST(1) %COO(d;A) %COO(d;D) %BIBCODELIST"
#result full
#query id Gaia dr2 6050346856816823552
#query id Gaia dr2 6050345104469958400

#format object form1 "%IDLIST(1) %BIBCODELIST"
#result full
#query id Gaia dr2 6050346856816823552
#query id Gaia dr2 6050345104469958400

In [7]:
# Read SIMBAD bibcodes =========================
# The file is parsed as a header-less ASCII table; its first column ('col1')
# holds one bibcode per row.
bibcodes     = Table.read('simbad_bibcodes.txt', format='ascii.fast_no_header')
bibcodes_set = list(set(bibcodes['col1']))   # distinct bibcodes (arbitrary order)

print('Total bibcodes: ',      len(bibcodes))
print('Individual bibcodes: ', len(bibcodes_set))

# Occurrences of each bibcode, and the ten most frequent ones.
counter = Counter(bibcodes['col1'])
most_10 = counter.most_common(10)

print()
for bibcode, n_ref in most_10:
    print('Bibcode: ', bibcode, '  NRef: ', n_ref)

Total bibcodes:  932
Individual bibcodes:  407

Bibcode:  2012ApJ...758...31L   NRef:  26
Bibcode:  2016ApJS..224....2H   NRef:  24
Bibcode:  2003A&A...404..913S   NRef:  20
Bibcode:  2015MNRAS.448.2737R   NRef:  20
Bibcode:  2017ApJ...837...95B   NRef:  18
Bibcode:  2015ApJ...813...83C   NRef:  16
Bibcode:  2007ApJ...667..308C   NRef:  14
Bibcode:  2007ApJ...662..413K   NRef:  14
Bibcode:  2005A&A...438..769D   NRef:  12
Bibcode:  1998MNRAS.300..733M   NRef:  11


# Prepare Vizier Queries

In [8]:
# Define the inputs of the VizieR queries ===============
# Radius handed to the catalogue queries in the following cells (`radius=radii`).
radii               = 0.5 * u.arcsecond
# Plain-text copy of the TYPED_ID identifiers (bytes decoded as UTF-8), used as query input.
simbad['vizier_id'] = [inp.decode('utf-8') for inp in simbad['TYPED_ID']]

In [9]:
# Query VizieR for the Luhman 2012 catalogue (J/ApJ/758/31, table1) ===
# One query over all `vizier_id` identifiers within `radii`; only the input id,
# the `_r` column and the catalogue name and spectral type are kept.
cat_2012_L = samp_ini.query_ids_1(
    simbad['vizier_id'],
    catalog='J/ApJ/758/31/table1',
    radius=radii,
    verbose=False,
)['inp_id', '_r', 'Name', 'SpT']
cat_2012_L

inp_id,_r,Name,SpT
Unnamed: 0_level_1,arcsec,Unnamed: 2_level_1,Unnamed: 3_level_1
str28,float64,bytes28,bytes8
Gaia dr2 6045914377549499648,0.04,2MASS J16270217-2542346,M6
Gaia dr2 6048976762246802944,0.11,2MASS J16233234-2523485,G1
Gaia dr2 6049011053265501056,0.05,2MASS J16242132-2501314,B5III
Gaia dr2 6049226042146798592,0.38,2MASS J16235470-2438319,M6
Gaia dr2 6049277886695322880,0.07,2MASS J16203456-2430205,M6.5
Gaia dr2 6050067061167006720,0.04,2MASS J16200757-2359150,M6
Gaia dr2 6050070531500394240,0.24,2MASS J16210222-2358395,M5.25
Gaia dr2 6050087677009711744,0.11,2MASS J16204596-2348208,K3
Gaia dr2 6050104582001653376,0.17,2MASS J16182082-2401502,M5.5
...,...,...,...


In [10]:
# Query VizieR for the Rizzuto 2015 catalogue (J/MNRAS/448/2737, table2) ===
# One query over all `vizier_id` identifiers within `radii`; only the input id,
# the `_r` column, the 2MASS designation, spectral type and AV are kept.
cat_2015_R = samp_ini.query_ids_1(
    simbad['vizier_id'],
    catalog='J/MNRAS/448/2737/table2',
    radius=radii,
    verbose=False,
)['inp_id', '_r', '_2MASS', 'SpT', 'AV']
cat_2015_R

inp_id,_r,_2MASS,SpT,AV
Unnamed: 0_level_1,arcsec,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
str28,float64,bytes17,bytes4,float32
Gaia dr2 6048608906890968960,0.05,J16212953-2529431,M2.5,0.7
Gaia dr2 6048935358761628288,0.08,J16262803-2526477,M0.0,1.8
Gaia dr2 6048948759059667712,0.13,J16263495-2511409,M0.0,1.1
Gaia dr2 6048976762246802944,0.11,J16233234-2523485,K2.0,1.3
Gaia dr2 6049045000683707392,0.07,J16271273-2504017,M1.0,0.9
Gaia dr2 6049456183673228672,0.2,J16235509-2330396,M2.5,1.2
Gaia dr2 6050345001390940160,0.05,J16171649-2327570,M3.5,0.6
Gaia dr2 6050346856816823552,0.1,J16172162-2325004,M4.0,0.5
Gaia dr2 6050373760491905920,0.2,J16173138-2303360,K1.5,0.0
Gaia dr2 6050487456864139392,0.05,J16200686-2247320,M4.5,0.9


In [11]:
# Query VizieR for the Kraus 2007 catalogue (J/ApJ/662/413, table2) ===
# One query over all `vizier_id` identifiers within `radii`; only the input id,
# the `_r` column and the SIMBAD name are kept.
cat_2007_K = samp_ini.query_ids_1(
    simbad['vizier_id'],
    catalog='J/ApJ/662/413/table2',
    radius=radii,
    verbose=False,
)['inp_id', '_r', 'SimbadName']
cat_2007_K

inp_id,_r,SimbadName
Unnamed: 0_level_1,arcsec,Unnamed: 2_level_1
str28,float64,bytes31
Gaia dr2 6048976762246802944,0.11,GSC 06798-00035
Gaia dr2 6049226042146798592,0.22,[SCH2006] J16235474-24383211
Gaia dr2 6050067061167006720,0.04,[SCH2006] J16200756-23591522
Gaia dr2 6050087677009711744,0.02,GSC 06794-00480
Gaia dr2 6050110354436327168,0.09,[SCH2006] J16174540-23533618
Gaia dr2 6050133031864786432,0.17,DENIS-P J161903.4-234408
Gaia dr2 6050164303519025024,0.14,[SCH2006] J16213591-23550341
Gaia dr2 6050211681303674368,0.07,[SCH2006] J16235158-23172740
Gaia dr2 6050373760491905920,0.13,ScoPMS060
Gaia dr2 6050583969069510272,0.05,DENIS-P J161840.8-220948


In [12]:
# Merge the three catalogues and drop duplicated objects =====
# Stack the tables row-wise, then keep a single row per `inp_id`.
merged = unique(vstack([cat_2007_K, cat_2012_L, cat_2015_R]), keys='inp_id')

# Row counts, printed in the order: Kraus 2007, Rizzuto 2015, Luhman 2012, merged.
print(len(cat_2007_K), len(cat_2015_R), len(cat_2012_L), len(merged))

14 20 25 43
