## Compare new-common sample to literature (SIMBAD)

In [1]:
import os, glob, getpass, sys
import warnings

import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table, join, vstack, hstack, Column, MaskedColumn, unique
from astropy.utils.exceptions import AstropyWarning
from astropy import units as u
from collections import Counter

# Make the personal package folder and the parent directory importable.
# NOTE(review): the first entry is an absolute, user-specific path.
user = getpass.getuser()
path = '../'
sys.path.extend([f'/Users/{user}/Dropbox/my_python_packages', path])

from gaia.cluster_comparison_n import Comparison_n as Comparison
from extra_codes import sample_initial as samp_ini, sample_comp as samp_comp

In [2]:
# Load the input tables ========================
# Silence Astropy warnings (stays in effect for every cell run afterwards)
warnings.simplefilter('ignore', AstropyWarning)

# Full sample, and the subset with DOH == 'YYY' and Control == 'N'
entire       = Table.read('tab_3.vot', format = 'votable')
doh_mask     = entire['DOH'] == 'YYY'
control_mask = entire['Control'] == 'N'
new          = entire[doh_mask & control_mask]

# SIMBAD table, reduced to the columns of interest and sorted on RA_d
simbad_cols = ('TYPED_ID', 'MAIN_ID', 'OTYPE_S', 'RA_d', 'DEC_d', 'FLUX_BIBCODE_J', 'NB_REF')
simbad      = Table.read('simbad.xml', format = 'votable')[simbad_cols]
simbad.sort('RA_d')

# Rows of `new` beyond the SIMBAD row count are reported as not in SIMBAD
n_in_simbad  = len(simbad)
n_not_simbad = len(new) - n_in_simbad
print(f'Common-NO-Control in Simbad:             {n_in_simbad:>7.0f} Elements')
print(f'Common-NO-Control NOT in Simbad (NEW!!): {n_not_simbad:>7.0f} Elements')

simbad[0:3]

Common-NO-Control in Simbad:                  77 Elements
Common-NO-Control NOT in Simbad (NEW!!):     166 Elements


TYPED_ID,MAIN_ID,OTYPE_S,RA_d,DEC_d,FLUX_BIBCODE_J,NB_REF
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,deg,deg,Unnamed: 5_level_1,Unnamed: 6_level_1
object,object,object,float64,float64,object,int32
Gaia dr2 6050333112920811776,2MASS J16155926-2329363,brownD*,243.99693062,-23.4934321,2003yCat.2246....0C,8
Gaia dr2 6050119498424114432,2MASS J16164396-2351257,brownD*,244.18318366,-23.85715638,2007MNRAS.374..372L,1
Gaia dr2 6050345104469958400,2MASS J16170148-2329058,brownD*,244.25618444,-23.48493246,2007MNRAS.374..372L,1


In [3]:
# Examine the most famous (best-known) objects ======
# group_by() is used here for its ordering: the rows come back arranged by the
# NB_REF key, and the in-place reverse() then puts the largest values first
# (the displayed rows run 101, 96, 46, ...).
# NOTE(review): assumes NB_REF is SIMBAD's per-object reference count — confirm.
famous = simbad.group_by('NB_REF')
famous.reverse()
famous[0:5]

TYPED_ID,MAIN_ID,OTYPE_S,RA_d,DEC_d,FLUX_BIBCODE_J,NB_REF
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,deg,deg,Unnamed: 5_level_1,Unnamed: 6_level_1
object,object,object,float64,float64,object,int32
Gaia dr2 6049011053265501056,HD 147701,Star,246.08882697,-25.02540247,2003yCat.2246....0C,101
Gaia dr2 6050945747052676352,* rho Oph C,Star,246.39618174,-23.4052191,2003yCat.2246....0C,96
Gaia dr2 6047588594453743360,ROXs 43A,TTau*,247.83382793,-24.50140548,2003yCat.2246....0C,46
Gaia dr2 6050352659313505536,HD 146706,V*,244.61773753,-23.27430241,2003yCat.2246....0C,46
Gaia dr2 6050951072812273664,Haro 1-8,TTau*,246.69761733,-23.24783702,2003yCat.2246....0C,44


In [4]:
# Prepare File for SIMBAD Script Query =========
# To find the papers citing most objects in the common-control-no sample
simbad_inp     = ['query id ' + inp.decode('utf-8') for inp in simbad['TYPED_ID']]
simbad_sc_file = 'common_sample_case_0_no_control_simbad_out.txt' # Input file for SIMBAD script
Table([simbad_inp]).write(simbad_sc_file, format = 'ascii.fast_no_header', overwrite = True)

print()
print(f'INPUT FILE FOR SIMBAD-SCRIPT SERVICE:     {simbad_sc_file:}')
print('OUTPUT FILE OF SIMBAD-SCRIPT SERVICE:     simbad_bibcodes.txt')

!open $simbad_sc_file


INPUT FILE FOR SIMBAD-SCRIPT SERVICE:     common_sample_case_0_no_control_simbad_out.txt
OUTPUT FILE OF SIMBAD-SCRIPT SERVICE:     simbad_bibcodes.txt


In [5]:
# Upload "common_sample_case_0_no_control_simbad_out.txt" to http://simbad.u-strasbg.fr/simbad/sim-fscript
# and download the output as "simbad_bibcodes.txt".
# Script examples:

#format object form1 "%IDLIST(1) %COO(d;A) %COO(d;D) %BIBCODELIST"
#result full
#query id Gaia dr2 6050346856816823552
#query id Gaia dr2 6050345104469958400

#format object form1 "%IDLIST(1) %BIBCODELIST"
#result full
#query id Gaia dr2 6050346856816823552
#query id Gaia dr2 6050345104469958400

In [6]:
# Read SIMBAD bibcodes =========================
# Single-column file; the reader names that column 'col1'
bibcodes     = Table.read('simbad_bibcodes.txt', format = 'ascii.fast_no_header')
codes        = bibcodes['col1']
bibcodes_set = list(set(codes))

print(f'Total bibcodes: {len(bibcodes):25.0f}')
print(f'Individual bibcodes: {len(bibcodes_set):20.0f}')
print('='*41)

# Occurrences per bibcode, then the ten most frequent ones
counter = Counter(codes)
most_10 = counter.most_common(10)

for bibcode, n_ref in most_10:
    print('Bibcode: ', bibcode, '  NRef: ', n_ref)

Total bibcodes:                       932
Individual bibcodes:                  407
Bibcode:  2012ApJ...758...31L   NRef:  26
Bibcode:  2016ApJS..224....2H   NRef:  24
Bibcode:  2003A&A...404..913S   NRef:  20
Bibcode:  2015MNRAS.448.2737R   NRef:  20
Bibcode:  2017ApJ...837...95B   NRef:  18
Bibcode:  2015ApJ...813...83C   NRef:  16
Bibcode:  2007ApJ...667..308C   NRef:  14
Bibcode:  2007ApJ...662..413K   NRef:  14
Bibcode:  2005A&A...438..769D   NRef:  12
Bibcode:  1998MNRAS.300..733M   NRef:  11


# Prepare Vizier Queries

In [7]:
# Inputs for the VizieR queries ========================
# Text copy of TYPED_ID, stored as a new column and passed to the queries below
simbad['vizier_id'] = [typed_id.decode('utf-8') for typed_id in simbad['TYPED_ID']]
# Radius handed to every query
radii               = u.Quantity(0.5, u.arcsecond)

In [8]:
# Query three VizieR catalogues for the SIMBAD-matched objects ===
def _query_vizier(catalog, columns):
    """Run one VizieR query for every ``simbad['vizier_id']`` and keep ``columns``.

    catalog : VizieR table identifier, passed through to ``samp_ini.query_ids_1``.
    columns : tuple of column names selected from the query result.
    """
    matches = samp_ini.query_ids_1(simbad['vizier_id'], catalog=catalog, radius=radii, verbose=False)
    return matches[columns]

# Luhman 2012 catalogue (J/ApJ/758/31)
cat_2012_L = _query_vizier('J/ApJ/758/31/table1', ('inp_id', '_r', 'Name', 'SpT'))

# Rizzuto 2015 catalogue (J/MNRAS/448/2737)
cat_2015_R = _query_vizier('J/MNRAS/448/2737/table2', ('inp_id', '_r', '_2MASS', 'SpT', 'AV'))

# Kraus 2007 catalogue (J/ApJ/662/413)
cat_2007_K = _query_vizier('J/ApJ/662/413/table2', ('inp_id', '_r', 'SimbadName'))

In [9]:
# Stack the three catalogues and keep one row per inp_id =====
stacked = vstack([cat_2007_K, cat_2012_L, cat_2015_R])
merged  = unique(stacked, keys='inp_id')

# Row counts, in this order: Kraus 2007, Rizzuto 2015, Luhman 2012, merged
print(*(len(tab) for tab in (cat_2007_K, cat_2015_R, cat_2012_L, merged)))

14 20 25 43


In [10]:
# Preview the first three rows of the Luhman 2012 matches ===
cat_2012_L[:3]

inp_id,_r,Name,SpT
Unnamed: 0_level_1,arcsec,Unnamed: 2_level_1,Unnamed: 3_level_1
str28,float64,bytes28,bytes8
Gaia dr2 6045914377549499648,0.04,2MASS J16270217-2542346,M6
Gaia dr2 6048976762246802944,0.11,2MASS J16233234-2523485,G1
Gaia dr2 6049011053265501056,0.05,2MASS J16242132-2501314,B5III


In [11]:
# Preview the first three rows of the Rizzuto 2015 matches ===
cat_2015_R[:3]

inp_id,_r,_2MASS,SpT,AV
Unnamed: 0_level_1,arcsec,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
str28,float64,bytes17,bytes4,float32
Gaia dr2 6048608906890968960,0.05,J16212953-2529431,M2.5,0.7
Gaia dr2 6048935358761628288,0.08,J16262803-2526477,M0.0,1.8
Gaia dr2 6048948759059667712,0.13,J16263495-2511409,M0.0,1.1


In [12]:
# Preview the first three rows of the Kraus 2007 matches ===
cat_2007_K[:3]

inp_id,_r,SimbadName
Unnamed: 0_level_1,arcsec,Unnamed: 2_level_1
str28,float64,bytes31
Gaia dr2 6048976762246802944,0.11,GSC 06798-00035
Gaia dr2 6049226042146798592,0.22,[SCH2006] J16235474-24383211
Gaia dr2 6050067061167006720,0.04,[SCH2006] J16200756-23591522
