In [13]:
import numpy as np
import pandas as pd
from collections import Counter
from astropy.io.fits import getdata
from astropy import units as u
from astropy.coordinates import SkyCoord, Angle
from astroquery.vizier import Vizier
from astropy.table import Table
from astroquery.xmatch import XMatch
from astroquery.simbad import Simbad
import time



import sys  
sys.path.insert(0, '../')

from prepare_library import atnf_pos, create_perobs_data, cal_ave, add_MW, confusion_clean, TD_clean
from muwclass_library import prepare_cols

from TD_dictionary import Simbad_dict,crowd_fields_dict,rare_sources_removed_dict,rare_sources_saving_dict 

import warnings
warnings.filterwarnings('ignore')

Vizier.ROW_LIMIT = -1
exnum = -999999.

## Comments
### 1. The updated TD can consist of two parts, one for the confidently classified (well-known) sources, the other for the candidates

### 2. X-ray sources match to SIMBAD, use SIMBAD classification (main_type as well as other_types), to add more rare-type sources and verifiy the existing classifications

SRG_ART_XC = Vizier(catalog="J/A+A/661/A38",row_limit=-1, #2022A&A...661A..38P
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
print(SRG_ART_XC.to_pandas()['Type'].value_counts())
#print(SRG_ART_XC.to_pandas())
# ∼4×10−12 erg s−1 cm−2, 15", 4-12 keV


## Low-Mass X-ray Binaries from https://arxiv.org/abs/2206.10053

* Galactic UCXBs (~20 + 20 cands)
* Galactic LMXBs exhibiting clear total eclipses (~15)
* Galactic SyXBs (~20)
* LMXBs that are quasi-persistent (~20), i.e., transient systems but exhibiting prolonged outbursts of >1 yr
* VFXBs in our Galaxy (~40)
* Galactic LMXBs accreting around the Eddington limit.  (~20)
* Galactic globular cluster LMXBs (~20)


## HMXB checking https://arxiv.org/abs/2207.02114

## do not use NS https://github.com/NickSwainston/pulsar_spectra

## add a binary star type? 

* binaries candidates from APOGEE survey which we excluded [(APOGEE.s_HRV <= 1) & (APOGEE.s_HRV <= 5*APOGEE.errHRV)]
* Gaia binaries (Gaia Spectroscopic Orbits Validated with LAMOST and GALAH Radial Velocities; )


## More 

* AGNs: 6dF Optical AGN Catalog; BAT AGN Spectroscopic Survey; 2MASS Redshift Survey; https://astrocloud.nrao.edu/s/L2R3fYHGt2jCbTz; FIRST-NVSS-SDSS AGN catalog; WISE and Sloan Digital Sky Survey (SDSS) spectroscopic data catalog (Toba et al. 2014)
* YSOs: HST ULLYSES Program
* 4XMM-DR10 (Tranin et al. 2022).

## Make use of SIMBAD
### do not use their coordinates! 



In [14]:
# define some directories and output name

data_dir = './data'
old_TD_dir = './data'
field_name = 'CSCv2_TD_11162022'
verb = 0

query_dir = '../data/query'

In [6]:
open_CV = pd.read_csv('./data/updates/Open_CV_catalog.csv')
# no Nova

print(len(open_CV))
#print(open_CV['Type'].value_counts())
print(Counter(open_CV['Type']))
open_CV = open_CV[open_CV['Type'].isin(['Candidate', 'CataclyV', 'known CV', 'CataclyV_Candidate', 'Cataclysmic_Variable', 'Known CV'])]
print(len(open_CV))
print(Counter(open_CV['Type']))
open_CV['Class'] = np.nan
open_CV.loc[open_CV['Type'].isin([ 'CataclyV', 'known CV', 'Cataclysmic_Variable', 'Known CV']), 'Class'] = 'CV'
open_CV.loc[open_CV['Type'].isin([ 'Candidate', 'CataclyV_Candidate']), 'Class'] = 'CV-candidate'

print(len(open_CV))
#print(open_CV['Type'].value_counts())
#print(open_CV.columns)

open_CV['e_Pos'], open_CV['ref'] = np.nan, 'The Open Cataclysmic Variable Catalog'
open_CV = open_CV.rename(columns={'Name':'name_cat','Type':'SubClass'})

open_CV['R.A.'] = open_CV.apply(lambda r: str(r['R.A.']).split(',', 1)[0],axis=1)
open_CV['Dec.'] = open_CV.apply(lambda r: str(r['Dec.']).split(',', 1)[0],axis=1)
open_CV.to_csv('./data/updates/Open_CV_catalog_test.csv',index=False)
open_CV = pd.read_csv('./data/updates/Open_CV_catalog_test.csv')
open_CV['_RAJ2000'] = Angle(open_CV['R.A.'], 'hourangle').degree
open_CV['_DEJ2000'] = Angle(open_CV['Dec.'], 'deg').degree

open_CV = open_CV.drop(columns=['R.A.','Dec.','Disc. Date', 'Mag.'])



14413
Counter({'Candidate': 3819, 'CataclyV': 3669, nan: 2331, 'N': 1627, 'known CV': 902, 'CataclyV_Candidate': 851, 'DN': 439, 'Cataclysmic_Variable': 323, 'Known CV': 240, 'Nova_Candidate': 194, 'Candidate Nova': 17, 'AntiNova': 1})
9804
Counter({'Candidate': 3819, 'CataclyV': 3669, 'known CV': 902, 'CataclyV_Candidate': 851, 'Cataclysmic_Variable': 323, 'Known CV': 240})
9804


In [7]:
# some CVs catalog that may not included in Open CV catalog
CV_ZTF = Vizier(catalog="J/AJ/162/94/table1",row_limit=-1, #https://ui.adsabs.harvard.edu/abs/2021AJ....162...94S/abstract
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
#CV_ZTF.to_pandas().to_csv('./data/updates/CV_ZTF.csv',index=False)
CV_ZTF = CV_ZTF['_RAJ2000','_DEJ2000','ZTF','Spec'].to_pandas().rename(columns={'Spec':'SubClass','ZTF':'name_cat'})
CV_ZTF['e_Pos'], CV_ZTF['ref'] = np.nan, '2021AJ....162...94S'
CV_ZTF['SubClass'] = CV_ZTF.apply(lambda r: 'Spec-'+r['SubClass'] if r['SubClass']!='' else '', axis=1)
CV_ZTF['Class'] = 'CV'
 
#print(CV_ZTF)

CV_can_ZTF = Vizier(catalog="J/AJ/162/94/table2",row_limit=-1, #https://ui.adsabs.harvard.edu/abs/2021AJ....162...94S/abstract
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
#CV_can_ZTF.to_pandas().to_csv('./data/updates/CV_can_ZTF.csv',index=False)
CV_can_ZTF = CV_can_ZTF['_RAJ2000','_DEJ2000','ZTF','Spec'].to_pandas().rename(columns={'Spec':'SubClass','ZTF':'name_cat'})
CV_can_ZTF['e_Pos'], CV_can_ZTF['ref'] = np.nan, '2021AJ....162...94S'
CV_can_ZTF['SubClass'] = CV_can_ZTF.apply(lambda r: 'Spec-'+r['SubClass'] if r['SubClass']!='' else '', axis=1)
CV_can_ZTF['Class'] = 'CV-candidate'

#print(CV_can_ZTF)

#'''
CV_LAMOST_dr5 = Vizier(catalog="J/AJ/159/43",row_limit=-1, #2020AJ....159...43H 
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
#CV_LAMOST.to_pandas().to_csv('./data/updates/CV_LAMOST.csv',index=False)
CV_LAMOST_dr5 = CV_LAMOST_dr5['_RAJ2000','_DEJ2000','LAMOST','Type','Type-lit'].to_pandas().rename(columns={'Type-lit':'SubClass','Type':'Class','LAMOST':'name_cat'})
CV_LAMOST_dr5['e_Pos'], CV_LAMOST_dr5['ref'] = np.nan, '2020AJ....159...43H'
CV_LAMOST_dr5 = CV_LAMOST_dr5[CV_LAMOST_dr5['Class']=='CV'].reset_index(drop=True)
CV_LAMOST_dr5['Class'] = 'CV-candidate'


#print(CV_LAMOST.to_pandas()
#print(CV_LAMOST_dr5)


CV_LAMOST_dr6 = Vizier(catalog="J/ApJS/257/65",row_limit=-1, #https://ui.adsabs.harvard.edu/abs/2021ApJS..257...65S/abstract
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
#CV_LAMOST.to_pandas().to_csv('./data/updates/CV_LAMOST.csv',index=False)
CV_LAMOST_dr6 = CV_LAMOST_dr6['_RAJ2000','_DEJ2000','LAMOST','Set','sType1'].to_pandas().rename(columns={'sType1':'SubClass','LAMOST':'name_cat'})
CV_LAMOST_dr6['e_Pos'], CV_LAMOST_dr6['ref'] = np.nan, '2021ApJS..257...65S'
CV_LAMOST_dr6['Class'] = 'CV-candidate'
#CV_LAMOST_dr6.loc[CV_LAMOST_dr6['Set']=='new', 'Class'] = 'CV'
CV_LAMOST_dr6 = CV_LAMOST_dr6.drop(columns=['Set'])

#print(CV_LAMOST_dr6)
#'''


In [8]:
# Swift BAT 105-Month Hard X-ray Survey
# https://swift.gsfc.nasa.gov/results/bs105mon/
# add them to candidates
# https://ui.adsabs.harvard.edu/abs/2015AJ....150..170H/citations
# spectroscopically verified CVs can be used, but need accurate coordinates 

BAT105 = pd.read_csv(f'./data/updates/BAT105.csv')

BAT105.columns = BAT105.columns.str.strip() 
BAT105 = BAT105.applymap(lambda x: x.strip() if isinstance(x, str) else x)
BAT105.columns

print(len(BAT105))
#BAT105['RA']
#BAT105['BAT_NAME']
#print(Counter(BAT105['TYPE']))

#print(BAT105.columns)
#print(BAT105.groupby(['CL2','TYPE']).size())

BAT105 = BAT105[BAT105['TYPE'].isin(['Sy1.9','Sy1.5','Sy2','Sy1.2','Sy1.8','Beamed AGN','Sy1','CV','HMXB','LMXB','Pulsar'])]
BAT105['Class'] = np.nan
BAT105.loc[BAT105['TYPE'].isin(['Sy1.9','Sy1.5','Sy2','Sy1.2','Sy1.8','Beamed AGN','Sy1']), 'Class'] = 'AGN'
BAT105.loc[BAT105['TYPE'].isin([ 'CV']), 'Class'] = 'CV'
BAT105.loc[BAT105['TYPE'].isin([ 'HMXB']), 'Class'] = 'HMXB'
BAT105.loc[BAT105['TYPE'].isin([ 'LMXB']), 'Class'] = 'LMXB'
BAT105.loc[BAT105['TYPE'].isin([ 'Pulsar']), 'Class'] = 'NS'
print(Counter(BAT105['Class']))
#print(open_CV['Type'].value_counts())
#print(open_CV.columns)
print(BAT105.groupby(['CL2','TYPE']).size())

BAT105['e_Pos'], BAT105['ref'] = np.nan, '2018ApJS..235....4O'
BAT105 = BAT105.rename(columns={'COUNTERPART_NAME':'name_cat','TYPE':'SubClass','CTPT_RA':'_RAJ2000','CTPT_DEC':'_DEJ2000'})
BAT105 = BAT105[['name_cat','_RAJ2000','_DEJ2000','e_Pos','Class','SubClass','ref']]

#print(BAT105)


# 8.40×10−12 erg s−1 cm−2, 14-195 keV, ~10 arcmin


1632
Counter({'AGN': 973, 'LMXB': 109, 'HMXB': 108, 'CV': 75, 'NS': 25})
CL2  TYPE      
40   Sy1           161
     Sy1.2          96
     Sy1.5         111
     Sy1.8           9
50   Sy1.9         128
     Sy2           310
80   Beamed AGN    158
90   CV             75
150  Pulsar         25
180  HMXB          108
190  LMXB          109
dtype: int64


In [9]:
LAMOST_OB = Vizier(catalog="J/ApJS/241/32",row_limit=-1, #https://ui.adsabs.harvard.edu/abs/2021ApJS..257...65S/abstract
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
#CV_LAMOST.to_pandas().to_csv('./data/updates/CV_LAMOST.csv',index=False)
print(LAMOST_OB.to_pandas())
LAMOST_OB = LAMOST_OB['_RAJ2000','_DEJ2000','ObsID','SpT'].to_pandas().rename(columns={'SpT':'SubClass','ObsID':'name_cat'})
LAMOST_OB['e_Pos'], LAMOST_OB['ref'] = np.nan, '2019ApJS..241...32L'
LAMOST_OB['Class'] = 'HM-STAR'
#CV_LAMOST_dr6 = CV_LAMOST_dr6.drop(columns=['Set'])
#LAMOST_OB


         _RAJ2000   _DEJ2000      ObsID     RAJ2000    DEJ2000         S_N  \
0       68.068632  53.143738   29814218   68.068632  53.143738  296.359985   
1       82.819712  28.924288     513098   82.819712  28.924288   28.740000   
2       95.782966  26.778336    1902033   95.782966  26.778336   69.180000   
3       32.732299  58.305086    2404184   32.732299  58.305086   84.250000   
4       35.327500  57.147800    2407116   35.327500  57.147800   47.400002   
...           ...        ...        ...         ...        ...         ...   
22896  288.815187  43.674379  581808073  288.815187  43.674379  179.630005   
22897  200.848058  26.275522  582314065  200.848058  26.275522   39.900002   
22898  233.317108  44.587383  582404042  233.317108  44.587383   99.750000   
22899  233.013558  42.962747  582405046  233.013558  42.962747   75.930000   
22900  172.122059  29.251185  584705034  172.122059  29.251185  122.010002   

       m_ObsID  Nobs   SSpT MKSpT    SpT       Comm  
0        

In [10]:
LAMOST_AFGK = pd.read_csv('./data/updates/dr8_v2.0_LRS_stellar.csv')
#CV_LAMOST.to_pandas().to_csv('./data/updates/CV_LAMOST.csv',index=False)
#print(LAMOST_OB.to_pandas())

LAMOST_AFGK = LAMOST_AFGK[['ra','dec','uid','subclass']].rename(columns={'ra':'_RAJ2000','dec':'_DEJ2000','subclass':'SubClass','uid':'name_cat'})
LAMOST_AFGK['e_Pos'], LAMOST_AFGK['ref'] = np.nan, 'LAMOST-DR8-AFGK'
LAMOST_AFGK['Class'] = 'LM-STAR'
#CV_LAMOST_dr6 = CV_LAMOST_dr6.drop(columns=['Set'])
#LAMOST_AFGK 

LAMOST_M = pd.read_csv('./data/updates/dr8_v2.0_LRS_mstellar.csv')
#CV_LAMOST.to_pandas().to_csv('./data/updates/CV_LAMOST.csv',index=False)
#print(LAMOST_OB.to_pandas())

LAMOST_M = LAMOST_M[['ra','dec','uid','subclass']].rename(columns={'ra':'_RAJ2000','dec':'_DEJ2000','subclass':'SubClass','uid':'name_cat'})
LAMOST_M['e_Pos'], LAMOST_M['ref'] = np.nan, 'LAMOST-DR8-M'
LAMOST_M['Class'] = 'LM-STAR'
#CV_LAMOST_dr6 = CV_LAMOST_dr6.drop(columns=['Set'])
#LAMOST_M

#print(LAMOST_AFGK['SubClass'].value_counts())

#print(LAMOST_M['SubClass'].value_counts())

# YSOs 
### from multiple molecular clouds and open clusters (Megeath et al. 2012; Povich et al. 2011; Ozawa et al. 2005; Giardino et al. 2007; Rebull et al. 2011; Delgado et al. 2011);

In [11]:
YSO1 = Vizier(catalog="J/AJ/144/192",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2012AJ....144..192M
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(Cl='=|P|D')[0]
YSO1 = YSO1['_RAJ2000','_DEJ2000','Cl'].to_pandas().rename(columns={'Cl':'SubClass'})
YSO1['e_Pos'], YSO1['ref'] = np.nan, '2012AJ....144..192M'
print(len(YSO1),Counter(YSO1['SubClass']))

YSO2 = Vizier(catalog="J/ApJS/194/14/catalog",row_limit=-1,#https://ui.adsabs.harvard.edu/?#abs/2011ApJS..194...14P
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(Stage='!=A')[0]
YSO2 = YSO2['_RAJ2000','_DEJ2000','Stage'].to_pandas().rename(columns={'Stage':'SubClass'})
YSO2['e_Pos'], YSO2['ref'] = np.nan, '2011ApJS..194...14P'
print(len(YSO2),Counter(YSO2['SubClass']))


YSO3 = Vizier(catalog="J/A+A/429/963",row_limit=-1,#https://ui.adsabs.harvard.edu/?#abs/2005A%26A...429..963O
    columns=['*', '_RAJ2000', '_DEJ2000','e_Pos']).query_constraints(Class='!=nIII')[0]
YSO3 = YSO3['_RAJ2000','_DEJ2000','e_Pos','Class'].to_pandas().rename(columns={'Class':'SubClass'})
YSO3['ref'] = '2005A&A...429..963O'
print(len(YSO3),Counter(YSO3['SubClass']))


YSO4 = Vizier(catalog="J/A%2bA/463/275/table5",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2007A%26A...463..275G
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(clYSO='=|I|I/II|II|II/III|III')[0]
YSO4 = YSO4['_RAJ2000','_DEJ2000','clYSO'].to_pandas().rename(columns={'clYSO':'SubClass'})
YSO4['e_Pos'], YSO4['ref'] = np.nan, '2007A&A...463..275G'
print(len(YSO4),Counter(YSO4['SubClass']))

YSO5 = Vizier(catalog="J/ApJS/196/4",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2011ApJS..196....4R
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(St='=|k|n')[0]
YSO5 = YSO5['_RAJ2000','_DEJ2000','St'].to_pandas().rename(columns={'St':'SubClass'})
YSO5['e_Pos'], YSO5['ref'] = np.nan,  '2011ApJS..196....4R'
print(len(YSO5),Counter(YSO5['SubClass']))

YSO6 = Vizier(catalog="J/A%2bA/531/A141/catalog",row_limit=-1,#https://ui.adsabs.harvard.edu/?#abs/2011A%26A...531A.141D
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(MmD='=2')[0]
YSO6 = YSO6['_RAJ2000','_DEJ2000','MmD'].to_pandas().rename(columns={'MmD':'SubClass'})
YSO6['e_Pos'], YSO6['ref'] = np.nan, '2011A&A...531A.141D'
print(len(YSO6),Counter(YSO6['SubClass']))



3419 Counter({'D': 2991, 'P': 428})
808 Counter({'II': 478, '0/I': 247, 'III': 83})
72 Counter({'II': 26, '': 22, 'III': 17, 'I': 7})
56 Counter({'II': 20, 'III': 16, 'I/II': 9, 'I': 9, 'II/III': 2})
272 Counter({'k': 178, 'n': 94})
308 Counter({2: 308})


In [12]:
df_YSOs = pd.concat([YSO1, YSO2, YSO3, YSO4, YSO5, YSO6])
df_YSOs = df_YSOs.reset_index(drop=True)
df_YSOs['Class']='YSO'
print(len(df_YSOs),'YSOs')

4935 YSOs


# STARs
### from the Catalog of Stellar Spectral Classifications (Skiff 2014) with O, B or W (e.g., WN, WR stars) types are labeled as HM-STARs and A, F, G, K, or M types are labeled as LM-STARs;

In [3]:
stars = Vizier(catalog="B/mk/mktypes",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2014yCat....1.2023S
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(Mag='<=23')[0]# Fainter sources with Mag > 23 were removed
stars = stars['_RAJ2000','_DEJ2000','Name','SpType','Bibcode','Remarks','Mag'].to_pandas()
stars = stars.replace(r'^\s*$', np.nan, regex=True)
print(len(stars)) 

937000


In [4]:
# Sources with their SpType column including strings of “e”, “s”, “n”, “p”, “f”, “cv”, “i”, “r”, “a”, “D”, “C”, “cont”, “l”,“H”, “h”, “abs”, “+”, “:”, “*”, “?” were removed
# since their spectral type may not be reliable 
stars_d1 = stars[stars['SpType'].str.contains('e|s|n|p|f|cv|i|r|a|D|C|cont|l|H|h|abs|\+|\*|\:|\?', na=False)]
stars_f1 = stars[~stars.set_index(['_RAJ2000','_DEJ2000']).index.isin(stars_d1.set_index(['_RAJ2000','_DEJ2000']).index)]
stars_f1 = stars_f1.reset_index(drop=True)

#print(len(stars))
#print(len(stars_d1))
#print(len(stars_f1))

# any sources with non-empty Remarks column were removed;
stars_d2 = stars_f1[stars_f1['Remarks'].isnull() == False]
stars_f2 = stars_f1[~stars_f1.set_index(['_RAJ2000','_DEJ2000']).index.isin(stars_d2.set_index(['_RAJ2000','_DEJ2000']).index)]
stars_f2 = stars_f2.reset_index(drop=True)

#print(len(stars_d2))
#print(len(stars_f2))

# Sources with “H97b” in their Name column were removed. They are Orion stars which are likely a mix of faint low-mass stars and YSOs and better to be dropped
stars_d3 = stars_f2[stars_f2['Name'].str.contains('H97b')]
stars_f3 = stars_f2[~stars_f2.set_index(['_RAJ2000','_DEJ2000']).index.isin(stars_d3.set_index(['_RAJ2000','_DEJ2000']).index)]
stars_f3 = stars_f3.reset_index(drop=True)

#print(len(stars_d3))
#print(len(stars_f3))

In [5]:
# Seperate high and low mass stars into their respective classes
star_hm = stars_f3[stars_f3['SpType'].str.startswith(tuple(['O','B','W']), na=False)]
star_lm = stars_f3[stars_f3['SpType'].str.startswith(tuple(['A','F','G','K','M']), na=False)]
star_hm = star_hm.reset_index(drop=True)
star_lm = star_lm.reset_index(drop=True)

star_hm['e_Pos'], star_hm['Class'] = np.nan, 'HM-STAR'
star_lm['e_Pos'], star_lm['Class'] = np.nan, 'LM-STAR'
df_HMSTARs = star_hm.rename(columns={'Name':'name_cat','SpType':'SubClass','Bibcode':'ref'}).drop(columns=['Remarks','Mag'])
df_LMSTARs = star_lm.rename(columns={'Name':'name_cat','SpType':'SubClass','Bibcode':'ref'}).drop(columns=['Remarks','Mag'])
print(len(df_HMSTARs))
print(len(df_LMSTARs))



62124
450224


In [7]:
print(df_LMSTARs['ref'].value_counts())
print(df_HMSTARs['ref'].value_counts())

2011AJ....141...97W    59396
2008AJ....135..785W    32686
1975MSS...C01....0H    21231
1949AnHar.112....1C    21180
1982MSS...C03....0H    19872
                       ...  
1937AnHar.105..491M        1
1992A&A...265...45C        1
1969ApJ...158.1091A        1
1956ApJ...124...43V        1
2004ApJ...608..809G        1
Name: ref, Length: 1737, dtype: int64
1976A&AS...23..283L    6054
1975MSS...C01....0H    4111
1978MSS...C02....0H    2651
2004MNRAS.353..601E    2349
1982MSS...C03....0H    2095
                       ... 
1988A&A...197..151C       1
1985A&AS...60..399G       1
1983A&AS...53...33L       1
1986ApJS...61..455M       1
1978AJ.....83..278C       1
Name: ref, Length: 1110, dtype: int64


#### Spectroscopically classified low-mass stars from the APOGEE data in SDSS Data Release 16 were obtained. We filtered out those unreliable sources if they don’t have effective temperature measurements or surface gravity measurements. We also removed those likely binary systems by filtering on the VSCATTER if VSCATTER > 1 km/s and/or VSCATTER > 5*VERR_MED. We also removed sources that are not flagged as a star based on Washington/DDO 51 photometry

In [16]:
APOGEE_all = Vizier(catalog="III/284/allstars",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2020AJ....160..120J
    columns=['*','_RAJ2000', '_DEJ2000','AName','Giant','Star']).query_constraints(Teff='>3000 & <10000',logg='>-1 & <7')[0]

APOGEE = APOGEE_all['_RAJ2000','_DEJ2000','AName','Giant','Star','TClass','Teff','logg','s_HRV','errHRV'].to_pandas()

APOGEE_STAR = APOGEE[APOGEE.Star == 1].reset_index(drop=True)
#APOGEE_STAR['Binary'] = ''
APOGEE_STAR.loc[(APOGEE_STAR.s_HRV <= 1) & (APOGEE_STAR.s_HRV <= 5*APOGEE_STAR.errHRV), 'TClass'] = APOGEE_STAR.loc[(APOGEE_STAR.s_HRV <= 1) & (APOGEE_STAR.s_HRV <= 5*APOGEE_STAR.errHRV), 'TClass'] + '|Binary'#.reset_index(drop=True)# & APOGEE.Teff.isnull() & (APOGEE.logg <= 7) & (APOGEE.logg >= -1) ]
print(Counter(APOGEE_STAR['TClass']))

APOGEE_STAR['e_Pos'], APOGEE_STAR['Class'], APOGEE_STAR['ref'] = np.nan, 'LM-STAR', '2020AJ....160..120J'
APOGEE_STAR = APOGEE_STAR.rename(columns={'AName':'name_cat','TClass':'SubClass'})
APOGEE_STAR = APOGEE_STAR.replace('none', np.nan, regex=True)
APOGEE_STAR = APOGEE_STAR.drop(columns=['Giant','Star','Teff','logg','s_HRV','errHRV'])
print(len(APOGEE_STAR))

Counter({'GKg_c|Binary': 54641, 'GKg_b|Binary': 52765, 'GKg_a|Binary': 43870, 'GKd_c|Binary': 23912, 'GKd_b|Binary': 23526, 'GKg_d|Binary': 22472, 'GKg_c': 19166, 'GKg_b': 18996, 'GKd_a|Binary': 16669, 'GKg_a': 12251, 'GKd_d|Binary': 10204, 'Mg_a|Binary': 7846, 'GKg_d': 7420, 'Mg_b': 7112, 'Mg_c': 7099, 'Fd_b|Binary': 6535, 'Fd_c|Binary': 6517, 'Mg_b|Binary': 6385, 'Mg_a': 6128, 'Mg_c|Binary': 5938, 'GKd_c': 5132, 'GKd_b': 5103, 'Md_b|Binary': 4996, 'Md_c|Binary': 4983, 'Fd_a|Binary': 4958, 'BA': 4786, 'Md_a|Binary': 3488, 'Fd_d|Binary': 3068, 'Mg_d|Binary': 2955, 'GKd_a': 2930, 'Mg_d': 2904, 'BA|Binary': 2561, 'Fd_c': 2381, 'Fd_b': 2251, 'GKd_d': 2083, 'Md_d|Binary': 1882, 'Md_c': 1592, 'Md_b': 1589, 'Fd_a': 1514, 'Fd_d': 924, 'Md_a': 791, 'Md_d': 635})
422958


## WRs
### HM-STARs from the VIIth Catalog of Galactic Wolf-Rayet Stars (van der Hucht 2001) and its annex catalog (van der Hucht 2006)

In [17]:
WRs1 = Vizier(catalog="III/215",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2001NewAR..45..135V
    columns=['*', '_RAJ2000', '_DEJ2000','OName']).query_constraints()[0]
WRs1 = WRs1['_RAJ2000','_DEJ2000','Name','OName','Aname'].to_pandas()
WRs1['Class'], WRs1['e_Pos'], WRs1['ref'], WRs1['SubClass'] = 'HM-STAR', np.nan, '2001NewAR..45..135V', np.nan#III/215
WRs1 = WRs1.replace(r'^\s*$', np.nan, regex=True)
WRs1['name_cat'] = WRs1['Name'].combine_first(WRs1['OName'].combine_first(WRs1['Aname']))
df_WRs1 = WRs1.drop(columns=['Name','OName','Aname'])
print(len(df_WRs1))


226


In [18]:
WRs2 = Vizier(catalog="J/A+A/458/453/table1",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2006A%26A...458..453V
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
WRs2 = WRs2['_RAJ2000','_DEJ2000','SpType','SpType0','SimbadName','WRori'].to_pandas()
WRs2['Class'], WRs2['e_Pos'], WRs2['ref']= 'HM-STAR', np.nan, '2006A&A...458..453V'#J/A+A/458/453/table1
WRs2 = WRs2.replace(r'^\s*$', np.nan, regex=True)
WRs2['name_cat'] = WRs2['SimbadName'].combine_first(WRs2['WRori'])
WRs2['SubClass'] = WRs2['SpType'].combine_first(WRs2['SpType0'])
df_WRs2 = WRs2.drop(columns=['SpType','SpType0','SimbadName','WRori'])
print(len(df_WRs2))


118


# Quasars & AGNs 
### from Veron Catalog of Quasars & AGN 13th Edition (Veron-Cetty & Veron 2010)

In [19]:
AGNs = Vizier(catalog="VII/258/vv10",row_limit=-1,#https://ui.adsabs.harvard.edu/?#abs/2010A%26A...518A..10V
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints(Cl='|Q|A|B')[0]
AGNs = AGNs['_RAJ2000','_DEJ2000','Name','Cl'].to_pandas()
AGNs['Class'], AGNs['e_Pos'], AGNs['ref']= 'AGN', np.nan, '2010A&A...518A..10V'#VII/258/vv10
df_AGNs = AGNs.rename(columns={'Name':'name_cat','Cl':'SubClass'})

print(len(df_AGNs), Counter(df_AGNs['SubClass']))

168940 Counter({'Q': 133335, 'A': 34231, 'B': 1374})


# HMXBs
### from the Catalog of HMXBs in the Galaxy 4th Edition (Liu et al. 2006)

In [20]:
HMXBs = Vizier(catalog="J/A+A/455/1165/table1",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2006A%26A...455.1165L
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
HMXBs = HMXBs['_RAJ2000','_DEJ2000','Name','Type'].to_pandas()
HMXBs['Class'], HMXBs['e_Pos'], HMXBs['ref'] = 'HMXB', np.nan, '2006A&A...455.1165L'#J/A+A/455/1165/table1
HMXBs.loc[HMXBs['Name']=='2S 1417-624', '_RAJ2000'], HMXBs.loc[HMXBs['Name']=='2S 1417-624', '_DEJ2000']= 215.3005894, -62.6989987
df_HMXBs = HMXBs.rename(columns={'Name':'name_cat','Type':'SubClass'})
print(len(df_HMXBs))


114


# LMXBs
### from the Low Mass X-ray Binary Catalog (Liu et al. 2007) and from the Catalog of CVs, LMXBs and related objects (Seventh edition) (Ritter & Kolb 2003)

In [21]:
LMXBs1 = Vizier(catalog="J/A+A/469/807",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2007A%26A...469..807L
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
LMXBs1 = LMXBs1['_RAJ2000','_DEJ2000','Name','Type'].to_pandas()
LMXBs1['Class'], LMXBs1['e_Pos'], LMXBs1['ref'] = 'LMXB', np.nan, '2007A&A...469..807L'#J/A+A/469/807
df_LMXBs1 = LMXBs1.rename(columns={'Name':'name_cat','Type':'SubClass'})
print(len(df_LMXBs1))


187


In [22]:
LMXBs2 = Vizier(catalog="B/cb/lmxbdata",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2003A%26A...404..301R
    columns=['*', '_RAJ2000', '_DEJ2000','epos']).query_constraints()[0]

LMXBs2 = LMXBs2['_RAJ2000','_DEJ2000','epos','Name','Type1'].to_pandas()
LMXBs2['Class'], LMXBs2['ref'] = 'LMXB', '2003A&A...404..301R' #B/cb/lmxbdata
df_LMXBs2 = LMXBs2.rename(columns={'Name':'name_cat','Type1':'SubClass','epos':'e_Pos'})
print(len(df_LMXBs2))


108


# CVs
### from the Cataclysmic Variables Catalog 2006 Edition (Downes et al. 2001) and the Catalog of CVs, LMXBs and related objects (Seventh edition) (Ritter & Kolb 2003)

In [23]:
CVs1 = Vizier(catalog="V/123A",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2001PASP..113..764D
    columns=['*', '_RAJ2000', '_DEJ2000']).query_constraints()[0]
CVs1 = CVs1['_RAJ2000','_DEJ2000','Names','VarType'].to_pandas()
CVs1['Class'], CVs1['e_Pos'], CVs1['ref'] = 'CV', np.nan, '2005JAD....11....2D'#'2001PASP..113..764D'#V/123A
CVs1 = CVs1[CVs1['VarType']!='non-CV'].reset_index(drop=True)
df_CVs1 = CVs1.rename(columns={'Names':'name_cat','VarType':'SubClass'})
print(len(df_CVs1))


1618


In [24]:
CVs2 = Vizier(catalog="B/cb/cbdata",row_limit=-1, #https://ui.adsabs.harvard.edu/?#abs/2003A%26A...404..301R
    columns=['*', '_RAJ2000', '_DEJ2000','epos']).query_constraints()[0]
CVs2 = CVs2['_RAJ2000','_DEJ2000','epos','Name','Type1'].to_pandas()
CVs2['Class'], CVs2['ref'] = 'CV', '2003A&A...404..301R'#B/cb/cbdata
df_CVs2 = CVs2.rename(columns={'Name':'name_cat','Type1':'SubClass','epos':'e_Pos'})
print(len(df_CVs2))


1429


# NS & NS_BIN
### from the ATNF Pulsar Catalog (Manchester et al. 2005)

In [25]:
import urllib3
#https://ui.adsabs.harvard.edu/abs/2005AJ....129.1993M/abstract
http = urllib3.PoolManager()
r = http.request('GET', 'https://www.atnf.csiro.au/research/pulsar/psrcat/proc_form.php?version=1.65&Name=Name&RaJ=RaJ&DecJ=DecJ&Binary=Binary&Type=Type&startUserDefined=true&c1_val=&c2_val=&c3_val=&c4_val=&sort_attr=jname&sort_order=asc&condition=&pulsar_names=&ephemeris=short&coords_unit=raj%2Fdecj&radius=&coords_1=&coords_2=&style=Long+csv+with+errors&no_value=*&fsize=3&x_axis=&x_scale=linear&y_axis=&y_scale=linear&state=query&table_bottom.x=35&table_bottom.y=15') # it's a file like object and works just like a file
r.status

ATNF = r.data.decode('utf-8').partition('\n<pre>\n')[2].partition('\n</pre>\n')[0].replace('*',' ').split('\n')
NSs = pd.DataFrame(columns=['src', 'NAME','Name_ref','RAJ','e_RA','RAJ_ref','DECJ','e_DEC','DECJ_ref','Binary','Binary_ref','PSR_type','Type_ref'], 
                data=[row.split(';') for row in ATNF[2:]])

NSs['_RAJ2000'] = NSs.apply(lambda row: atnf_pos(row.RAJ, row.e_RA, 'hms', 'pos'), axis=1)
NSs['_e_RAJ2000'] = NSs.apply(lambda row: atnf_pos(row.RAJ, row.e_RA, 'hms', 'err'), axis=1)
NSs['_DEJ2000'] = NSs.apply(lambda row: atnf_pos(row.DECJ, row.e_DEC, 'dms', 'pos'), axis=1)
NSs['_e_DEJ2000'] = NSs.apply(lambda row: atnf_pos(row.DECJ, row.e_DEC, 'dms', 'err'), axis=1)
NSs['e_Pos'] = NSs.apply(lambda row: max(row._e_RAJ2000 , row._e_DEJ2000), axis=1)


# correcting the inaccurate coordinates of two NSs
NSs.loc[NSs.NAME=='J1819-1458', '_RAJ2000'] = 274.8924
NSs.loc[NSs.NAME=='J1819-1458', '_DEJ2000'] = -14.9676579999999
NSs.loc[NSs.NAME=='J1741-2054', '_RAJ2000'] = 265.48868
NSs.loc[NSs.NAME=='J1741-2054', '_DEJ2000'] = -20.903278
NSs.loc[NSs.NAME=='J1718-3718', '_RAJ2000'] = 259.5409420
NSs.loc[NSs.NAME=='J1718-3718', '_DEJ2000'] = -37.3143054

# non-empty Binary column are binary non-accreting NSs (NS_BIN class) and share similar properties with LMXBs.
NSs['ref']= '2005AJ....129.1993M'#B/psr/psr
NSs['Class'] = 'NS_BIN'
print(len(NSs),Counter(NSs['Binary']))
NSs.loc[NSs['Binary']==' ', 'Class'] = 'NS'

# adding a few new NSs and NS_BINs
new_NS_BINs = pd.read_csv(f'{old_TD_dir}/new_NS_BIN.csv')
print(NSs.loc[NSs.NAME.isin(new_NS_BINs.name_cat.values), ['NAME','Binary','Class']])

df_NSs = NSs[['NAME','_RAJ2000','_DEJ2000','e_Pos','Class','PSR_type','ref']].rename(columns={'NAME':'name_cat','PSR_type':'SubClass'})
print(len(df_NSs))

print(len(df_NSs),Counter(df_NSs['Class']))


3177 Counter({' ': 2844, 'ELL1': 143, 'BT': 107, 'DD': 39, 'DDH': 14, 'ELL1H': 10, 'BTX': 7, 'DDGR': 4, 'T2': 3, 'MSS': 3, 'DDS': 1, 'BT2P': 1, 'DDK': 1})
             NAME Binary   Class
6      J0023+0923   ELL1  NS_BIN
60     J0101-6422     BT  NS_BIN
313   J0737-3039B     DD  NS_BIN
560    J1124-3653             NS
644    J1231-1411     BT  NS_BIN
714    J1311-3430   ELL1  NS_BIN
921    J1514-4946   ELL1  NS_BIN
984      B1534+12     DD  NS_BIN
1107   J1614-2230  ELL1H  NS_BIN
1172   J1628-3205     BT  NS_BIN
1294   J1653-0158   ELL1  NS_BIN
1494   J1731-1847    BTX  NS_BIN
1836   J1810+1744     BT  NS_BIN
1881   J1816+4510   ELL1  NS_BIN
2545   J1909-3744   ELL1  NS_BIN
2909     B1957+20     BT  NS_BIN
2953   J2017+0603  ELL1H  NS_BIN
3002   J2043+1711   ELL1  NS_BIN
3013   J2047+1053     BT  NS_BIN
3018   J2051-0827   ELL1  NS_BIN
3100   J2214+3000   ELL1  NS_BIN
3102   J2215+5135   ELL1  NS_BIN
3106   J2222-0137     DD  NS_BIN
3125   J2241-5236     BT  NS_BIN
3133   J2256-1024   

# HMXBs, LMXBs, and CVs from INTEGRAL General Reference Catalog (IGRS) and HMXBs from Be Star catalog

In [26]:
df_HMXB_Be = pd.read_csv(f'{old_TD_dir}/raretype_BeStar_IGRS.csv')
print(Counter(df_HMXB_Be['Class']))

Counter({'HMXB': 58, 'LMXB': 8, 'CV': 5})


# Combining sources together

In [27]:
df_TD = pd.concat([df_AGNs, df_YSOs, df_LMSTARs, APOGEE_STAR, df_HMSTARs, df_WRs1, df_WRs2, df_NSs, df_HMXBs, df_LMXBs1, df_LMXBs2, df_CVs1, df_CVs2, df_HMXB_Be,open_CV,CV_ZTF,CV_can_ZTF,CV_LAMOST_dr5,CV_LAMOST_dr6,BAT105,LAMOST_OB,LAMOST_AFGK,LAMOST_M], ignore_index=True, sort=False)



In [28]:
df_TD.head(5)

Unnamed: 0,_RAJ2000,_DEJ2000,name_cat,SubClass,Class,e_Pos,ref
0,0.005417,-2.033333,FIRST J00000-0202,Q,AGN,,2010A&A...518A..10V
1,0.005833,-30.6075,2QZ J000001-3036,Q,AGN,,2010A&A...518A..10V
2,0.007083,-31.373889,2QZ J000001-3122,Q,AGN,,2010A&A...518A..10V
3,0.01125,-25.193611,XMM J00000-2511,Q,AGN,,2010A&A...518A..10V
4,0.011667,-35.059167,MS 23574-3520,Q,AGN,,2010A&A...518A..10V


In [29]:
print(len(df_TD), sorted(Counter(df_TD['Class']).items()))

8591103 [('AGN', 169913), ('CV', 8354), ('CV-candidate', 5641), ('HM-STAR', 85369), ('HMXB', 280), ('LM-STAR', 8312997), ('LMXB', 412), ('NS', 2869), ('NS_BIN', 333), ('YSO', 4935)]


In [30]:
df_TD.to_csv(f'./data/{field_name}_all.csv',index=False)

In [31]:
df_TD = pd.read_csv(f'./data/{field_name}_all.csv')

In [31]:
# matching with CSCv2 

for i in range(9):
    print(i)
    if i !=8:
        TD_CSC = XMatch.query(cat1= Table.from_pandas(df_TD[i*1000000:(i+1)*1000000]), #open('/Users/yanghui/Desktop/Research/2019/MUWCLASS_Project/ML/DATA/TD/versions/CSC_TD_v5_09062021.csv'),
                              cat2='vizier:IX/57/csc2master',
                              max_distance=3*u.arcsec, colRA1='_RAJ2000',colDec1='_DEJ2000')

        TD_CSC = TD_CSC.to_pandas()
        print(len(TD_CSC), sorted(Counter(TD_CSC['Class']).items()))
        TD_CSC.to_csv(f'./data/TD_CSC_{i}.csv',index=False)
    else:
        TD_CSC = XMatch.query(cat1= Table.from_pandas(df_TD[i*1000000:]), #open('/Users/yanghui/Desktop/Research/2019/MUWCLASS_Project/ML/DATA/TD/versions/CSC_TD_v5_09062021.csv'),
                              cat2='vizier:IX/57/csc2master',
                              max_distance=3*u.arcsec, colRA1='_RAJ2000',colDec1='_DEJ2000')

        TD_CSC = TD_CSC.to_pandas()
        print(len(TD_CSC), sorted(Counter(TD_CSC['Class']).items()))
        TD_CSC.to_csv(f'./data/TD_CSC_{i}.csv',index=False)

0
9821 [('AGN', 6174), ('LM-STAR', 2202), ('YSO', 1445)]
1
3008 [('AGN', 254), ('CV', 607), ('CV-candidate', 68), ('HM-STAR', 977), ('HMXB', 127), ('LM-STAR', 352), ('LMXB', 232), ('NS', 243), ('NS_BIN', 148)]
2
344 [('LM-STAR', 344)]
3
301 [('LM-STAR', 301)]
4
350 [('LM-STAR', 350)]
5
356 [('LM-STAR', 356)]
6
437 [('LM-STAR', 437)]
7
642 [('LM-STAR', 642)]
8
636 [('LM-STAR', 636)]


In [32]:
TD_CSC = pd.DataFrame()
for i in range(9):
    #print(i)
    
    TD_CSC = pd.concat([TD_CSC, pd.read_csv(f'./data/TD_CSC_{i}.csv')], ignore_index=True, sort=False)

print(len(TD_CSC), sorted(Counter(TD_CSC['Class']).items()))

15895 [('AGN', 6428), ('CV', 607), ('CV-candidate', 68), ('HM-STAR', 977), ('HMXB', 127), ('LM-STAR', 5620), ('LMXB', 232), ('NS', 243), ('NS_BIN', 148), ('YSO', 1445)]


In [33]:
TD_CSC.head(5)

TD_CSC.to_csv(f'{data_dir}/{field_name}_Xmatch_all.csv',index=False)



In [34]:
TD_CSC = pd.read_csv(f'{data_dir}/{field_name}_Xmatch_all.csv')

# drop duplicated sources
TD_CSC = TD_CSC.sort_values(by=['angDist']) 
TD_CSC = TD_CSC.drop_duplicates(subset=['_RAJ2000_1', '_DEJ2000_1', 'name_cat', 'SubClass', 'Class','e_Pos', 'ref']).reset_index(drop=True)

print(len(TD_CSC), sorted(Counter(TD_CSC['Class']).items()))


14504 [('AGN', 6170), ('CV', 490), ('CV-candidate', 57), ('HM-STAR', 847), ('HMXB', 114), ('LM-STAR', 4954), ('LMXB', 192), ('NS', 183), ('NS_BIN', 90), ('YSO', 1407)]


In [35]:
# calculate the combined positional uncertainties (PUs) from X-ray positions and class-specific catalog coordinates 
TD_CSC['PU'] = np.sqrt(TD_CSC.e_Pos.fillna(0)*2**2+TD_CSC.r0.fillna(0)**2)

TD_CSC['name'] = TD_CSC.apply(lambda row: '2CXO '+str(row['2CXO']),axis=1)

# Sources from populous classes (AGNs, HM-STARs, LM-STARs and YSOs) are omitted if their class-specific catalog 
# and X-ray combined 2-σ PUs are > 1" or 
# if the separations of the class-specific catalog and the CSCv2 coordinates exceed the 2-σ PUs.

idx = np.where( ((TD_CSC['angDist']>TD_CSC['PU']) | (TD_CSC['PU'] >1.) )& ((TD_CSC['Class']=='AGN') | (TD_CSC['Class']=='YSO') | (TD_CSC['Class']=='HM-STAR') | (TD_CSC['Class']=='LM-STAR') ))[0]
print('Remove', len(idx), sorted(Counter(TD_CSC.loc[idx, 'Class']).items()))
TD_CSC = TD_CSC.drop(TD_CSC.index[idx])
TD_CSC = TD_CSC.reset_index(drop=True)
print(len(TD_CSC), sorted(Counter(TD_CSC['Class']).items()))

TD = TD_CSC.rename(columns={'_RAJ2000_1':'ra_cat','_DEJ2000_1':'dec_cat','angDist':'sep','RAICRS':'ra','DEICRS':'dec'})[['name_cat','ra_cat','dec_cat','e_Pos','Class','SubClass','ref','sep','name','ra','dec','r0','r1','PA','PU']].sort_values(by=['Class','ra']).reset_index(drop=True)


#TD['remove_code'] = 0



Remove 8608 [('AGN', 4534), ('HM-STAR', 405), ('LM-STAR', 3186), ('YSO', 483)]
5896 [('AGN', 1636), ('CV', 490), ('CV-candidate', 57), ('HM-STAR', 442), ('HMXB', 114), ('LM-STAR', 1768), ('LMXB', 192), ('NS', 183), ('NS_BIN', 90), ('YSO', 924)]


In [50]:
# cross-matching to SIMBAD 

CSCv2 = Table.read('./data/CSCv2.vot').to_pandas()

#CSCv2['_RAJ2000'] = Angle(CSCv2['_RAJ2000'], 'deg').degree*u.degree 
#CSCv2['_DEJ2000'] = Angle(CSCv2['_DEJ2000'], 'deg').degree*u.degree 

CSCv2_simbad = XMatch.query(cat1=Table.from_pandas(CSCv2),
                         cat2='vizier:SIMBAD',max_distance=3 * u.arcsec, colRA1='_RAJ2000',colDec1='_DEJ2000')

CSCv2_simbad = CSCv2_simbad.to_pandas().rename(columns={'ra':'ra_simbad', 'dec':'dec_simbad', 'angDist':'_r_simbad'}).sort_values(by=['_r_simbad']) 
CSCv2_simbad = CSCv2_simbad.drop_duplicates(subset=['_2CXO'], keep='first').reset_index(drop=True)
print(len(CSCv2_simbad))
#print(CSCv2_simbad.columns)
#TD= pd.merge(TD, TD_simbad, how='outer', on = ['name_cat','ra_cat','dec_cat','e_Pos','Class','SubClass','ref', 'sep', 'name', 'ra', 'dec', 'r0','PU','remove_code'])

#TD.loc[TD.name_cat.isnull(), 'name_cat'] = TD.loc[TD.name_cat.isnull(), 'main_id']
#TD.loc[TD.name_cat.isnull(), 'name_cat'] = TD.loc[TD.name_cat.isnull(), 'name']
#print(TD.columns[:50])


88049


In [41]:
CSCv2_simbad['name'] = CSCv2_simbad.apply(lambda r: '2CXO '+ r['_2CXO'],axis=1)
CSCv2_simbad['ref'] = 'SIMBAD'
CSCv2_simbad = CSCv2_simbad.rename(columns={'_r_simbad':'sep','RAICRS':'ra', 'DEICRS':'dec','main_id':'name_cat',
                    'ra_simbad':'ra_cat','dec_simbad':'dec_cat', 'coo_err_maj':'e_Pos','main_type':'Class', 'other_types':'SubClass'})
CSCv2_simbad['PU'] = np.sqrt((CSCv2_simbad.e_Pos.fillna(0)*2)**2+CSCv2_simbad.r0.fillna(0)**2)


In [42]:
CSCv2_simbad[['name_cat', 'nbref', 'ra_sexa', 'dec_sexa', 'coo_qual','coo_bibcode']]

Unnamed: 0,name_cat,nbref,ra_sexa,dec_sexa,coo_qual,coo_bibcode
0,2CXO J113510.4-605605,1,11 35 10.4690,-60 56 05.335,D,2010ApJS..189...37E
1,2CXO J113503.0-605521,1,11 35 03.0329,-60 55 21.149,D,2010ApJS..189...37E
2,2CXO J113509.9-605500,1,11 35 09.9680,-60 55 00.531,D,2010ApJS..189...37E
3,2CXO J113512.2-605559,1,11 35 12.2187,-60 55 59.691,D,2010ApJS..189...37E
4,2CXO J113514.1-605605,1,11 35 14.1441,-60 56 05.662,D,2010ApJS..189...37E
...,...,...,...,...,...,...
88044,2XMM J122224.7+043324,1,12 22 24.710,+04 33 24.04,C,2009ApJS..182..543A
88045,[KCE2014] 183.3660+02.9600,1,12 13 27.8,+02 57 36,D,2014MNRAS.445.1430K
88046,XMMU J005010.7-731931,1,00 50 10.707,-73 19 31.23,D,2013A&A...558A...3S
88047,GPM 210.409761+54.557029,1,14 01 38.35648,+54 33 24.0570,A,2020yCat.1350....0G


In [45]:
print(CSCv2_simbad.columns)
print(TD.columns)
print(TD['Class'].value_counts())

Index(['sep', '_RAJ2000', '_DEJ2000', '_2CXO', 'ra', 'dec', 'r0', 'r1', 'PA',
       'name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'coo_err_min',
       'coo_err_angle', 'nbref', 'ra_sexa', 'dec_sexa', 'coo_qual',
       'coo_bibcode', 'Class', 'SubClass', 'radvel', 'radvel_err', 'redshift',
       'redshift_err', 'sp_type', 'morph_type', 'plx', 'plx_err', 'pmra',
       'pmdec', 'pm_err_maj', 'pm_err_min', 'pm_err_pa', 'size_maj',
       'size_min', 'size_angle', 'B', 'V', 'R', 'J', 'H', 'K', 'u', 'g', 'r',
       'i', 'z', 'name', 'ref', 'PU'],
      dtype='object')


NameError: name 'TD' is not defined

In [40]:
#print(len(CSCv2_simbad), len(TD))
#TD_simbad = pd.merge(CSCv2_simbad[['name','_r_simbad','RAICRS', 'DEICRS', 'r0',
       #'r1', 'PA', 'main_id','ra_simbad', 'dec_simbad','main_type', 'other_types']],
       #         TD[['name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'Class', 'SubClass', 'ref',
       #'sep', 'name', 'ra', 'dec', 'r0', 'r1', 'PA', 'PU']], on='name', how='outer' )
#print(len(TD_simbad))

print(len(TD), len(CSCv2_simbad))
TD_simbad  = pd.concat([TD, CSCv2_simbad[['name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'Class', 'SubClass', 'ref',
       'sep', 'name', 'ra', 'dec', 'r0', 'r1', 'PA', 'PU','nbref','coo_bibcode']]], ignore_index=True, sort=False)
print(len(TD_simbad))
TD_simbad = TD_simbad[TD_simbad['Class']!='CV-candidate'].reset_index(drop=True)
print(len(TD_simbad))

5896 88049
93945
93888


In [41]:
print(TD_simbad.columns)

Index(['name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'Class', 'SubClass', 'ref',
       'sep', 'name', 'ra', 'dec', 'r0', 'r1', 'PA', 'PU', 'nbref',
       'coo_bibcode'],
      dtype='object')


In [42]:

TD_simbad[TD_simbad.duplicated(subset=['name'])]['Class'].unique()

array(['AGN', 'CV', 'HM-STAR', 'HMXB', 'LM-STAR', 'LMXB', 'NS', 'NS_BIN',
       'YSO', 'X', 'Orion_V*', 'RSCVn', 'QSO', 'TTau*', 'CataclyV*',
       'NIR', 'V*', 'Star', 'WR*', 'PulsV*delSct', 'Seyfert_1', 'Radio',
       'Seyfert_2', 'Pulsar', 'Em*', 'Planet', 'Seyfert', 'EB*', 'Blazar',
       'Cl*', 'BClG', 'Planet?_Candidate', 'low-mass*', 'SB*', 'LINER',
       'GlCl', 'PM*', '**', 'IR', 'BLLac', 'LensSystem_Candidate',
       'Unknown_Candidate', 'YSO_Candidate', 'EmObj', 'RotV*',
       'Radio(cm)', 'WD*_Candidate', 'BYDra', 'GinPair', 'QSO_Candidate',
       'Erupt*RCrB', 'EB*_Candidate', 'Ae*_Candidate', 'Nova', 'HH',
       'ULX?_Candidate', 'Galaxy', 'CV*_Candidate', 'PN', 'PartofG',
       'LPV*', 'BH_Candidate', 'PulsV*bCep', 'denseCore', 'Be*', 'PulsV*',
       'GravLensSystem', 'RGB*_Candidate', 'LensedImage', 'XB',
       'Eruptive*', 'RedSG*', 'Irregular_V*', 'MolCld', 'Cloud',
       'Symbiotic*', 'ClG', 'Ae*', 'BlueSG*', 'HII', 'GinCl', 'EmG',
       'brownD*', 'Neu

In [43]:
TD_simbad['rare-type'] = 4
#TD_simbad['rare-type'] = TD_simbad.apply(lambda r: True if r['Class'] is in Simbad_dict['rare-type'] else True if set(r['SubClass'].str.split("|")).isdisjoint(set(Simbad_dict['rare-type'])), axis=1)
#TD_simbad['rare-type'] = TD_simbad.apply(lambda r: True if r['Class'] in Simbad_dict['rare-type'] else False, axis=1)
#TD_simbad['rare-type'] = TD_simbad.apply(lambda r: False if type(r['SubClass']) == float else True if set(r['SubClass'].split("|")).isdisjoint(set(Simbad_dict['rare-type'])) else False, axis=1)
TD_simbad['rare-type'] = TD_simbad.apply(lambda r: 1 if r['Class'] in Simbad_dict['rare-type'] else 2 if type(r['SubClass']) == float else 3 if not set(r['SubClass'].split("|")).isdisjoint(set(Simbad_dict['rare-type'])) else 4, axis=1)
#

In [44]:
print(TD_simbad['rare-type'].value_counts())
print(TD_simbad.loc[TD_simbad['rare-type']==3, ['Class','SubClass']])
print(TD_simbad.loc[TD_simbad['rare-type']==1, 'Class'].value_counts())
print(TD_simbad[TD_simbad['rare-type']==1].groupby(['Class','ref']).size())

4    89396
1     3840
2      586
3       66
Name: rare-type, dtype: int64
                Class            SubClass
5982   ULX?_Candidate       HXB|ULX|UX?|X
6895              Be*  *|BS*|Be*|HXB|IR|X
6940              AGN           AGN|CV*|X
7020   ULX?_Candidate           HXB|UX?|X
9489         SFregion       HXB|SFR|ULX|X
...               ...                 ...
79998  ULX?_Candidate       HXB|ULX|UX?|X
81525            Nova          CV*|No*|V*
87761           gamma           Psr|X|gam
88552            LPV*      *|**|CV*|LP*|X
93673   CV*_Candidate            *|CV*|V*

[66 rows x 2 columns]
HMXB         1216
XB            928
LMXB          599
CV            490
CataclyV*     186
NS            183
Pulsar        146
NS_BIN         90
Neutron*        2
Name: Class, dtype: int64
Class      ref                                  
CV         2003A&A...404..301R                        94
           2005JAD....11....2D                       177
           2018ApJS..235....4O                  

In [45]:


df_crowd_fields = pd.DataFrame.from_dict(crowd_fields_dict, orient='index')

df_crowd_fields.reset_index(inplace=True)

df_crowd_fields = df_crowd_fields.rename(columns = {'index':'Name','ra':'RAJ2000', 'dec':'DEJ2000','r':'r2'})

#df_crowd_fields


In [46]:
# Globular Clusters 2013A&A...558A..53K

GCs = Vizier(columns=["**"], catalog="J/A+A/558/A53/catalog", row_limit=-1,column_filters={"Type":"g"}).query_constraints()
df_GCs = GCs[0].to_pandas()
#df_GCs = df_GCs[df_GCs['Type']=='g'].reset_index(drop=True)

#print(df_GCs.columns)
#print(df_GCs[['Name','RAJ2000','DEJ2000','r2']])


# nearby star-forming galaxies (2012MNRAS.419.2095M)

SFgal = Vizier(columns=["**"], catalog="J/MNRAS/419/2095/sfgal", row_limit=-1).query_constraints()
df_SFgal = SFgal[0].to_pandas()
#print(df_SFgal.columns)
#print(df_SFgal[['Galaxy','Rx','SimbadName']])

df_SFgal['RAJ2000'] = np.nan
df_SFgal['DEJ2000'] = np.nan
df_SFgal['r2'] = df_SFgal['Rx']*2/60 # a factor of 2 to be conservative 
df_SFgal['Name'] = df_SFgal.apply(lambda r: 'NGC_' + r['Galaxy'] if r['Galaxy'][:1].isdigit() else r['Galaxy'],axis=1)

for simbad_name in df_SFgal['SimbadName']:
    df_r = Simbad.query_object(simbad_name).to_pandas()
    df_SFgal.loc[df_SFgal['SimbadName']==simbad_name, 'RAJ2000'] = Angle(df_r['RA'],'hourangle').degree
    df_SFgal.loc[df_SFgal['SimbadName']==simbad_name, 'DEJ2000'] = Angle(df_r['DEC'],'deg').degree
    #print(simbad_name)
    #print(df_r.to_pandas())
    
#print(df_SFgal[['Name','RAJ2000','DEJ2000','r2']])

print(len(df_crowd_fields),len(df_GCs),len(df_SFgal))
df_crowd_fields_all = pd.concat([df_crowd_fields, df_GCs[['Name','RAJ2000','DEJ2000','r2']], df_SFgal[['Name','RAJ2000','DEJ2000','r2']]], ignore_index=True, sort=False)
   

print(df_crowd_fields_all[['Name','RAJ2000','DEJ2000','r2']])


34 147 29
         Name     RAJ2000    DEJ2000        r2
0      NGC_55    3.723333 -39.196667  0.164384
1       IC_10    5.072083  59.303889  0.066667
2     Haro_11    9.219583 -33.554778  0.001667
3        M_31   10.684580  41.269160  1.000000
4         SMC   13.186700 -72.828600  5.330000
..        ...         ...        ...       ...
205  NGC_7090  324.120271 -54.557319  0.103333
206  NGC_7541  348.682737   4.533900  0.046667
207  NGC_7793  359.457308 -32.591028  0.066667
208   UGC5720  158.133150  54.400981  0.020000
209   CARTWHE    9.421305 -33.716254  0.023333

[210 rows x 4 columns]


In [47]:
df_crowd_fields_all[df_crowd_fields_all.duplicated(subset=['Name'],keep=False)].sort_values(by='Name')


Unnamed: 0,Name,RAJ2000,DEJ2000,r2
14,NGC_2403,114.214167,65.6025,0.168675
188,NGC_2403,114.213909,65.602681,0.15
18,NGC_4214,183.913333,36.326944,0.069644
196,NGC_4214,183.913225,36.326889,0.083333
22,NGC_5457,210.802429,54.34875,0.166667
202,NGC_5457,210.802429,54.34875,0.133333
27,NGC_6388,264.072754,-44.73565,0.051667
111,NGC_6388,264.071991,-44.736,0.22
28,NGC_6397,265.175375,-53.674333,0.266667
115,NGC_6397,265.175995,-53.674,0.405


In [48]:
print(len(df_crowd_fields_all))
df_crowd_fields_all = df_crowd_fields_all.sort_values(by='r2',ascending=False).reset_index(drop=True)
df_crowd_fields_all = df_crowd_fields_all[~df_crowd_fields_all.duplicated(subset=['Name'])].reset_index(drop=True)
print(len(df_crowd_fields_all))


210
203


In [49]:

for i in range(10):
    c1 = SkyCoord(ra=df_crowd_fields_all.loc[i, 'RAJ2000']*u.degree, dec=df_crowd_fields_all.loc[i, 'DEJ2000']*u.degree, frame='icrs')
    #print(df_crowd_fields_all.loc[i,:])
    for j in range(len(df_crowd_fields_all)):
        c2 = SkyCoord(ra=df_crowd_fields_all.loc[j, 'RAJ2000']*u.degree, dec=df_crowd_fields_all.loc[j, 'DEJ2000']*u.degree, frame='icrs')
    
        if (c1.separation(c2).degree < df_crowd_fields_all.loc[i, 'r2']) & (j!=i):
            print(c1.separation(c2).degree, '\n', df_crowd_fields_all.loc[j, :])


print(len(df_crowd_fields_all))
df_crowd_fields_all = df_crowd_fields_all[~(df_crowd_fields_all['Name'].isin(['NGC_104','NGC_362','M_13']))].reset_index(drop=True)
print(len(df_crowd_fields_all))    


2.289041868442859 
 Name         NGC_104
RAJ2000        6.004
DEJ2000   -72.081001
r2              0.95
Name: 3, dtype: object
2.14499221821194 
 Name       NGC_362
RAJ2000     15.825
DEJ2000    -70.847
r2           0.235
Name: 33, dtype: object
0.00019073197755802303 
 Name             M_13
RAJ2000    250.421833
DEJ2000     36.459861
r2           0.166667
Name: 72, dtype: object
203
200


In [50]:
TD_simbad_name = TD_simbad[~TD_simbad.duplicated(subset='name')].reset_index(drop=True)
print(len(TD_simbad_name))

ras = np.array(TD_simbad_name['ra'].values)
decs = np.array(TD_simbad_name['dec'].values)
ras_cat =  np.array(df_crowd_fields_all['RAJ2000'].values)
decs_cat =  np.array(df_crowd_fields_all['DEJ2000'].values)

88179


In [51]:

c = SkyCoord(ra=ras*u.degree, dec=decs*u.degree)
catalog = SkyCoord(ra=ras_cat*u.degree, dec=decs_cat*u.degree)
idxs, d2d, d3d = c.match_to_catalog_sky(catalog)
#print(idx, d2d, d3d)
#print(len(idx))

TD_simbad['remove_regions'] = ''

start = time.time()
for i in range(len(TD_simbad_name)):
    #if TD_simbad_name.loc[i, 'name']=='2CXO J010206.6-714116':
        #print(df_crowd_fields_all.loc[idxs[i], :])
    #'''
    if d2d[i].deg < df_crowd_fields_all.loc[idxs[i], 'r2']:
        #print(i, d2d[i].deg, df_crowd_fields_all.loc[idxs[i], 'r2'])
        TD_simbad.loc[TD_simbad['name']==TD_simbad_name.loc[i, 'name'], 'remove_regions'] += '|'+df_crowd_fields_all.loc[idxs[i], 'Name']
    #'''
end = time.time() 
print(end - start)      
        

54.178582191467285


In [52]:
TD_simbad['remove_regions'].value_counts()

             84534
|SMC          1737
|NGC_2264      912
|M_31          712
|M_33          516
             ...  
|NGC_6638        1
|NGC_5946        1
|NGC_6256        1
|NGC_6717        1
|Terzan_3        1
Name: remove_regions, Length: 127, dtype: int64

In [53]:
TD_simbad['GLAT'] = SkyCoord(ra=TD_simbad['ra']*u.degree, dec=TD_simbad['dec']*u.degree, frame='icrs').galactic.l.degree
TD_simbad['GLON'] = SkyCoord(ra=TD_simbad['ra']*u.degree, dec=TD_simbad['dec']*u.degree, frame='icrs').galactic.b.degree

TD_simbad[['ra','dec','GLAT','GLON']]

Unnamed: 0,ra,dec,GLAT,GLON
0,0.039115,13.938493,104.492215,-47.088916
1,0.627974,0.833072,98.134883,-59.662290
2,0.912396,16.039005,106.501181,-45.302672
3,1.594324,-0.073573,99.280958,-60.859243
4,1.747051,-0.294661,99.412899,-61.120224
...,...,...,...,...
93883,185.602940,4.555845,284.608751,66.389505
93884,183.366218,2.960804,280.709825,64.181837
93885,12.541707,-73.325356,303.058260,-43.802585
93886,210.408386,54.556618,102.642273,59.719589


In [54]:
idx = np.where(((TD_simbad.GLAT > 350.) | (TD_simbad.GLAT < 10.)) & (TD_simbad.GLON > -5.) & (TD_simbad.GLON < 5.))[0]
TD_simbad.loc[idx, 'remove_regions'] = TD_simbad.loc[idx, 'remove_regions'] + '|GalacticCenter'

idx = np.where((TD_simbad['name'].str.strip().str[-1].str.isalpha()) & (~TD_simbad['name'].isin(['2CXO J043715.9-471509X'])))[0]
TD_simbad.loc[idx, 'remove_regions'] = TD_simbad.loc[idx, 'remove_regions'] + '|CSCconfused'

In [55]:
TD_simbad['remove_regions'].value_counts()


                         75253
|GalacticCenter           8784
|SMC                      1733
|NGC_2264                  912
|M_31                      709
                         ...  
|NGC_6355                    1
|NGC_5253|CSCconfused        1
|NGC_4697|CSCconfused        1
|NGC_6388|CSCconfused        1
|NGC_5194|CSCconfused        1
Name: remove_regions, Length: 156, dtype: int64

In [56]:
print(TD_simbad.loc[TD_simbad['ref']=='2004ApJ...610.1045S','Class'].value_counts())
#print(TD_simbad.loc[TD_simbad['ref']=='2004ApJ...610.1045S',''].value_counts())
print(TD_simbad.loc[TD_simbad['name'].isin(TD_simbad.loc[TD_simbad['ref']=='2004ApJ...610.1045S','name']), 'Class'].value_counts())

print(TD_simbad.loc[TD_simbad['name'].isin(TD_simbad.loc[TD_simbad['ref']=='2004ApJ...610.1045S','name']), ['name','Class','ref']])

LM-STAR    10
Name: Class, dtype: int64
LM-STAR     11
YSO          5
Star         3
X            2
Orion_V*     1
Name: Class, dtype: int64
                        name     Class                  ref
3224   2CXO J053507.0-052500   LM-STAR  2004ApJ...610.1045S
3229   2CXO J053508.4-052230   LM-STAR  2004ApJ...610.1045S
3234   2CXO J053510.3-052451   LM-STAR  2004ApJ...610.1045S
3238   2CXO J053511.3-052426   LM-STAR  2004ApJ...610.1045S
3240   2CXO J053511.7-052155   LM-STAR  2004ApJ...610.1045S
3264   2CXO J053515.9-052152   LM-STAR  2004ApJ...610.1045S
3279   2CXO J053518.0-052140   LM-STAR  2004ApJ...610.1045S
3309   2CXO J053523.5-052350   LM-STAR  2004ApJ...610.1045S
3314   2CXO J053525.0-052438   LM-STAR  2004ApJ...610.1045S
3318   2CXO J053525.3-052529   LM-STAR     arXiv:2011.14483
3319   2CXO J053525.3-052529   LM-STAR  2004ApJ...610.1045S
5418   2CXO J053525.3-052529       YSO  2012AJ....144..192M
8592   2CXO J053507.0-052500      Star               SIMBAD
15180  2CXO J053511

In [57]:
#TD_simbad.loc[TD_simbad['ref']=='2004ApJ...610.1045S','Class'] = 'YSO' 
TD_simbad.columns

Index(['name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'Class', 'SubClass', 'ref',
       'sep', 'name', 'ra', 'dec', 'r0', 'r1', 'PA', 'PU', 'nbref',
       'coo_bibcode', 'rare-type', 'remove_regions', 'GLAT', 'GLON'],
      dtype='object')

In [58]:
print(TD_simbad.loc[TD_simbad['name']=='2CXO J171809.8-371851', ['name_cat','name','ra_cat','dec_cat','Class','ref','SubClass','remove_regions']])
print(TD_simbad.loc[TD_simbad['name']=='2CXO J222552.6+653535', ['name_cat','name','Class','ref','SubClass','remove_regions']])
#print(TD_simbad.loc[TD_simbad['name']=='2CXO J063354.2+174616', ['name_cat','name','Class','ref','SubClass','remove_regions']])






        name_cat                   name      ra_cat    dec_cat Class  \
4733  J1718-3718  2CXO J171809.8-371851  259.540942 -37.314305    NS   

                      ref SubClass remove_regions  
4733  2005AJ....129.1993M      NaN                 
           name_cat                   name   Class     ref   SubClass  \
89561  PSR B2223+65  2CXO J222552.6+653535  Pulsar  SIMBAD  Psr|Rad|X   

      remove_regions  
89561                 


In [59]:
print(TD_simbad.loc[(TD_simbad['ref']=='2005JAD....11....2D') & (TD_simbad['remove_regions']==''), ['name','Class','ref','SubClass','remove_regions']])
#print(TD_simbad.loc[(TD_simbad['ref']=='2005JAD....11....2D') & (TD_simbad['SubClass']=='CV') & (TD_simbad['remove_regions']==''), ['name','Class','ref','SubClass','remove_regions']])
print(TD_simbad.loc[(TD_simbad['ref']=='2005JAD....11....2D') & (TD_simbad['remove_regions']==''), ['SubClass']].value_counts())

                       name Class                  ref SubClass remove_regions
1637  2CXO J002257.6+614107    CV  2005JAD....11....2D       CV               
1736  2CXO J012940.0+384210    CV  2005JAD....11....2D     IBWD               
1739  2CXO J020052.2-092431    CV  2005JAD....11....2D     IBWD               
1744  2CXO J025608.1+192634    CV  2005JAD....11....2D       DQ               
1745  2CXO J030346.9+645435    CV  2005JAD....11....2D       CV               
1748  2CXO J033108.1+435750    CV  2005JAD....11....2D      CV:               
1750  2CXO J033111.9+435415    CV  2005JAD....11....2D    NA/DQ               
1754  2CXO J033131.4+435648    CV  2005JAD....11....2D      CV:               
1760  2CXO J052728.2-124150    CV  2005JAD....11....2D      NL:               
1762  2CXO J054320.3-410154    CV  2005JAD....11....2D       DQ               
1765  2CXO J054748.3+283511    CV  2005JAD....11....2D       UG               
1772  2CXO J080622.9+152731    CV  2005JAD....11....

In [3]:




df_rare_sources_saving = pd.DataFrame(rare_sources_saving_dict).T



#df_rare_sources_saving.reset_index(inplace=True)
#df_rare_sources_saving = df_rare_sources_saving.rename(columns = {'index':'name'})

df_rare_sources_saving

Unnamed: 0,Class,Comment
2CXO J073751.2-303940,NS,change from NS_BIN to NS since it is a double ...
2CXO J153709.9+115555,NS,change from NS_BIN to NS since it is a double ...
2CXO J112401.1-365319,NS_BIN,change from NS to NS_BIN as a black widow pulsar
2CXO J185843.6+032606,LMXB,2021ApJ...909..154T
2CXO J130247.6-635008,HMXB,recorded in the ATNF catalog
...,...,...
2CXO J191404.2+095258,HMXB,confused with 2CXO J191404.2+095258X.
2CXO J193030.1+185214,NS,confused with 2CXO J193029.9+185213X.
2CXO J202105.4+365104,NS,confused with 2CXO J202105.4+365104X.
2CXO J222552.8+653536,NS,same NS as 2CXO J222552.6+653535 in different ...


In [61]:
TD_simbad[(TD_simbad.name.isin(TD_simbad.loc[TD_simbad['ref']=='2020RNAAS...4..219J', 'name'])) & (TD_simbad.name.isin(TD_simbad.loc[TD_simbad['Class']=='CV*_Candidate', 'name'])) ]

Unnamed: 0,name_cat,ra_cat,dec_cat,e_Pos,Class,SubClass,ref,sep,name,ra,...,r0,r1,PA,PU,nbref,coo_bibcode,rare-type,remove_regions,GLAT,GLON


In [62]:
TD_simbad.columns

Index(['name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'Class', 'SubClass', 'ref',
       'sep', 'name', 'ra', 'dec', 'r0', 'r1', 'PA', 'PU', 'nbref',
       'coo_bibcode', 'rare-type', 'remove_regions', 'GLAT', 'GLON'],
      dtype='object')

In [63]:
ref_changes = {'Simbad':'old-SIMBAD', 
               'The Open Cataclysmic Variable Catalog': '2020RNAAS...4..219J',
               'Open CV catalog': '2020RNAAS...4..219J',
               'arXiv:2008.09917': '2020A&A...642A.168B',
               'arXiv:2011.14483': '2021ApJ...908...49F',
               'arXiv:2103.00196': '2021A&A...648A..34P',
               'arXiv:2110.01464': '2022A&A...657A.131M'}

for ref in ref_changes:
    TD_simbad.loc[TD_simbad['ref']==ref, 'ref'] = ref_changes[ref]
    
TD_simbad = TD_simbad.sort_values(by=['name','ref','e_Pos']).reset_index(drop=True)




#print(TD_simbad[['name','ref','e_Pos']][100:150])




In [64]:
TD_simbad[(TD_simbad['remove_regions']=='|GalacticCenter') & (TD_simbad['rare-type']==1)]

Unnamed: 0,name_cat,ra_cat,dec_cat,e_Pos,Class,SubClass,ref,sep,name,ra,...,r0,r1,PA,PU,nbref,coo_bibcode,rare-type,remove_regions,GLAT,GLON
70293,4U 1705-32,257.226667,-32.315972,,LMXB,LXB|N*?|X|gam,SIMBAD,1.763204,2CXO J170854.2-321856,257.226113,...,1.227,1.227,0.000000,1.227000,56.0,2005A&A...432L..49S,1,|GalacticCenter,352.793524,4.681810
70489,4U 1711-34,258.582417,-34.046472,,LMXB,LXB|X|gam,SIMBAD,0.699489,2CXO J171419.7-340246,258.582251,...,0.714,0.713,44.020000,0.714000,43.0,2007A&A...469..807L,1,|GalacticCenter,352.060411,2.745959
70872,XTE J1719-291,259.820708,-29.069542,,XB,HX?|X|XB*,SIMBAD,2.733208,2CXO J171917.1-290409,259.821564,...,0.711,0.711,65.419998,0.711000,18.0,2008ATel.1451....1D,1,|GalacticCenter,356.745498,4.753352
71150,2MASS J17200591-3116596,260.024625,-31.283222,0.06,HMXB,HXB|IR|X|gam,SIMBAD,0.179051,2CXO J172005.9-311659,260.024608,...,0.713,0.713,0.000000,0.723028,34.0,2003yCat.2246....0C,1,|GalacticCenter,355.022141,3.347450
71718,J1723-2837,260.846607,-28.632547,0.11,NS_BIN,,2005AJ....129.1993M,0.348727,2CXO J172323.1-283757,260.846644,...,0.710,0.710,0.000000,0.971648,,,1,|GalacticCenter,357.616243,4.260026
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80601,SAX J1810.8-2609,272.685292,-26.150333,,LMXB,"T, B",2007A&A...469..807L,0.147194,2CXO J181044.4-260901,272.685276,...,0.719,0.713,93.280000,0.719000,,,1,|GalacticCenter,5.197213,-3.431088
80602,SAX J1810.8-2609,272.685000,-26.150000,,LMXB,LMXB,2018ApJS..235....4O,1.609226,2CXO J181044.4-260901,272.685276,...,0.719,0.713,93.280000,0.719000,,,1,|GalacticCenter,5.197213,-3.431088
80603,V* V4722 Sgr,272.685292,-26.150333,,LMXB,LXB|V*|X|gam,SIMBAD,0.147180,2CXO J181044.4-260901,272.685276,...,0.719,0.713,93.279999,0.719000,88.0,2007A&A...469..807L,1,|GalacticCenter,5.197213,-3.431088
81881,V4641 Sgr,274.840000,-25.406944,1.00,LMXB,XT,2003A&A...404..301R,0.894903,2CXO J181921.6-252425,274.840069,...,0.711,0.711,104.100000,2.122621,,,1,|GalacticCenter,6.773924,-4.789018


In [None]:
TD_simbad.to_csv(f'./data/{field_name}_SIMBAD.csv',index=False)

In [8]:
TD_simbad = pd.read_csv(f'./data/{field_name}_SIMBAD.csv')
#TD_simbad.loc[TD_simbad['name'].isin(['2CXO J205107.5-082737','2CXO J224142.0-523636']),:]


In [12]:
print(TD_simbad.loc[TD_simbad['Class']=='LM-STAR', 'ref'].value_counts()[:50])

print(TD_simbad.loc[TD_simbad['Class']=='HM-STAR', 'ref'].value_counts()[:50])

LAMOST-DR8-AFGK        549
2020AJ....160..120J    400
LAMOST-DR8-M           317
2021ApJ...908...49F     44
2002AJ....123.1528R     27
2006ApJ...646.1215L     19
1939AnBos...5E...1W     18
2003ApJ...593.1093L     18
1982MSS...C03....0H     15
1975MSS...C01....0H     12
1978MSS...C02....0H     12
1958PMcCO..13b....V     11
2004ApJ...610.1045S     10
2004AJ....127.1131W      9
2003AJ....125.1480A      8
1956PMcCO..13a....B      7
1952CoRut..32....1H      7
1940ApJ....91..244E      7
2006A&A...460..695T      7
1954StoAn..18a...1L      5
1988MSS...C04....0H      5
1936AnHar.100..205C      5
1935MeUpp..61....1S      5
1926AnHar.100...17C      5
1928AnHar.100...49C      5
1987ApJS...64..241S      4
1961AbaOB..26...35K      4
2004AJ....128.1233H      4
1978PASP...90..144Y      4
1957ApJ...125..636W      4
2003AJ....125.2134G      4
2004ApJ...611.1107F      4
2002A&A...393..195M      4
2003MNRAS.341..805P      4
1961ApJ...133..438W      3
1939ApJ....89..431E      3
1955IzKry..14....3B      3
1

In [5]:
df_TD_single =  TD_simbad[~(TD_simbad.duplicated(subset=['name'],keep=False)) ].reset_index(drop=True)

df_TD_single_keep = df_TD_single[((df_TD_single['name'].isin(rare_sources_saving_dict)) | (df_TD_single['ref']!='SIMBAD')) ].reset_index(drop=True)

df_TD_single_keep.set_index('name', inplace=True)
df_TD_single_keep.update(df_rare_sources_saving)
df_TD_single_keep.reset_index(inplace=True)  # to recover t
#df_TD_single_keep = df_TD_single_keep.rename(columns = {'index':'name'})

#print(df_TD_single_keep.columns)
print(df_TD_single_keep['Class'].value_counts())

LM-STAR    78
NS         18
HMXB        8
LMXB        4
CV          4
NS_BIN      3
AGN         3
HM-STAR     2
Name: Class, dtype: int64


In [6]:

df_TD_multi = TD_simbad[ (TD_simbad.duplicated(subset=['name'],keep=False)) ].reset_index(drop=True)
#print(len(TD_simbad), len(df_TD_single), len(df_TD_multi))

df_TD_multi_save = df_TD_multi[df_TD_multi['name'].isin(rare_sources_saving_dict)].reset_index(drop=True)
#print(len(df_TD_multi_save))

df_TD_multi_save.set_index('name', inplace=True)
df_TD_multi_save.update(df_rare_sources_saving)
df_TD_multi_save.reset_index(inplace=True)  # to recover t



df_TD_multi_save_final = df_TD_multi_save[ ~(df_TD_multi_save.duplicated(subset=['name'])) ].reset_index(drop=True)
df_TD_multi_save['SubClass'] = df_TD_multi_save['SubClass'].fillna('')
for i, row in df_TD_multi_save_final.iterrows():
    
    #print(row['name'])
    #refs = []
    #print(df_TD_multi_save.loc[df_TD_multi_save['name']==row['name'], 'ref'].values)
    refs = ";".join(df_TD_multi_save.loc[df_TD_multi_save['name']==row['name'], 'ref'].values)
    SubClasses = ";".join(df_TD_multi_save.loc[df_TD_multi_save['name']==row['name'], 'SubClass'].values)
    #print(df_TD_multi_save.loc[df_TD_multi_save['name']==row['name'], 'SubClass'].values)
    #print(refs)
    #print(SubClasses)
    df_TD_multi_save_final.loc[i, 'refs'] = refs
    df_TD_multi_save_final.loc[i, 'SubClasses'] = SubClasses


#print(len(df_TD_multi_save['name'].unique()))

#print(df_TD_multi_save_final['Class'].value_counts())
#for name in df_TD_multi_save['name'].unique()



#

In [7]:
df_TD_multi_save_final[['ref','refs','SubClass','SubClasses']]

df_TD_multi_save_final = df_TD_multi_save_final.drop(columns=['ref','SubClass']).rename(columns={'refs':'ref', 'SubClasses':'SubClass'})

df_TD_multi_save_final[['ref','SubClass']]

Unnamed: 0,ref,SubClass
0,2005AJ....129.1993M;SIMBAD,HE;
1,2005JAD....11....2D;SIMBAD,CV;*|CV*|IR|PN|UV|V*
2,2005JAD....11....2D;2020RNAAS...4..219J;SIMBAD,CV:;CataclyV;*|CV*|IR|X
3,2003A&A...404..301R;2007A&A...469..807L;SIMBAD,"XT;T, R;*|BH?|HXB|LXB|No*|V*|X|gam"
4,2006A&A...455.1165L;2018ApJS..235....4O;SIMBAD...,"T, P, C;HMXB;*|Be*|Em*|Er*|HXB|IR|N*|Psr|Rad|S..."
...,...,...
85,2020RNAAS...4..219J;SIMBAD,CataclyV;CV*|X
86,2005JAD....11....2D;SIMBAD,CV:;*|IR|WD*
87,2005AJ....129.1993M;SIMBAD,HE;Pl?
88,2020RNAAS...4..219J;SIMBAD,CataclyV;*|CV*|EB*|EB?|IR|PM*|V*|WD*


In [8]:
df_TD_multi_check = df_TD_multi[~df_TD_multi['name'].isin(rare_sources_saving_dict)].reset_index(drop=True)
df_TD_multi_check['comment'] = ''

for name_uniq in df_TD_multi_check['name'].unique():
    
    dup = df_TD_multi_check[df_TD_multi_check['name']==name_uniq]
    if 'SIMBAD' in dup['ref'].unique():
        if len(dup)==2:
            other_class = dup.loc[dup['ref']!='SIMBAD', 'Class'].values
            #print(other_class)
            if other_class[0] == 'HM-STAR' or other_class[0] == 'LM-STAR':
                if dup.loc[dup['ref']=='SIMBAD', 'Class'].values not in Simbad_dict[other_class[0]] +  Simbad_dict['Common']+Simbad_dict['STAR']:
                    #print(dup[['Class','SubClass','ref','name_cat','ra_cat','dec_cat','sep']])
                    df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|1'
                            
            elif other_class[0] == 'HMXB' or other_class[0] == 'LMXB':
                if dup.loc[dup['ref']=='SIMBAD', 'Class'].values not in Simbad_dict[other_class[0]] +  Simbad_dict['Common']+ Simbad_dict['XRB']:
                    #print(dup[['Class','SubClass','name_cat','ra_cat','dec_cat','sep']])
                    df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|2'
      
            else:       
                if dup.loc[dup['ref']=='SIMBAD', 'Class'].values not in Simbad_dict[other_class[0]] +  Simbad_dict['Common']:
                    #print(dup[['Class','SubClass','name_cat','ra_cat','dec_cat','sep']])
                    df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|3'
        else:
            other_class = dup.loc[dup['ref']!='SIMBAD', 'Class'].unique()
            
            if len(other_class)>1:
                #print('>3!!!', dup[['Class','SubClass','ref','name_cat','ra_cat','dec_cat']])
                df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|4'

            else:
                if other_class[0] == 'HM-STAR' or other_class[0] == 'LM-STAR':
                    if dup.loc[dup['ref']=='SIMBAD', 'Class'].values not in Simbad_dict[other_class[0]] +  Simbad_dict['Common']+Simbad_dict['STAR']:
                        #print(dup[['Class','SubClass','name_cat','ra_cat','dec_cat','sep']])
                        df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|5'

                elif other_class[0] == 'HMXB' or other_class[0] == 'LMXB':
                    if dup.loc[dup['ref']=='SIMBAD', 'Class'].values not in Simbad_dict[other_class[0]] +  Simbad_dict['Common']+ Simbad_dict['XRB']:
                        #print(dup[['Class','SubClass','name_cat','ra_cat','dec_cat','sep']])
                        df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|6'

                else:       
                    if dup.loc[dup['ref']=='SIMBAD', 'Class'].values not in Simbad_dict[other_class[0]] +  Simbad_dict['Common']:
                        #print(dup[['Class','SubClass','name_cat','ra_cat','dec_cat','sep']])
                        df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|7'

        

    else:
        other_class = dup['Class'].unique()
        if len(other_class)>1:
            
            #print('ha',dup[['Class','SubClass','ref','name_cat','ra_cat','dec_cat']])
            df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] = df_TD_multi_check.loc[df_TD_multi_check['name']==name_uniq, 'comment'] + '|8'

        
#'''    


In [9]:
print(len(df_TD_multi_check[df_TD_multi_check['comment']=='']))
print(len(df_TD_multi_check[df_TD_multi_check['remove_regions']=='']))
print(len(df_TD_multi_check[(df_TD_multi_check['remove_regions']=='') & (df_TD_multi_check['comment']=='')]))



df_TD_multi_keep = df_TD_multi_check[(df_TD_multi_check['comment']=='')]
df_TD_multi_keep_final = df_TD_multi_keep[~df_TD_multi_keep.duplicated(subset=['name'])].reset_index(drop=True)



df_TD_multi_keep['SubClass'] = df_TD_multi_keep['SubClass'].fillna('')
for i, row in df_TD_multi_keep_final.iterrows():
    
    #print(row['name'])
    #refs = []
    #print(df_TD_multi_save.loc[df_TD_multi_save['name']==row['name'], 'ref'].values)
    refs = ";".join(df_TD_multi_keep.loc[df_TD_multi_keep['name']==row['name'], 'ref'].values)
    SubClasses = ";".join(df_TD_multi_keep.loc[df_TD_multi_keep['name']==row['name'], 'SubClass'].values)
    #print(df_TD_multi_save.loc[df_TD_multi_save['name']==row['name'], 'SubClass'].values)
    #print(refs)
    #print(SubClasses)
    df_TD_multi_keep_final.loc[i, 'refs'] = refs
    df_TD_multi_keep_final.loc[i, 'SubClasses'] = SubClasses


print(len(df_TD_multi_keep))

7920
0
0
7920


In [10]:
print(df_TD_multi_keep_final[['ref','refs','SubClass','SubClasses']])

df_TD_multi_keep_final = df_TD_multi_keep_final.drop(columns=['ref','SubClass']).rename(columns={'refs':'ref', 'SubClasses':'SubClass'})

print(df_TD_multi_keep_final[['ref','SubClass']])


                      ref                                            refs  \
0     2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
1     2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
2     2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
3     2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
4     2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
...                   ...                                             ...   
3608  2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
3609  2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
3610  2010A&A...518A..10V  2010A&A...518A..10V;2018ApJS..235....4O;SIMBAD   
3611  2010A&A...518A..10V                      2010A&A...518A..10V;SIMBAD   
3612  1982MSS...C03....0H                      1982MSS...C03....0H;SIMBAD   

     SubClass                         SubClasses  
0           Q           

In [11]:
#print(df_TD_single_keep['Class'].value_counts())
#print(df_TD_multi_save['Class'].value_counts())
#print(df_TD_multi_keep['Class'].value_counts())


df_TD_final = pd.concat([df_TD_single_keep, df_TD_multi_save_final, df_TD_multi_keep_final], ignore_index=True, sort=False)



In [12]:
print(df_TD_final.loc[df_TD_final['name'].isin(rare_sources_saving_dict), ['remove_regions']].value_counts())
#df_TD_multi_keep_final#[df_TD_multi_keep_final['name']=='2CXO J153709.9+115555']

remove_regions 
|GalacticCenter    11
dtype: int64


In [13]:
#print(df_TD_final.loc[df_TD_final['remove_regions']!='', 'name'].values)
print(df_TD_final[(df_TD_final['remove_regions']=='|GalacticCenter') & (df_TD_final['rare-type']==1) & (~df_TD_final['name'].isin(rare_sources_saving_dict))])
print(df_TD_final.loc[(df_TD_final['remove_regions']=='|GalacticCenter') & (df_TD_final['rare-type']==1) & (~df_TD_final['name'].isin(rare_sources_saving_dict)), 'Class'].value_counts())
#df_TD_final[(df_TD_final['remove_regions']=='|GalacticCenter') & (df_TD_final['rare-type']==1) & (~df_TD_final['name'].isin(rare_sources_saving_dict))].to_csv('./data/TD_raretype_inGalacticPlane.csv',index=False)
#print(df_TD_final['remove_regions'].value_counts())

                       name         name_cat      ra_cat    dec_cat  e_Pos  \
86    2CXO J173233.5-313123       J1732-3131  263.139750 -31.523056  2.000   
87    2CXO J174157.2-205412       J1741-2054  265.488680 -20.903278  0.045   
89    2CXO J174618.4-220946        AT2021kwb  266.577108 -22.163050    NaN   
90    2CXO J174722.7-280914       J1747-2809  266.845000 -28.154167  0.500   
91    2CXO J174726.1-295957     SLX 1744-299  266.857917 -29.999389    NaN   
...                     ...              ...         ...        ...    ...   
3374  2CXO J180451.3-274512  OGLE-BLG-DN-645  271.213958 -27.753333    NaN   
3375  2CXO J180457.9-283353  OGLE-BLG-DN-651  271.241500 -28.564694    NaN   
3376  2CXO J180507.0-274309       J1805-2743  271.279583 -27.719167  1.000   
3377  2CXO J180540.4-273425  OGLE-BLG-DN-673  271.418580 -27.574288    NaN   
3386  2CXO J180950.2-233222       J1809-2332  272.459375 -23.539633  0.450   

     Class                    SubClass  \
86      NS           

In [14]:
#df_TD_final = df_TD_final[df_TD_final['remove_regions']==''].reset_index(drop=True)
print(len(df_TD_single_keep),len(df_TD_multi_save),len(df_TD_multi_keep),len(df_TD_final))

print(df_TD_final['Class'].value_counts())

120 241 7920 3823
AGN        1573
YSO         752
LM-STAR     741
CV          235
HM-STAR     167
NS          134
LMXB        101
HMXB         78
NS_BIN       42
Name: Class, dtype: int64


In [15]:
df_TD_final.columns

Index(['name', 'name_cat', 'ra_cat', 'dec_cat', 'e_Pos', 'Class', 'SubClass',
       'ref', 'sep', 'ra', 'dec', 'r0', 'r1', 'PA', 'PU', 'nbref',
       'coo_bibcode', 'rare-type', 'remove_regions', 'GLAT', 'GLON',
       'comment'],
      dtype='object')

In [16]:
df_rare_save = pd.DataFrame(rare_sources_saving_dict).T
print(df_rare_save)
df_rare_save.reset_index(inplace=True)
df_rare_save =  df_rare_save.rename(columns={'index':'name'}) 
print(len(df_rare_save['name'].unique()))

df_rare_remove = pd.DataFrame(rare_sources_removed_dict.items(), columns=['name', 'comment'])

#df_rare_remove = pd.DataFrame.from_dict(rare_sources_removed_dict) #pd.DataFrame(rare_sources_removed_dict).T
print(len(df_rare_remove))
#df_rare_remove.reset_index(inplace=True)
#df_rare_remove =  df_rare_remove.rename(columns={'index':'name'}) 
print(df_rare_remove)
print(len(df_rare_remove['name'].unique()))
print(df_rare_remove[df_rare_remove['name'].isin(df_rare_save['name'])])
print(df_rare_save[df_rare_save['name'].isin(df_rare_remove['name'])])

                        Class  \
2CXO J073751.2-303940      NS   
2CXO J153709.9+115555      NS   
2CXO J112401.1-365319  NS_BIN   
2CXO J185843.6+032606    LMXB   
2CXO J130247.6-635008    HMXB   
...                       ...   
2CXO J191404.2+095258    HMXB   
2CXO J193030.1+185214      NS   
2CXO J202105.4+365104      NS   
2CXO J222552.8+653536      NS   
2CXO J222552.6+653535      NS   

                                                                 Comment  
2CXO J073751.2-303940  change from NS_BIN to NS since it is a double ...  
2CXO J153709.9+115555  change from NS_BIN to NS since it is a double ...  
2CXO J112401.1-365319   change from NS to NS_BIN as a black widow pulsar  
2CXO J185843.6+032606                                2021ApJ...909..154T  
2CXO J130247.6-635008                       recorded in the ATNF catalog  
...                                                                  ...  
2CXO J191404.2+095258              confused with 2CXO J191404.2+095258X.  
2CX

In [17]:
TD_rare_GB = df_TD_final[(~df_TD_final['name'].isin(rare_sources_saving_dict)) & (df_TD_final['rare-type']==1) & (df_TD_final['remove_regions']=='|GalacticCenter')].reset_index(drop=True)

print(TD_rare_GB)

TD_rare_GB.to_csv(f'{data_dir}/{field_name}_raretype_GalacticBulge.csv',index=False)


                     name         name_cat      ra_cat    dec_cat  e_Pos  \
0   2CXO J173233.5-313123       J1732-3131  263.139750 -31.523056  2.000   
1   2CXO J174157.2-205412       J1741-2054  265.488680 -20.903278  0.045   
2   2CXO J174618.4-220946        AT2021kwb  266.577108 -22.163050    NaN   
3   2CXO J174722.7-280914       J1747-2809  266.845000 -28.154167  0.500   
4   2CXO J174726.1-295957     SLX 1744-299  266.857917 -29.999389    NaN   
..                    ...              ...         ...        ...    ...   
88  2CXO J180451.3-274512  OGLE-BLG-DN-645  271.213958 -27.753333    NaN   
89  2CXO J180457.9-283353  OGLE-BLG-DN-651  271.241500 -28.564694    NaN   
90  2CXO J180507.0-274309       J1805-2743  271.279583 -27.719167  1.000   
91  2CXO J180540.4-273425  OGLE-BLG-DN-673  271.418580 -27.574288    NaN   
92  2CXO J180950.2-233222       J1809-2332  272.459375 -23.539633  0.450   

   Class                    SubClass  \
0     NS                          HE   
1     N

In [18]:
#df_TD_final = df_TD_final[(df_TD_final['name'].isin(rare_sources_saving_dict)) | (df_TD_final['remove_regions']=='')].reset_index(drop=True)



In [19]:
df_TD_final = df_TD_final[~df_TD_final['name'].isin(df_rare_remove['name'])].reset_index(drop=True)

df_TD_final.to_csv(f'{data_dir}/{field_name}_final.csv',index=False)

In [20]:
print(len(df_TD_final))

print(df_TD_final['remove_regions'].value_counts())
print(df_TD_final[df_TD_final['remove_regions']!=''])

3797
|GalacticCenter                142
|SMC                            73
|Westerlund_1                   19
|LMC                            19
|NGC_6397                       11
|NGC_6440|GalacticCenter        10
|M_31                           10
|NGC_6752                        9
|Terzan_5|GalacticCenter         8
|NGC_6715                        7
|NGC_6121                        6
|NGC_6093                        6
|NGC_6791                        5
|NGC_6388                        4
|GalacticCenter|CSCconfused      4
|NGC_5139                        3
|Circinus                        3
|NGC_6266                        3
|NGC_3201                        3
|NGC_3079                        3
|NGC_2264                        3
|M_33                            3
|NGC_6341                        3
|NGC_6522|GalacticCenter         2
|NGC_6205                        2
|NGC_6656                        2
|M_81                            2
|NGC_2808                        2
|CSCconfused   

In [21]:
# still want to save ??
# 2CXO J102347.6+003840
# 2CXO J133001.0+471343



# accept TD_rare_multi_good.csv, but still remove remove_reasons with 18 

# maybe 2CXO J130848.1+212707 a new NS PSR J1308+2127 (2012MNRAS.419.1525H)
# maybe 2CXO J184625.8+091949 is a new NS PSR J1846+0919 (Fermi source)


# many are from LMXBs in early-type galaxies. I. Chandra (Humphrey+, 2008)
# remove from 2012MNRAS.419.2095M

# keeping 

# 2012ApJ...759..123S: DEM L241, a supernova remnant containing a high-mass X-ray binary


# 47_Tuc
# 15.*(24+05.67/60)/60
# -(72+(4+52.6/60)/60)
# 43.8/60

# accept TD_rare_multi_good.csv, but still remove remove_reasons with 18 

# maybe 2CXO J130848.1+212707 a new NS PSR J1308+2127 (2012MNRAS.419.1525H)
# maybe 2CXO J184625.8+091949 is a new NS PSR J1846+0919 (Fermi source)


# many are from LMXBs in early-type galaxies. I. Chandra (Humphrey+, 2008)
# remove from 2012MNRAS.419.2095M

## do not use 2020AJ....159...43H CVs from LAMOST with Machine learning

### Cross-matching using nway and updating for those match_flag = 2, matching to Gaiadist

In [18]:
TD_MW = pd.read_csv(f'../nway_cross-matching/{field_name}_MW_update.csv')
#print(TD_MW[TD_MW['name']=='2CXO J143308.3-611540'])
#print(len(TD_MW['CSC__2CXO'].unique()))
TD_gaia = TD_MW[TD_MW['GAIA_Source']!=-99].reset_index(drop=True)[['CSC__2CXO','GAIA_RA', 'GAIA_DEC','GAIA_Source']]
#print(len(TD_gaia),TD_gaia['GAIA_RA'].describe())
#print(TD_gaia.columns)
TD_gaia['_q'] = TD_gaia.index+1

#'''
# cross-matching to Gaiadist catalog

viz = Vizier(row_limit=-1,  timeout=5000, columns=["**", "+_r"], catalog='I/352/gedr3dis')

radec = [[TD_gaia.loc[i, 'GAIA_RA'], TD_gaia.loc[i, 'GAIA_DEC']] for i in range(len(TD_gaia))]
rd = Table(Angle(radec, 'deg'), names=('_RAJ2000', '_DEJ2000'))
df_gaiadist = viz.query_region(rd, radius=0.1*u.arcsec)[0].to_pandas()
#'''
#print(len(df_gaiadist))
TD_gaia = pd.merge(TD_gaia, df_gaiadist, on=['_q'], how='inner').rename(columns={'Flag':'gaiadist_flag'})
#print(len(TD_gaia))

TD_MW = pd.merge(TD_MW, TD_gaia[['CSC__2CXO','rgeo','b_rgeo','B_rgeo','rpgeo','b_rpgeo','B_rpgeo','gaiadist_flag']], on='CSC__2CXO',how='outer')



In [19]:
TD_MW[TD_MW['name']=='2CXO J143308.3-611540']

Unnamed: 0,CSC_ID,CSC_RA,CSC_DEC,CSC_err_r0,CSC_err_r1,CSC_PA,CSC__2CXO,CSC__r,CSC_fe,CSC_fc,...,match_flag,name,Gaia,rgeo,b_rgeo,B_rgeo,rpgeo,b_rpgeo,B_rpgeo,gaiadist_flag
3583,1,218.284719,-61.261119,0.726,0.719,131.899994,J143308.3-611540,0.03,0,0,...,1,2CXO J143308.3-611540,Gaia DR3 5878377736381364608,2551.24365,1986.17004,3161.19238,2224.22437,1813.85144,2861.979,10033.0


In [34]:

df_pers = create_perobs_data(TD_MW, query_dir, data_dir, name_type='CSCview', name_col='name', ra_col='CSC_RA',dec_col='CSC_DEC',coord_format='deg',engine='wget')
df_pers.to_csv(f'{data_dir}/{field_name}_per.csv', index=False)


# Calculate the weighted average fluxes and variability parameters for X-ray properties
# Will generate warnings but they can be ignored.

df_pers = pd.read_csv(f'{data_dir}/{field_name}_per.csv', low_memory=False)

df_pers['name'] = df_pers['name'].str.lstrip()
df_pers['per_remove_code'] = 0

newdata_dir = '/Users/huiyang/Research/GitHub/MUWCLASS_CSCv2/codes/buildTD/data'
df_ave, df_obs = cal_ave(df_pers, data_dir=newdata_dir, dtype='TD',Chandratype='CSC')


#df_ave = pd.merge(df_ave, df_TD_final.iloc[:, np.r_[:9]], how='inner', on='name')
#print(Counter(df_ave.Class))
df_ave.to_csv(f'{data_dir}/{field_name}_ave.csv', index=False)



Operating system is: posix
J143308.3-611540


Resolving cda.cfa.harvard.edu (cda.cfa.harvard.edu)... 131.142.198.67
Connecting to cda.cfa.harvard.edu (cda.cfa.harvard.edu)|131.142.198.67|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: ‘../data/query/J143308.3-611540.txt’

     0K .......... .......... .......... ......                1.22M=0.03s

2022-12-02 10:57:00 (1.22 MB/s) - ‘../data/query/J143308.3-611540.txt’ saved [37423]

FINISHED --2022-12-02 10:57:00--
Total wall clock time: 0.4s
Downloaded: 1 files, 37K in 0.03s (1.22 MB/s)


There are 20185 per-obs data.
Run add_newdata......
Before adding new data:
Run stats......
 H   M   S    #    %  
--- --- --- ----- ----
  Y   Y   Y 15684 77.7
  Y   Y   N   944  4.6
  N   Y   Y   585  2.8
  Y   N   Y   334  1.6
  N   Y   N    99  0.4
  Y   N   N   481  2.3
  N   N   Y   264  1.3
  N   N   N  1794  8.8
 ~Y   Y   Y  4501 22.2
-----------------
total:      20185
Only  15684  detections have valid fluxes at all bands.
After adding new  2289 s band data:
Run stats......
 H   M   S    #    %  
--- --- --- ----- ----
  Y   Y   Y 16154 80.0
  Y   Y   N   474  2.3
  N   Y   Y   618  3.0
  Y   N   Y   519  2.5
  N   Y   N    66  0.3
  Y   N   N   296  1.4
  N   N   Y   381  1.8
  N   N   N  1677  8.3
 ~Y   Y   Y  4031 19.9
-----------------
total:      20185
Only  16154  detections have valid fluxes at all bands.
After adding new  1420 m band data:
Run stats......
 H   M   S    #    %  
--- --- --- ----- ----
  Y   Y   Y 16423 81.3
  Y   Y   N   475  2.3
  N   Y   Y   767  3.7

In [20]:
df_ave = pd.read_csv(f'{data_dir}/{field_name}_ave.csv')
df_ave#.columns

Unnamed: 0,name,usrid,ra_pnt,dec_pnt,ra,dec,err_ellipse_r0,err_ellipse_r1,err_ellipse_ang,significance,...,flux_aper90_ave_h,e_flux_aper90_ave_h,chisqr,dof,kp_prob_b_max,var_inter_prob,significance_max,flux_flag,flux_aper90_ave_b,e_flux_aper90_ave_b
0,2CXO J000150.5+233015,1,00 01 40.98,+23 29 24.37,0.460542,23.504339,0.81,0.75,141.3,3.88,...,7.978846e-18,6.028103e-18,,-1.0,,,,4,2.412479e-15,6.577192e-16
1,2CXO J000701.5+730308,2,00 07 07.79,+73 03 09.89,1.756542,73.052258,0.71,0.71,0.0,15.50,...,2.488798e-14,3.525002e-15,,0.0,0.732,,15.50,0,3.681678e-14,3.699694e-15
2,2CXO J001826.4+300400,3,00 18 20.86,+30 05 39.11,4.610042,30.066714,0.91,0.82,79.4,2.10,...,1.314160e-15,1.134900e-15,,0.0,0.838,,2.10,0,2.769013e-15,1.372724e-15
3,2CXO J005240.0+562837,4,00 52 25.24,+56 33 47.56,13.166792,56.477003,0.86,0.77,40.1,8.69,...,1.801145e-15,8.182476e-16,0.256757,2.0,0.934,0.120,6.80,0,9.455264e-15,1.219600e-15
4,2CXO J005510.9-373854,5,00 54 53.05,-37 38 42.09,13.795750,-37.648475,0.72,0.72,36.7,7.26,...,7.978846e-18,6.028103e-18,,-1.0,,,,6,9.823934e-15,2.879636e-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3506,2CXO J005153.0-723148,3793,00 53 11.45,-72 26 29.91,12.971250,-72.530239,0.97,0.79,164.1,11.61,...,2.603043e-14,3.132892e-15,49.234283,2.0,0.999,1.000,9.13,0,3.225279e-14,3.244835e-15
3507,2CXO J005130.8-730123,3794,00 51 54.24,-73 00 52.21,12.878417,-73.023089,0.73,0.73,51.6,5.92,...,6.798283e-16,4.190849e-16,2.088944,1.0,0.936,0.852,4.37,0,4.571996e-15,8.223908e-16
3508,2CXO J002414.4-715848,3795,00 23 54.47,-72 04 22.97,6.060000,-71.980267,0.84,0.78,175.7,10.08,...,9.364890e-16,5.006469e-16,0.612041,1.0,0.040,0.566,7.86,0,1.648000e-14,1.699366e-15
3509,2CXO J002407.1-720545,3796,00 23 54.52,-72 04 22.94,6.029833,-72.096075,0.71,0.71,44.5,18.17,...,9.574755e-15,8.342288e-16,179.493788,9.0,0.924,1.000,15.04,0,1.540087e-14,8.849684e-16


# filter on remove_regions and save_sources, combine all columns

In [21]:
df_TD_final = pd.read_csv(f'{data_dir}/{field_name}_final.csv')
#print(df_TD_final)

TD_final = df_TD_final[(df_TD_final['remove_regions']=='') | (df_TD_final['remove_regions'].isnull()) |(df_TD_final['name'].isin(rare_sources_saving_dict)) ].reset_index(drop=True)
print(len(TD_final))
print(TD_final['Class'].value_counts())

#print(TD_final.columns)

TD_final = pd.merge(TD_final[['name','name_cat','ra_cat','dec_cat','Class','SubClass','ref','GLAT','GLON']], df_ave, on='name',how='inner')

TD_MW['name'] = '2CXO '+TD_MW['CSC__2CXO']

TD_final = pd.merge(TD_final, TD_MW, on='name',how='inner')

print(len(TD_final))
print(TD_final['Class'].value_counts())



3411
AGN        1554
YSO         752
LM-STAR     672
HM-STAR     133
NS          109
CV           61
LMXB         54
HMXB         52
NS_BIN       24
Name: Class, dtype: int64
3163
AGN        1405
YSO         748
LM-STAR     643
HM-STAR     129
NS           95
CV           47
LMXB         39
HMXB         33
NS_BIN       24
Name: Class, dtype: int64


In [22]:
TD_final.columns[50:100]

Index(['flux_aper90_ave_h', 'e_flux_aper90_ave_h', 'chisqr', 'dof',
       'kp_prob_b_max', 'var_inter_prob', 'significance_max', 'flux_flag',
       'flux_aper90_ave_b', 'e_flux_aper90_ave_b', 'CSC_ID', 'CSC_RA',
       'CSC_DEC', 'CSC_err_r0', 'CSC_err_r1', 'CSC_PA', 'CSC__2CXO', 'CSC__r',
       'CSC_fe', 'CSC_fc', 'GAIA_RA', 'GAIA_DEC', 'GAIA_PU', 'GAIA_Source',
       'GAIA__r', 'GAIA_e_Pos', 'GAIA_Plx', 'GAIA_e_Plx', 'GAIA_PM',
       'GAIA_e_PM', 'GAIA_epsi', 'GAIA_Gmag', 'GAIA_BPmag', 'GAIA_RPmag',
       'GAIA_e_Gmag', 'GAIA_e_BPmag', 'GAIA_e_RPmag', 'TMASS_RA', 'TMASS_DEC',
       'TMASS_err0', 'TMASS_err1', 'TMASS_errPA', 'TMASS__2MASS', 'TMASS__r',
       'TMASS_Jmag', 'TMASS_Hmag', 'TMASS_Kmag', 'TMASS_e_Jmag',
       'TMASS_e_Hmag', 'TMASS_e_Kmag'],
      dtype='object')

In [38]:
catalogs = {'GAIA': ['Gmag','BPmag','RPmag'], 
            'TMASS': ['Jmag','Hmag','Kmag' ],
            'ALLWISE': ['W1mag','W2mag','W3mag','W4mag'],
            'CATWISE':['W1mproPM','W2mproPM']}
for cat in catalogs:
    for col in catalogs[cat]:
        print(col)
        TD_final = TD_final.rename(columns={cat+'_'+col:col, cat+'_e_'+col:'e_'+col})
        
exnum = -99.
for w in ['W1', 'W2']:
    TD_final[w+'mag_comb'] = TD_final[w+'mag']
    TD_final['e_'+w+'mag_comb'] = TD_final['e_'+w+'mag']
    s= np.where((TD_final[w+'mag_comb']==exnum) & (TD_final[w+'mproPM']!=exnum))[0]
    print(sorted(Counter(TD_final.loc[s,'Class']).items()), 'use ',w,' from CATWISE rather than ALLWISE.')
    TD_final.loc[s, w+'mag_comb'] = TD_final.loc[s, w+'mproPM']
    TD_final.loc[s, 'e_'+w+'mag_comb'] = TD_final.loc[s, 'e_'+w+'mproPM']


Gmag
BPmag
RPmag
Jmag
Hmag
Kmag
W1mag
W2mag
W3mag
W4mag
W1mproPM
W2mproPM
[('AGN', 161), ('CV', 6), ('HM-STAR', 8), ('HMXB', 5), ('LM-STAR', 13), ('LMXB', 3), ('NS', 2), ('NS_BIN', 2), ('YSO', 64)] use  W1  from CATWISE rather than ALLWISE.
[('AGN', 161), ('CV', 6), ('HM-STAR', 8), ('HMXB', 5), ('LM-STAR', 13), ('LMXB', 3), ('NS', 2), ('NS_BIN', 2), ('YSO', 64)] use  W2  from CATWISE rather than ALLWISE.


Index(['flux_aper90_ave_h', 'e_flux_aper90_ave_h', 'chisqr', 'dof',
       'kp_prob_b_max', 'var_inter_prob', 'significance_max', 'flux_flag',
       'flux_aper90_ave_b', 'e_flux_aper90_ave_b', 'CSC_ID', 'CSC_RA',
       'CSC_DEC', 'CSC_err_r0', 'CSC_err_r1', 'CSC_PA', 'CSC__2CXO', 'CSC__r',
       'CSC_fe', 'CSC_fc', 'GAIA_RA', 'GAIA_DEC', 'GAIA_PU', 'GAIA_Source',
       'GAIA__r', 'GAIA_e_Pos', 'GAIA_Plx', 'GAIA_e_Plx', 'GAIA_PM',
       'GAIA_e_PM', 'GAIA_epsi', 'Gmag', 'BPmag', 'RPmag', 'e_Gmag', 'e_BPmag',
       'e_RPmag', 'TMASS_RA', 'TMASS_DEC', 'TMASS_err0', 'TMASS_err1',
       'TMASS_errPA', 'TMASS__2MASS', 'TMASS__r', 'Jmag', 'Hmag', 'Kmag',
       'e_Jmag', 'e_Hmag', 'e_Kmag'],
      dtype='object')

In [40]:
TD_final = TD_final.replace({-99.:np.nan})
TD_final = TD_final.replace({1e20:np.nan})
TD_final['Class'] = TD_final['Class'].replace({'NS_BIN':'LMXB'})

print(len(TD_final))
#df_comb_sel = df_comb_sel[~df_comb_sel['name'].isin(XCLASS_remove)].reset_index(drop=True)
#print(len(df_comb_sel))
#print(df_comb_sel['Class'].value_counts())

TD_final.to_csv(f'./data/{field_name}_MW_comb.csv',index=False)

3163


In [175]:
TD_final = pd.read_csv(f'./data/{field_name}_MW_comb.csv')

TD_final.columns[150:]



Index(['Separation_CATWISE_TMASS_dec', 'Separation_CATWISE_ALLWISE',
       'Separation_CATWISE_ALLWISE_ra', 'Separation_CATWISE_ALLWISE_dec',
       'Separation_max', 'ncat', 'dist_bayesfactor',
       'dist_bayesfactor_corrected', 'dist_post', 'p_single', 'p_any', 'p_i',
       'match_flag', 'Gaia', 'rgeo', 'b_rgeo', 'B_rgeo', 'rpgeo', 'b_rpgeo',
       'B_rpgeo', 'gaiadist_flag', '_q', 'W1mag_comb', 'e_W1mag_comb',
       'W2mag_comb', 'e_W2mag_comb'],
      dtype='object')

In [176]:
def clean_ref(ref):
    
    refs = list(set(ref.split(';')))
    #print(refs)
    if len(refs)==1:
        #print('1')
        #print(refs)
        #print(refs[0])
        return refs[0]
    else:
        
        refs_digit = [refs[i] for i in range(len(refs)) if refs[i][0].isdigit()]
        
        if len(refs_digit)>0:
            #print('2')
            #print(refs)
            #print(sorted(refs_digit)[-1])
            #print( sorted(refs_digit))
            return sorted(refs_digit)[-1]
        else:
            #print('3')
            #print(refs)
            #print(sorted(refs)[0])
            return sorted(refs)[0]
       
    

In [177]:
TD_final[TD_final['ref'].str.contains('2010A&A...517A')]

Unnamed: 0,name,name_cat,ra_cat,dec_cat,Class,SubClass,ref,GLAT,GLON,usrid,...,B_rgeo,rpgeo,b_rpgeo,B_rpgeo,gaiadist_flag,_q,W1mag_comb,e_W1mag_comb,W2mag_comb,e_W2mag_comb
3118,2CXO J231331.8+612747,[PMN2010] 17,348.383167,61.463139,HM-STAR,B5/9V;*,2010A&A...517A...2P;SIMBAD,111.514836,0.780824,3538,...,1936.67432,980.726868,865.897461,1112.19324,10133.0,3538,,,,
3119,2CXO J231338.4+612847,[PMN2010] 55,348.410458,61.479889,LM-STAR,G9;*|Y*O,2010A&A...517A...2P;SIMBAD,111.533183,0.791433,3539,...,,,,,,3539,,,,
3120,2CXO J231340.7+612828,[PMN2010] 39,348.419958,61.474472,LM-STAR,K1;*|Y*O,2010A&A...517A...2P;SIMBAD,111.535479,0.784807,3540,...,5008.32764,4171.00635,2307.33447,5628.48975,10033.0,3540,,,,


In [178]:
# 2021ApJ...908...49F     # An Improved Hertzsprung-Russell Diagram for the Orion Trapezium Cluster
# 2004ApJ...610.1045S     # The Spectroscopically Determined Substellar Mass Function of the Orion Nebula Cluster
# 1979ApJS...41..743C     # Observational studies of pre-main-sequence evolution
# 2004ApJ...616..998W     # On the Evolutionary Status of Class I Stars and Herbig-Haro Energy Sources in Taurus-Auriga
# 2004AJ....128.1233H     # LkHα 101 and the Young Cluster in NGC 1579
# 2010A&A...517A          # Post-flare evolution of AR 10923 with Hinode/XRT, near a star-forming region

# 2CXO J122745.2+130012, in NGC 4438 galaxy

TD_final = TD_final[~TD_final['ref'].str.contains('2004ApJ...610.1045S|2021ApJ...908...49F|1979ApJS...41..743C|2004ApJ...616..998W|2004AJ....128.1233H|2010A&A...517A')].reset_index(drop=True)
TD_final = TD_final[TD_final['name']!='2CXO J122745.2+130012'].reset_index(drop=True)
TD_final['refs'] = TD_final['ref']
TD_final['ref'] = TD_final.apply(lambda r:  clean_ref(r['refs']),axis=1)
TD_final['ref'] = TD_final['ref'].replace({'SIMBAD':'Simbad'})
TD_final.to_csv(f'./data/{field_name}_MW_comb_cleaned.csv',index=False)



In [179]:
#print(TD_final.columns[100:150])

TD_final_NSclean = TD_final.copy()

for col in ['Gmag','BPmag','RPmag','Jmag','Hmag','Kmag','W1mag','W2mag','W3mag','W4mag','W1mag_comb','W2mag_comb','rgeo']:
    TD_final_NSclean.loc[TD_final_NSclean['Class']=='NS', col] = np.nan

TD_final_NSclean.to_csv(f'./data/{field_name}_MW_comb_NScleaned.csv',index=False)


In [180]:
print(TD_final['Class'].value_counts())

AGN        1405
YSO         748
LM-STAR     607
HM-STAR     128
NS           95
LMXB         63
CV           47
HMXB         33
Name: Class, dtype: int64


In [163]:
#print(TD_final[TD_final['Class']=='LM-STAR', 'ref'].value_counts()[:50])
print(TD_final['ref'].value_counts()[:60])
TD_final['ref1'] = TD_final.apply(lambda r: r['ref'][0], axis=1)
print(TD_final.loc[~TD_final['ref1'].str.isdigit(), 'ref'].value_counts())
#print(TD_final[TD_final['ref']=='LAMOST-DR8-AFGK;LAMOST-DR8-AFGK;LAMOST-DR8-AFGK;SIMBAD'])

2010A&A...518A..10V                   1349
2012AJ....144..192M                    589
LAMOST-DR8-AFGK                        284
2005AJ....129.1993M                    104
2018ApJS..235....4O                     99
LAMOST-DR8-M                            96
2011ApJS..194...14P                     84
2020AJ....160..120J                     79
2020RNAAS...4..219J                     33
2007A&A...463..275G                     32
2011A&A...531A.141D                     22
2019ApJS..241...32L                     21
2011ApJS..196....4R                     18
2001NewAR..45..135V                     17
2007A&A...469..807L                     16
Simbad                                  11
2007ApJ...664.1102K                     11
2016AJ....152..190A                     11
1958PMcCO..13b....V                      9
1982MSS...C03....0H                      9
2005JAD....11....2D                      9
1956ApJ...124..530S                      8
1978MSS...C02....0H                      7
1975MSS...C

In [46]:
simbad = pd.read_csv(f'./data/{field_name}_SIMBAD.csv')
simbad

Unnamed: 0,name_cat,ra_cat,dec_cat,e_Pos,Class,SubClass,ref,sep,name,ra,...,r0,r1,PA,PU,nbref,coo_bibcode,rare-type,remove_regions,GLAT,GLON
0,2MASS J23595985+0043327,359.999298,0.725647,0.000,low-mass*,*|IR|LM*,SIMBAD,2.905539,2CXO J000000.0+004331,0.000020,...,0.887,0.815,40.299999,0.887000,2.0,2020yCat.1350....0G,4,,96.907047,-59.522084
1,GALEX 2674128848516287376,0.007867,-9.781120,0.001,Blue,*|UV|blu,SIMBAD,1.801496,2CXO J000001.8-094653,0.007652,...,4.391,4.291,23.139999,4.391000,1.0,2020yCat.1350....0G,4,,85.551010,-68.835367
2,[VV2003c] J000002.3+003739,0.009583,0.627500,,Seyfert_1,AGN|Sy1,SIMBAD,0.390247,2CXO J000002.2+003739,0.009569,...,1.799,1.079,107.800003,1.799000,0.0,,4,,96.849010,-59.615722
3,QSO B2357-3520,0.012188,-35.059058,0.000,QSO,*|IR|QSO|X,SIMBAD,0.384430,2CXO J000002.9-350332,0.012318,...,0.855,0.736,172.600006,0.855000,9.0,2020yCat.1350....0G,4,,353.944479,-76.457965
4,2MASX J00000472+0046546,0.019775,0.781717,0.006,Seyfert_2,*|G|Sy2,SIMBAD,0.352651,2CXO J000004.7+004654,0.019850,...,0.945,0.882,42.939999,0.945076,3.0,2020yCat.1350....0G,4,,96.986429,-59.477794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93883,QSO J2359-1241,359.973462,-12.696654,0.000,QSO,*|IR|QSO|Rad,SIMBAD,0.354427,2CXO J235953.6-124148,359.973544,...,0.733,0.729,176.399994,0.733000,44.0,2020yCat.1350....0G,4,,80.503700,-71.187814
93884,SDSS J235957.04+135643.2,359.987676,13.945358,0.001,QSO,*|QSO,SIMBAD,0.281932,2CXO J235957.0+135643,359.987635,...,2.040,1.273,153.699997,2.040001,5.0,2020yCat.1350....0G,4,,104.424913,-47.067893
93885,SDSS J235957.55+003919.8,359.989825,0.655514,0.077,QSO,G|QSO,SIMBAD,0.235522,2CXO J235957.5+003919,359.989889,...,0.921,0.750,119.199997,0.933786,4.0,2009ApJS..182..543A,4,,96.834821,-59.582327
93886,[VV2006] J235958.2+005139,359.992564,0.861058,0.000,QSO,*|QSO,SIMBAD,1.108061,2CXO J235958.2+005140,359.992851,...,3.002,2.125,20.920000,3.002000,25.0,2020yCat.1350....0G,4,,96.998315,-59.394159


In [32]:
TD_final[TD_final['name']=='2CXO J053515.7-052424']

Unnamed: 0,name,name_cat,ra_cat,dec_cat,Class,SubClass,ref,GLAT,GLON,usrid,...,B_rgeo,rpgeo,b_rpgeo,B_rpgeo,gaiadist_flag,_q,W1mag_comb,e_W1mag_comb,W2mag_comb,e_W2mag_comb


In [151]:
XCLASS_remove = {
    # LM-STAR to the up right of MS, confused with YSO
    "2CXO J074108.5+202544",# 'Symbiotic Star', RG + WD
    "2CXO J084621.1+013755",# Mira variable,  RG
    "2CXO J184632.0-025724",# Long-period variable star
    "2CXO J200509.2+322201",# new sources detected by LAMOST-DR8-M
    "2CXO J022635.3+621937",# might be a B0-4Ve_sh D type star
    "2CXO J053517.8-052430",# Irregular Variable
    "2CXO J013632.0+154507",# High Proper Motion Star
    "2CXO J043019.1+351745",# Emission-line Star
    "2CXO J053515.7-052424",# YSO 
    "2CXO J053525.3-052345",# Irregular Variable
    "2CXO J162812.2-245044",# RotV*
    "2CXO J203323.0+411222",#Em*
    "2CXO J205821.5+435344",#Em*
    #"2CXO J053524.9-052401",
    #"2CXO J053525.4-052333",
    #"2CXO J122745.2+130012",
    #"2CXO J131218.4-623730",
    #"2CXO J203342.9+413000",
    #"2CXO J231340.7+612828",
    # LM-STAR to the up left of MS, confused with HM-STAR
    "2CXO J055222.1+323843",# F0 E type, emission lines?
    "2CXO J110435.9-584520",# SB* 
    "2CXO J205448.6-582715",# K1II C 1975MSS...C01....0H
    #"2CXO J055202.2+323043",# K1III D 
    # HM-STAR to the up right of MS, confused with YSO
    "2CXO J061254.9+175920",#Y*O ([CAH2008]
    #"2CXO J104517.2-594701",
    #"2CXO J104530.2-594820",
    #"2CXO J231331.8+612747",
    # YSO
    #"2CXO J053528.6-044816",
    }



In [51]:
simbad.loc[simbad['ref']=='2021ApJ...908...49F',['ra','dec']]

Unnamed: 0,ra,dec
17319,83.782181,-5.3575
17366,83.78513,-5.38469
17421,83.791238,-5.394004
17474,83.793864,-5.371221
17493,83.795172,-5.377938
17503,83.795739,-5.373492
17647,83.803962,-5.41602
17666,83.80471,-5.414636
17760,83.808002,-5.388867
17801,83.809984,-5.389679


In [None]:
LAMOST-DR8-AFGK        549 
2020AJ....160..120J    400 # APOGEE
LAMOST-DR8-M           317 # 
2021ApJ...908...49F     44 # An Improved Hertzsprung-Russell Diagram for the Orion Trapezium Cluster
2002AJ....123.1528R     27 # Circumstellar Disk Candidates Identified in NGC 2264, the pre–main-sequence population in the young cluster NGC 2264
2006ApJ...646.1215L     19 # Low-Mass Stars and Brown Dwarfs in NGC 2024: Constraints on the Substellar Mass Function, NGC 2024 is also a young cluster
1939AnBos...5E...1W     18 # A study of the distribution of the stars in the Sagittarius and Ophiuchius regions of the Milky Way
2003ApJ...593.1093L     18 # A Census of the Young Cluster IC 348
1982MSS...C03....0H     15 # Catalogue of two-dimensional spectral types for the HD stars, Vol. 3
1975MSS...C01....0H     12 # Catalogue of two dimentional spectral types for the HD stars, Vol. 1
1978MSS...C02....0H     12 # Catalogue of two dimentional spectral types for the HD stars, Vol. 2
1958PMcCO..13b....V     11 # Spectral classification of faints stars declination zones -2° to +49°
2004ApJ...610.1045S     10 # The Spectroscopically Determined Substellar Mass Function of the Orion Nebula Cluster
2004AJ....127.1131W      9 # Low-Mass Stars and Substellar Objects in the NGC 1333 Molecular Cloud
2003AJ....125.1480A      8 # The Evolutionary State of Stars in the NGC 1333S Star Formation Region
1956PMcCO..13a....B      7 # ??? 
1952CoRut..32....1H      7 # Photographic magnitudes of 55700 stars in the zones 10 deg to 20 deg and 30 deg to 50 deg
1940ApJ....91..244E      7 # Proper Motions in the Galactic Cluster M 67
2006A&A...460..695T      7 # Search for associations containing young stars (SACY). I. Sample and searching method

In [171]:
#for star in XCLASS_remove:
    #print(star)
    #print(simbad.loc[simbad['name']==star, ['name','Class','SubClass']])
simbad.loc[(simbad['name'].isin(XCLASS_remove)), ['name','ra','dec','Class','SubClass','ref']][:50]
#simbad.loc[(simbad['name'].isin(XCLASS_remove)) & (simbad['ref']=='SIMBAD'), ['name','ra','dec','Class','SubClass','ref']]

Unnamed: 0,name,ra,dec,Class,SubClass,ref
6052,2CXO J013632.0+154507,24.133573,15.752117,LM-STAR,K7,LAMOST-DR8-AFGK
6053,2CXO J013632.0+154507,24.133573,15.752117,PM*,*|PM*,SIMBAD
8356,2CXO J022635.3+621937,36.647098,62.32695,LM-STAR,GKg_a,2020AJ....160..120J
14325,2CXO J043019.1+351745,67.579844,35.295953,LM-STAR,K3,LAMOST-DR8-AFGK
14326,2CXO J043019.1+351745,67.579844,35.295953,LM-STAR,G5,LAMOST-DR8-AFGK
14327,2CXO J043019.1+351745,67.579844,35.295953,Em*,*|Em*|IR|NIR|Rad|X|cm,SIMBAD
17942,2CXO J053515.7-052424,83.815762,-5.406815,LM-STAR,M4,2021ApJ...908...49F
17943,2CXO J053515.7-052424,83.815762,-5.406815,YSO,*|IR|Ir*|Y*O,SIMBAD
18189,2CXO J053517.8-052430,83.824232,-5.408442,LM-STAR,M6.5,2021ApJ...908...49F
18190,2CXO J053517.8-052430,83.824232,-5.408442,Irregular_V*,*|IR|Ir*,SIMBAD
