In [None]:
# Script to organize BL APF files: scans the data directories and writes one CSV log row per reduced file

In [1]:
import os
import numpy as np
import astropy.io.fits 
from astroquery.simbad import Simbad
import csv
import hashlib
import datetime

In [2]:
# Find the names of each star so that observations of the same star can be compared
# Exact-match renames applied before querying SIMBAD (raw name -> queried name).
_NAME_ALIASES = {
    'etaCrv': 'eta_Crv',
    'epsCep': 'eps_Cep',
    'bTau': 'b_Tau',
    'SO0253': 'TIC257870150',
    'Alderami': 'alderamin',
    'TIC396356111': 'HIP848',
    'TIC286132427': 'HIP47990',
}


def _normalize_object_name(name):
    """Rewrite a raw (non-empty) object name into the form sent to SIMBAD.

    Returns a tuple ``(name, known_not_a_star, known_binary)``.
    """
    known_not_a_star = False
    known_binary = False

    if name[0].isdigit():
        # Names that begin with a digit are given an 'HD' prefix.
        name = 'HD' + name
        if name.endswith(('A', 'B')):
            # A trailing A/B is dropped and the target is flagged as a binary.
            name = name[:-1]
            known_binary = True
    elif name.startswith('K0'):
        name = 'KOI-' + name[1:]
    elif name.startswith('EPIC-'):
        name = 'EPIC' + name[5:]
    elif name.startswith('KIC') and name.endswith('t'):
        name = name[:-1]
    elif name.startswith('NGC'):
        known_not_a_star = True
        if '_' in name:
            if name[-2] == '_':
                # 'NGCxxxx_y' (single trailing character) -> keep the part before '_'
                name = name.split('_')[0]
            else:
                # NOTE(review): this replaces spaces with underscores, which is a
                # no-op for names without spaces -- confirm the intended direction.
                name = name.replace(' ', '_')
    elif name in _NAME_ALIASES:
        name = _NAME_ALIASES[name]
    elif name.startswith('hip') and not name.startswith('hipass'):
        # purely aesthetic - 'hip' would still query
        name = 'HIP' + name[3:]
    return name, known_not_a_star, known_binary


def get_names(name):
    """Resolve an object name with SIMBAD and collect its identifiers and types.

    The name is first rewritten (HD/KOI/EPIC/KIC/NGC/HIP rules plus a few fixed
    aliases) and then looked up with ``Simbad.query_object`` and
    ``Simbad.query_objectids``. The lookup reads the 'OTYPES' and 'OTYPE_3'
    columns, so the matching votable fields must already be registered on
    ``Simbad`` before this is called.

    Parameters
    ----------
    name : str
        Object name as recorded for the observation. ``None`` and empty or
        whitespace-only strings are reported as unresolved without querying.

    Returns
    -------
    tuple
        ``(HIP_name, simbad_resolvable_name, alt_names, known_not_a_star,
        known_binary, main_otype, otypes_list)``.
        ``HIP_name`` is always a string ('None' when no HIP identifier exists).
        When the lookup fails for any reason the string fields fall back to
        'None' / 'Not found' and ``otypes_list`` to ``['Not found']``.
    """
    # Guard against a missing/blank name; name[0] would otherwise raise.
    if name is None or (isinstance(name, str) and not name.strip()):
        print('empty object name not resolved by SIMBAD')
        return 'None', 'None', 'Not found', False, False, 'Not found', ['Not found']

    name, known_not_a_star, known_binary = _normalize_object_name(name)

    # query Simbad
    try:
        table1 = Simbad.query_object(name).to_pandas()
        otypes_list = table1['OTYPES'][0].split('|')
        main_otype = table1['OTYPE_3'][0]
        table2 = Simbad.query_objectids(name)
        simbad_resolvable_name = name
        alternative_names = table2.to_pandas()
        alternative_names = alternative_names.iloc[:, 0].str.decode('utf-8')  # gets rid of weird formatting
        alt_names = alternative_names.to_list()
        if name.startswith('HIP') and not name.startswith('HIPASS'):
            HIP_name = name
        else:
            try:
                hip_matches = [x for x in alt_names
                               if x.startswith('HIP') and not x.startswith('HIPASS')]
                if len(hip_matches) == 0:
                    HIP_name = 'None'
                    print(name + ' has no HIP name in SIMBAD')  # this should never occur in calibration dataset
                else:
                    if len(hip_matches) > 1:
                        print('more than one HIP name found for ' + name)
                    # Always hand back a single string; with several matches the
                    # first one is used so the return type stays consistent.
                    HIP_name = str(hip_matches[0]).replace(' ', '')
            except AttributeError:
                # Raised when an identifier entry is not a string.
                HIP_name = 'None'
                print(name + ': SIMBAD search error (likely \'No known catalog could be found\' or \'this identifier has an incorrect format for catalog\'')  # should not happen
    except Exception:
        # Best effort: any lookup failure is logged and reported as unresolved.
        # (Exception rather than a bare except so Ctrl-C still interrupts a run.)
        HIP_name = 'None'
        simbad_resolvable_name = 'None'
        alt_names = 'Not found'
        main_otype = 'Not found'
        otypes_list = ['Not found']
        print(name + ' not resolved by SIMBAD')
    return HIP_name, simbad_resolvable_name, alt_names, known_not_a_star, known_binary, main_otype, otypes_list


In [134]:
# Commented-out, simpler variant of get_names: find all names of each star in order to compare the same stars
#def get_names(name):     
#    if name[0].isdigit():
#        name = 'HD' + name
#    elif name.startswith('K0'):
#        name = 'KOI-' + name[1:]
#    elif name.startswith('EPIC-'):
#        name = 'EPIC' + name[5:]
#    try:
#        result_table = Simbad.query_objectids(name)
#        alt_names = result_table.to_pandas()
#        alt_names = alt_names.iloc[:,0].str.decode('utf-8') #gets rid of weird formatting
#    except AttributeError:
#        HIP_name = 'None'
#        print(name + ': SIMBAD search error (likely \'No known catalog could be found\' or \'this identifier has an incorrect format for catalog\'') # should not happen
#    return HIP_name


In [3]:
def _md5_of_file(path, chunk_size=4096):
    """Return the hex MD5 digest of the file at `path`, read in `chunk_size`-byte blocks."""
    md5_hash = hashlib.md5()
    with open(path, "rb") as f:
        for byte_block in iter(lambda: f.read(chunk_size), b""):
            md5_hash.update(byte_block)
    # Digest is taken once after the whole file is read, so an empty file
    # still yields a (valid) digest instead of leaving the value unset.
    return md5_hash.hexdigest()


# First-axis index of the primary HDU data used for the SNR estimate
# (presumably one spectral order -- confirm).
SNR_ORDER = 45

total_files = 0
raw_files = []
num_raw = 0
reduced_files = []
num_reduced = 0
other_files = []
num_other = 0
database_paths = ['/datax/scratch/hisaacson/data', '/datag/blpd0/datax/apf']
log_file_name = 'apf_log_full_19Nov2021.csv' #'additional_spectra_log_27Oct2021.csv' #'apf_log_full_16Aug2021.csv'

fieldnames = ['Filename', 'Tobj', 'Obj', 'HIP_name', 'Simbad_resolvable_name', 'Alt_names', 'Filepath', 'Thorium1', 'Thorium2', 'Halogen1', 'Halogen2', 'Iodine_cell', 'RA','DEC',
              'Date_obs', 'Date_logged', 'SNR', 'Known_non-stellar', 'Known_binary', 'Main_type', 'Types', 'Filesize','Md5']
# Start a fresh log containing only the header row ('w' truncates an existing file).
# newline='' is what the csv module expects for files it writes.
with open(log_file_name, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fieldnames)

# get_names reads the 'OTYPES' and 'OTYPE_3' columns, so request them here.
Simbad.add_votable_fields('otypes')
Simbad.add_votable_fields('otype(3)')
for database_path in database_paths:
    for file in os.listdir(database_path):
        total_files += 1
        if file.startswith('ucb'):
            # counted as raw; not logged
            raw_files += [file]
            num_raw += 1
        elif file.startswith('r'):
            # counted as reduced; one CSV row is written per file
            reduced_files += [file]
            num_reduced += 1
            file_path = os.path.join(database_path, file)

            # Read everything needed from the FITS file inside the context
            # manager so the handle is closed before moving on.
            with astropy.io.fits.open(file_path) as hdul:
                info = hdul[0].header
                name = info['OBJECT']
                tobj = info['TOBJECT']
                obj = info['OBJECT']
                thor1 = info['THORIUM1']
                thor2 = info['THORIUM2']
                hal1 = info['HALOGEN1']
                hal2 = info['HALOGEN2']
                iod = info['ICELNAM']
                date = info['DATE']
                RA = info['RA']
                DEC = info['DEC']
                data = hdul[0].data
                order_data = data[SNR_ORDER, :]
                # sqrt of the median value of that row; a negative median gives NaN
                SNR = np.sqrt(np.median(order_data))

            HIP_name, simbad_resolvable_name, alt_names, known_not_a_star, known_binary, main_otype, otypes_list = get_names(name)
            #print(name  + ', or ' + HIP_name)

            filesize = os.path.getsize(file_path)
            Md5 = _md5_of_file(file_path) # Is this what the Md5 field should be?

            dt = datetime.datetime.now()
            log_date = dt.strftime("%d%b%Y") + '-' + dt.strftime("%X")
            row = [file, tobj, obj, HIP_name, simbad_resolvable_name, alt_names, database_path, thor1, thor2, hal1, hal2,
                   iod, RA, DEC, date, log_date, SNR, known_not_a_star, known_binary, main_otype, otypes_list, filesize, Md5]
            # Append per file so rows already written survive a later failure.
            with open(log_file_name, 'a', newline='') as csvfile:
                csvwriter = csv.writer(csvfile)
                csvwriter.writerow(row)
        else:
            other_files += [file]
            num_other += 1


print('In director(y/ies):')
print(str(total_files) + ' total files.')
print(str(num_raw) + ' raw files.')
print(str(num_reduced) + ' reduced files.')
print(str(num_other) + ' other files.')

HD210610 has no HIP name in SIMBAD
HD210610 has no HIP name in SIMBAD
HD210610 has no HIP name in SIMBAD
KIC3542116 has no HIP name in SIMBAD
KIC3542116 has no HIP name in SIMBAD
NGC_7654_764 has no HIP name in SIMBAD
NGC_7654_764 has no HIP name in SIMBAD
NGC_7654_764 has no HIP name in SIMBAD
NGC_7654_764 has no HIP name in SIMBAD
NGC_7654_756 has no HIP name in SIMBAD
NGC_7654_806 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC4318 has no HIP name in SIMBAD
NGC5322 has no HIP name in SIMBAD
NGC5322 has no HIP name in SIMBAD
NGC5322 has no HIP name in SIMBAD
NGC5322 has no HIP name in SIMBAD
NGC5322 has no HIP name in SIMBAD
NGC6720 has no HIP name in SIMBAD
NGC6720 has no HIP name in SIMBAD
NGC6720 has no HIP name in SIMBAD
TIC328350926 has no HIP name in SIMBAD
TIC328350926 has no HIP name in SIMBAD

In [125]:
# Show the kernel's current `name` (the scanning loop assigns it from each header's OBJECT value).
name



'TIC396356111'

In [50]:
# Scratch test: write a header row to 'testing.csv' with csv.writer.
# NOTE(review): each header is wrapped in its own list, so writerow() stores the
# list's string form (e.g. "['Filename']") in each cell -- confirm that is intended.
# NOTE(review): this rebinds the module-level `fieldnames` and `log_file_name`
# that the main logging cell also assigns.
fieldnames = [['Filename'],['Tobj'],['Obj']]#,'HIP_name','Thorium1','Thorium2','Haolgen1','Halogen2','Date','RA','DEC','Md5','Filesize']
log_file_name = 'testing.csv'
with open(log_file_name, 'w', encoding='UTF8', newline='') as csvfile: 
    csvwriter = csv.writer(csvfile) 
    csvwriter.writerow(fieldnames)  

In [43]:
# Candidate test identifiers; only the HIP one is sent to SIMBAD below.
name1, name2, name3 = 'HD211336', 'TIC 330608569', 'HIP 10985'
result_table = Simbad.query_objectids(name3)
# Take the first column of the result and decode its byte strings to str.
alt_names = result_table.to_pandas().iloc[:, 0].str.decode('utf-8')

In [30]:
# Keep the current identifier Series under a second name.
# NOTE(review): execution counts are out of order, so which query this captured is unclear.
alt_names3 = alt_names

In [47]:
# Show the identifiers as a plain Python list.
alt_names.to_list()

['TIC 409523757',
 '2MASS J02213565+4435596',
 'ADS  1795 AB',
 'AG+44  237',
 'BD+43   474',
 'CCDM J02216+4436AB',
 'GC  2824',
 'HD  14477',
 'HIC  10985',
 'HIP  10985',
 'IDS 02152+4408',
 'PPM  45017',
 'SAO  37971',
 'SKY#  3482',
 'TD1  1371',
 'WDS J02216+4436AB',
 '** STF  249AB',
 'TYC 2843-1459-1',
 'Renson 3650',
 'BD+43   474A']

In [71]:
# Test input for the HD-prefix rule: starts with a digit and ends in 'A'.
a = '1210106A'

In [72]:
# Same rule get_names applies to names that begin with a digit:
# prepend 'HD', then drop one trailing 'A' or 'B'.
if a[0].isdigit():
    a = 'HD' + a
    a = a[:-1] if a.endswith(('A', 'B')) else a

In [73]:
# Show the rewritten name.
a

'HD1210106'

In [75]:
# Scratch: build a small named pandas Index to try out Index.to_list().
# NOTE(review): pandas is imported in this cell rather than with the imports at the top.
import pandas as pd
idx = pd.Index(['Harry', 'Mike', 'Arther', 'Nick'],
                                  name ='Student')
idx

Index(['Harry', 'Mike', 'Arther', 'Nick'], dtype='object', name='Student')

In [78]:
# Convert the Index to a plain Python list (returned, not stored).
idx.to_list()

['Harry', 'Mike', 'Arther', 'Nick']

In [79]:
# Redisplay idx to check it is still an Index after the to_list() call.
idx

Index(['Harry', 'Mike', 'Arther', 'Nick'], dtype='object', name='Student')

In [86]:
# Scratch: fetch every SIMBAD identifier for 'vega' and check the container type.
result_table = Simbad.query_objectids('vega')
# NOTE(review): this stores whatever `name` currently holds in the kernel,
# not the 'vega' string that was queried.
simbad_resolvable_name = name
# First column of the result, decoded from bytes to str.
alternative_names = result_table.to_pandas().iloc[:, 0].str.decode('utf-8')
alt_names = alternative_names.to_list()
type(alternative_names)

pandas.core.series.Series

In [91]:
# Select the identifiers containing "HIP" and show the first one with spaces removed.
# NOTE(review): contains("HIP") also matches identifiers such as 'HIPASS ...';
# get_names uses a startswith('HIP') / not startswith('HIPASS') filter instead.
HIP_name = alternative_names[alternative_names.str.contains("HIP")]
str(HIP_name.iloc[0]).replace(' ', '')

'HIP91262'

In [90]:
# Check the container type of the filtered result.
type(HIP_name)

pandas.core.series.Series

In [115]:
    # Scratch copy of the HIP-name selection logic, run against 'HIP101345'.
    # NOTE(review): the branches below test the kernel's current `name`, not the
    # 'HIP101345' string that was queried.
    result_table = Simbad.query_objectids('HIP101345')
    simbad_resolvable_name = name
    alternative_names = result_table.to_pandas()
    alternative_names = alternative_names.iloc[:,0].str.decode('utf-8') # gets rid of weird formatting
    alt_names = alternative_names.to_list()
    # HIP_name ends up a str in the first two branches and a list in the last one.
    if name.startswith('HIP') and not name.startswith('HIPASS'):
        HIP_name = name  
    elif name.startswith('hip') and not name.startswith('hipass'):
        HIP_name = 'HIP' + name[3:]
    else:
        #HIP_name = alternative_names[alternative_names.str.contains("HIP")]
        HIP_name = [x for x in alternative_names.tolist() if x.startswith('HIP') and not x.startswith('HIPASS')]


In [119]:
# Take the first element of HIP_name, strip spaces, and show it.
# NOTE(review): if HIP_name is a str rather than a list, [0] is just its first character.
a = str(HIP_name[0]).replace(' ', '')
a

'HIP101345'

In [None]:
# Scratch: query SIMBAD for one object with object-type columns requested.
# NOTE(review): Simbad is re-imported here, and this cell registers 'otype' while
# the main logging cell registers 'otype(3)' (get_names reads the 'OTYPE_3' column).
from astroquery.simbad import Simbad
Simbad.add_votable_fields('otypes')
Simbad.add_votable_fields('otype')
result_table = Simbad.query_object('HIP101345')#'LP 54-19')
results = result_table.to_pandas()
otypes = results['OTYPES']
# Split the first row's OTYPES string on '|' into individual type codes.
otypes_list = results['OTYPES'][0].split('|')
#'SB*' in otypes_list #== 'err' # not an object
otype = results['OTYPE']
otype
#otype.contains('*') # is a star of some sort
#otype.contains('*') # is a star of some sort


In [35]:
# Show the OTYPE value of the first result row.
otype[0]

'PM*'