In [None]:
# check_file_labeling.ipynb
# Script to check various aspects of file labeling and organization
# Last modified 8/12/20 by Anna Zuckerman 


In [None]:
import os
import astropy.io.fits
import pandas as pd

# Check that every spectrum's FITS header target name (TOBJECT) matches the name
# recorded for that file in all_apf_non_i2.csv.
all_apf_non_i2 = pd.read_csv('all_apf_non_i2.csv')
dirpath = './APF_spectra/apf_spectra'
bad_files = []  # names of files whose header name disagrees with the catalog name
bad_dirs = []   # never populated in this cell; kept so the name stays defined
n = 1           # ends up as 1 + the number of directories visited
for (root, dirs, files) in os.walk(dirpath, topdown=True):  # reach each spectrum file whether in a subdirectory or not
    header_names_in_dir = []
    print(dirs)
    for file in files:
        filepath = os.path.join(root, file)
        # The catalog key is the first two dot-separated pieces of the file name.
        # Joining a slice (instead of indexing [1]) cannot raise on a name with no dot.
        filename = '.'.join(file.split('.')[:2])
        apf_row = all_apf_non_i2[all_apf_non_i2['Column1'] == filename]
        if apf_row.empty:
            # Previously an IndexError aborted the whole walk on the first
            # uncatalogued file (e.g. anything under .ipynb_checkpoints).
            print('SKIPPED: ' + file + ' is not listed in all_apf_non_i2')
            continue
        apf_name = apf_row.values.tolist()[0][1]  # the name as listed in all_apf_non_i2 (second column)
        # Context manager closes the FITS file handle; without it one handle
        # per spectrum stays open for the duration of the walk.
        with astropy.io.fits.open(filepath) as hdul:
            header_name = hdul[0].header['TOBJECT']
        header_names_in_dir.append(header_name)
        if header_name == apf_name:
            print(file + ': ' +  apf_name + ' is GOOD.')
        else:
            print('MISLABELED: ' + file + ' has apf_name ' + apf_name + ' but header_name ' + header_name)
            bad_files.append(file)
    n += 1

In [None]:
# given a directory for one star, check that all spectra are for the same star
# NOTE: This DOES NOT WORK, because the recorded star name, RA, and DEC can all be slightly different even for the same star

def _all_same(values):
    """Return True when every element of ``values`` equals the first one (True for an empty list)."""
    return all(value == values[0] for value in values)


def check(dirpath):
    """Compare the TOBJECT, RA and DEC header values of every file in one directory.

    Does nothing when ``dirpath`` is not a directory. Otherwise prints the
    directory's last path component, reads the primary header of each regular
    file in it, and prints the collected names, RA and DEC values only when
    all three of them vary across the files. Every other combination
    (all identical, or only some of the three varying) is silent.

    Parameters
    ----------
    dirpath : str
        Path to a directory expected to hold the FITS spectra of one star.

    Returns
    -------
    None
    """
    if not os.path.isdir(dirpath):
        return
    print(dirpath.split('/')[-1] + ': ')

    names = []
    RA = []
    DEC = []
    for file in sorted(os.listdir(dirpath)):
        filepath = os.path.join(dirpath, file)
        if not os.path.isfile(filepath):
            # Nested folders (e.g. notebook checkpoints) are not spectra.
            continue
        # Close each FITS file as soon as its header values have been read.
        with astropy.io.fits.open(filepath) as hdul:
            header = hdul[0].header
            names.append(header['TOBJECT'])
            RA.append(header['RA'])
            DEC.append(header['DEC'])

    if not names:
        # An empty directory used to raise IndexError on names[0].
        print('No spectra found')
        return

    if not _all_same(names) and not _all_same(RA) and not _all_same(DEC):
        print('Names, RA, DEC all NOT the same')
        print(names)
        print(RA)
        print(DEC)

# Run check() on every entry of the spectra directory.
path = './APF_spectra/apf_spectra'
# Filter out Jupyter's checkpoint folder instead of list.remove(), which
# raises ValueError when the folder does not exist (e.g. a fresh checkout).
filelist = [entry for entry in sorted(os.listdir(path)) if entry != '.ipynb_checkpoints']
for dirpath in filelist:
    check(path + '/' + dirpath)

    

In [None]:
# given a directory for one star, check that all spectra are for the same star
# by using coordinates in simbad

def check2(pathname):
    """Report whether all spectra in one directory resolve to the same SIMBAD coordinates.

    Does nothing when ``pathname`` is not a directory. For each file, the
    first two dot-separated pieces of its name are looked up in the
    ``FILENAME`` column of the notebook-level ``apf_name_conv`` table to get
    a ``HIP_NAME``, which is then resolved with ``Simbad.query_object``.
    The coordinates of all resolved files are compared and a one-line
    verdict is printed.

    Files that are missing from ``apf_name_conv`` or whose name SIMBAD
    cannot resolve are reported with a ``SKIPPED`` line and left out of the
    comparison, so the verdict covers only the resolved files.

    Parameters
    ----------
    pathname : str
        Path to a directory expected to hold the spectra of one star.

    Returns
    -------
    None
    """
    if not os.path.isdir(pathname):
        return

    # HIP name -> (RA, DEC), or None when SIMBAD had no match. Each distinct
    # name is queried once rather than once per file.
    coords_by_name = {}
    all_RA_DEC = []
    for file in sorted(os.listdir(pathname)):
        # Slice-and-join cannot raise on a file name with no dot.
        filename = '.'.join(file.split('.')[:2])
        row = apf_name_conv[apf_name_conv['FILENAME'] == filename]
        if row.empty:
            print('SKIPPED: ' + file + ' is not listed in apf_name_conv')
            continue
        HIP_name = row['HIP_NAME'].tolist()[0]

        if HIP_name not in coords_by_name:
            result_table = Simbad.query_object(HIP_name)
            # NOTE(review): depending on the astroquery version an unresolved
            # name gives None or an empty table; both are handled here.
            if result_table is None or len(result_table) == 0:
                coords_by_name[HIP_name] = None
            else:
                results = result_table.to_pandas()
                # Assumes columns 1 and 2 of the result are RA and DEC, as the
                # original code did. TODO confirm for the installed astroquery.
                # A tuple compares correctly whether the values are strings or numbers.
                coords_by_name[HIP_name] = (results.iloc[0, 1], results.iloc[0, 2])

        RA_DEC = coords_by_name[HIP_name]
        if RA_DEC is None:
            print('SKIPPED: SIMBAD returned no match for ' + str(HIP_name) + ' (' + file + ')')
            continue
        all_RA_DEC.append(RA_DEC)

    if not all_RA_DEC:
        # An empty directory used to raise IndexError on all_RA_DEC[0].
        print('No usable spectra found in ' + pathname)
        return

    if all(coords == all_RA_DEC[0] for coords in all_RA_DEC):
        print('All spectra for ' + pathname + ' are for the same star.')
    else:
        print('NOT all spectra for ' + pathname + ' are for the same star!')

from astroquery.simbad import Simbad
import pandas as pd
import os
# Load the file-name -> HIP-name table used by check2(), then run check2()
# on every entry of the spectra directory.
apf_name_conv = pd.read_csv('apf_name_conversion_updated.csv')
path = './APF_spectra/apf_spectra'
# Filter out Jupyter's checkpoint folder instead of list.remove(), which
# raises ValueError when the folder does not exist (e.g. a fresh checkout).
pathlist = [entry for entry in sorted(os.listdir(path)) if entry != '.ipynb_checkpoints']
for dirpath in pathlist:
    check2(path + '/' + dirpath)