In [None]:
# group_by_SNR.ipynb
# Many stars that have multiple APF spectra have spectra from different nights of observation.
# This notebook calculates the SNR for each group of spectra from one night of observing (calc_SNR instead combines
# all observations of one star and returns a single SNR for the star), then finds, for each star, which group of
# observations together has the highest SNR. Only the highest-SNR group will be used in the Specmatch-Emp run for each star.
# Last modified 8/12/20 by Anna Zuckerman 


In [2]:
import os
import pandas as pd
import numpy as np
import astropy.io.fits
import shutil

In [3]:
def get_SNR(path_name, filenames, order=45): # Modified from get_SNR in calc_SNR
    """Return the combined SNR estimate for a set of spectra files.

    The chosen echelle order is read from the primary HDU of every file, the
    orders are summed pixel-by-pixel, and the SNR is taken as the square root
    of the median of that summed order.

    Parameters
    ----------
    path_name : str
        Directory that contains the spectra files.
    filenames : iterable of str
        File names (relative to ``path_name``) to combine.
    order : int, optional
        Row index into the primary HDU data to use. Defaults to 45, the value
        this notebook has always used.

    Returns
    -------
    float
        sqrt(median(summed order)), or 0.0 when ``filenames`` is empty.
        A negative median yields NaN (NumPy emits a RuntimeWarning).
    """
    order_sum = None
    for spect_file in filenames:
        # The context manager closes the file even if reading raises.
        with astropy.io.fits.open(path_name + '/' + spect_file) as hdul:
            # Copy into a float array before the file closes, since the data
            # may be backed by the open file.
            order_counts = np.array(hdul[0].data[order], dtype=float)
        # Keep the accumulator the same shape as the order itself; adding a
        # 1-D order to a column vector would broadcast into a 2-D array.
        order_sum = order_counts if order_sum is None else order_sum + order_counts
    if order_sum is None:
        # No files given: nothing was summed, so report zero signal.
        return 0.0
    SNR = np.sqrt(np.median(order_sum))
    return SNR

In [9]:
# For stars with multiple spectra, get the set of observations with the highest SNR.
# Files in a star's directory are grouped by the text before the first '.' in their
# name; each group is scored with get_SNR and the best group is recorded per star.
big_path = './APF_spectra/all_apf_spectra'
SNR_filename = 'all_apf_highest_SNRs.csv'
# Set to True to also copy each star's highest-SNR files into highest_SNR_path.
copy_highest_SNR_files = False
highest_SNR_path = './APF_spectra/all_apf_spectra_highest_SNR'
SNR_list = []
HIP_names = []
pathlist = [path for path in sorted(os.listdir(big_path)) if os.path.isdir(big_path + '/' + path)]
for star_dir in pathlist:
    star_path = big_path + '/' + star_dir
    # Jupyter may leave a checkpoint folder behind; it is not a spectrum.
    # Sorting makes tie-breaking between equal-SNR groups deterministic.
    spectlist = [filename for filename in sorted(os.listdir(star_path)) if filename != '.ipynb_checkpoints']
    # Map observation id -> its files in one pass (dicts keep first-seen order).
    obs_files_by_obs = {}
    for filename in spectlist:
        obs_files_by_obs.setdefault(filename.split('.')[0], []).append(filename)
    highest_SNR = 0
    highest_SNR_obs = ''
    for obs, obs_files in obs_files_by_obs.items():
        SNR_obs = get_SNR(star_path, obs_files)
        if SNR_obs > highest_SNR:
            highest_SNR = SNR_obs
            highest_SNR_obs = obs
    # Record exactly one (name, SNR) pair per star, after every group has been
    # compared, so HIP_names and SNR_list stay aligned row-for-row.
    HIP_names += [star_dir.split('_')[0]]
    SNR_list += [highest_SNR]
    # Empty when no group had an SNR above zero.
    highest_SNR_obs_files = obs_files_by_obs.get(highest_SNR_obs, [])
    if copy_highest_SNR_files:
        new_dir_name = highest_SNR_path + '/' + star_dir
        # exist_ok lets the cell be re-run without failing on existing folders.
        os.makedirs(new_dir_name, exist_ok=True)
        for file in highest_SNR_obs_files:
            shutil.copyfile(star_path + '/' + file, new_dir_name + '/' + file)

df = pd.DataFrame(list(zip(HIP_names, SNR_list)), columns =['HIP_name', 'Highest observation set SNR'])
df.to_csv('./' + SNR_filename)

In [50]:
# Stars with only one spectrum are stored as plain files directly under big_path
# (not in a per-star directory), so each such file is copied over unchanged.
# Not applicable for ./APF_spectra/all_apf_spectra.
single_spectrum_dest = './APF_spectra/apf_spectra_highest_SNR'
pathlist_notdir = []
for entry in sorted(os.listdir(big_path)):
    # Keep only entries that are not directories.
    if not os.path.isdir(big_path + '/' + entry):
        pathlist_notdir.append(entry)
for spect_name in pathlist_notdir:
    shutil.copyfile(big_path + '/' + spect_name, single_spectrum_dest + '/' + spect_name)


In [6]:
# Check that all stars were processed: print the number of entries in the input
# directory and in the highest-SNR output directory (the two counts should match).
for checked_dir in ('./APF_spectra/all_apf_spectra', './APF_spectra/all_apf_spectra_highest_SNR'):
    print(len(os.listdir(checked_dir)))

810
810
