                                       References
[HIRES PRV Documentation](https://caltech-ipac.github.io/hiresprv/hiresprv.html)

                                     Imports & Setup
                                        

In [1]:
from astropy import units as u
from astropy.io import fits
from hiresprv.auth import login
from hiresprv.idldriver import Idldriver
from hiresprv.database import Database
from hiresprv.download import Download
from PyAstronomy import pyasl
from specutils.fitting import fit_generic_continuum
from specutils.spectra import Spectrum1D

import numpy as np
import pandas as pd 


In [2]:
# Session setup for the hiresprv clients.
# The cookie file and the RV output directory are named once here so every
# client below is guaranteed to share the same values.
COOKIE_FILE = 'prv.cookies'
RV_OUTPUT_DIR = './RVOutput'

login(COOKIE_FILE)                            # For logging into the NExSci servers (prompts for KOA userid/password)

idl = Idldriver(COOKIE_FILE)                  # For creating and running RV scripts

state = Database(COOKIE_FILE)                 # For retrieving data from HIRES

# NOTE: Find_and_download_all_rv_obs reads this object through the global name `data`.
data = Download(COOKIE_FILE, RV_OUTPUT_DIR)   # For downloading RV curves / spectra

KOA userid: jgussman
KOA Password: ········
Successful login as jgussman


                                         Data 

In [3]:
# Crossmatch table assembled by Malena Rice (starnames_crossmatch_SPOCS_NEXSCI.txt).
# It pairs each SPOC star with its HIRES ID; the file is space-delimited.
crossMatchedNames = pd.read_csv(
    "../spocData/starnames_crossmatch_SPOCS_NEXSCI.txt",
    sep=" ",
)

                                        RV Data

In [4]:
#Getting the RV data for each star
#1) Find all the RV observations 
def Find_and_download_all_rv_obs(star_ID_List,idldriver,database,download=None):
    '''
    INPUT: star_ID_List is an iterable that contains strings of the HIRES ID for stars
           idldriver needs to be a hiresprv.idldriver.Idldriver instance
           database needs to be a hiresprv.database.Database instance
           download (optional) is the object used to fetch the RV curves; it must
                    expose .rvcurve(name) and .localdir. When omitted, the
                    notebook-level `data` object is used (the original behaviour).
    OUTPUT: {"HIRESName": [...], "FILENAME": [...], "RV": [...]}
            Three parallel lists with one entry per star. FILENAME / RV are pd.NA
            when no RV script could be built, no RV curve file could be read, or
            the RV curve contains no usable RV value.

    Description: Builds one combined script (template + RV reduction + rvcurve for
    every star), runs it, downloads each star's RV curve and records the
    observation with the largest absolute radial velocity (RV) together with the
    HIRES filename it came from.
    '''
    if download is None:
        # Backward compatible fallback: the original implementation always read
        # the notebook-level `data` object.
        download = data

    script_names = []      # names (as they appear in the script) that were scheduled
    failed_names = []      # stars for which no RV script could be created
    master_script = ""
    for HIRESname in star_ID_List:
        try:
            #Create script for reducing RV observations
            rv_script = idldriver.create_rvscript(HIRESname,database)

            # The script text is sliced at fixed offsets: the star name starts at
            # character 3 and the first observation id follows it.
            name_len = len(HIRESname)
            first_date = rv_script[3+name_len:13+name_len].split(".")[0]
            script_name = rv_script[3:name_len+3].split(" ")[0]
        except AttributeError: #This is due to the idldriver.create_rvscript
            # Prepend to keep the same row order the original code produced.
            failed_names.insert(0,HIRESname)
            continue

        master_script += "template {0} {1}\n".format(script_name,first_date) + rv_script
        master_script += "\nrvcurve {0}\n".format(script_name)
        script_names.append(script_name)

    #Run script (nothing to run when every star failed)
    # NOTE(review): the notebook output for this call reads "Script running in
    # background"; the downloads below may therefore start before the script has
    # finished -- confirm against the hiresprv monitor.
    if master_script:
        idldriver.run_script(master_script)

    #Downloading the RV data as well as getting the largest |RV| value for each star
    largest_rv = {"HIRESName": failed_names,
                  "FILENAME": [pd.NA]*len(failed_names),
                  "RV": [pd.NA]*len(failed_names)}
    localdir = download.localdir
    for name in script_names:
        best_filename, best_rv = pd.NA, pd.NA
        try:
            download.rvcurve(name)
            curve = pd.read_csv('{0}/vst{1}.csv'.format(localdir,name))
        except (OSError, pd.errors.EmptyDataError):
            #This error occurs because for some reason the star's rvcurve wasn't created
            curve = None

        if curve is not None and not curve.empty:
            abs_rv = curve['RV'].abs()
            if abs_rv.notna().any():
                # idxmax keeps the signed value available: the row is looked up
                # by label, so a negative RV of largest magnitude is found directly.
                best_row = abs_rv.idxmax()
                best_rv = curve.at[best_row,'RV']
                best_filename = curve.at[best_row,'FILENAME']

        largest_rv["HIRESName"].append(name)
        largest_rv["FILENAME"].append(best_filename)
        largest_rv["RV"].append(best_rv)
    return largest_rv

                                       Spectra

In [5]:
#Retrieve all the spectra
def DownloadSpectra(filename_rv_df,download):
    '''
    Input: filename_rv_df dataframe with a column called "FILENAME" that holds the
                          HIRES filenames whose deblazed spectra should be fetched.
           download needs to be a hiresprv.download.Download instance

    Output: {HIRES FILENAME: 1-D spectra array}

    Description: THIS FUNCTION ASSUMES YOU DID .dropna() ON filename_rv_df
    For every entry of the FILENAME column the spectrum is requested through
    `download`, the resulting FITS file is read back from download.localdir and
    its first 16 rows are flattened, in row order, into one 1-D array.
    '''
    # Second constructor argument of hiresprv.download.Download
    local_dir = download.localdir

    spectra_by_file = {}
    for hires_file in filename_rv_df["FILENAME"]:
        # Requests are issued one file at a time: passing several names separated
        # by commas or newlines did not work.
        # The request uses the name with every "r" removed, while the file read
        # back from disk uses the unmodified name.
        download.spectrum(hires_file.replace("r",""))

        fits_path = "{0}/{1}.fits".format(local_dir,hires_file)
        order_rows = fits.getdata(fits_path)

        remaining_rows = [order_rows[k] for k in range(1,16)]
        spectra_by_file[hires_file] = np.append(order_rows[0],remaining_rows)

    return spectra_by_file

In [35]:
def ContinuumNormalize(spectraDic,download):
    '''
    Input: spectraDic is {HIRES FILENAME: 1-D spectra array}
           download needs to be a hiresprv.download.Download instance

    Output: {HIRES FILENAME: 1-D continuum normalized spectra}
            A NEW dictionary is returned; spectraDic itself is left untouched so
            the cell calling this function can be re-run safely.

    Description: Each spectrum is divided by its exposure time (ELAPTIME header
    of the FITS file in download.localdir) and scaled by the photon energy.
    Every echelle order is then wrapped in a specutils Spectrum1D, a continuum
    is fitted with fit_generic_continuum, and the order is divided by that fit.
    '''
    n_orders = 16            # echelle orders kept per spectrum
    pixels_per_order = 4021  # samples per order in the flattened arrays

    #This is the same for all HIRES data
    wl_grid = fits.getdata('http://caltech-ipac.github.io/hiresprv/_static/keck_rwav.fits')
    wl_solution = np.append(wl_grid[0],[wl_grid[i] for i in range(1,n_orders)]) #UNITS: Angstrom
    wl_solution_micrometer = wl_solution*0.0001 #UNITS: µm
    # The axis values are Angstroms, so they are labelled as such (the numeric
    # values handed to the fit are unchanged from the previous u.um label).
    spectral_axis_wl_solution = wl_solution*u.AA
    photonEnergy = 1.2398 / wl_solution_micrometer #h*c/lambda = 1.2398 eV-µm/lambda

    download_Location = download.localdir
    normalized_spectra = {}
    #Continuum Normalize
    for filename, deblazedFlux in spectraDic.items():
        # Context manager guarantees the FITS file is closed even if the header
        # lookup fails.
        with fits.open("{0}/{1}.fits".format(download_Location,filename)) as hdul:
            timeElapsed = hdul[0].header["ELAPTIME"]
        photonsPersec = deblazedFlux/timeElapsed

        # The Jy label is only a placeholder unit: it cancels in the
        # flux / continuum ratio below.
        energy_flux = photonEnergy*photonsPersec*u.Jy

        normalized_orders = []
        for order in range(n_orders): #Normalize each individual echelle order
            start = pixels_per_order*order
            stop = start + pixels_per_order
            order_flux = energy_flux[start:stop]
            order_wl = spectral_axis_wl_solution[start:stop]

            spectrum = Spectrum1D(flux=order_flux, spectral_axis=order_wl)
            continuum_model = fit_generic_continuum(spectrum)
            continuum = continuum_model(order_wl)

            normalized_echelle = order_flux / continuum

            # np.float was removed from NumPy; converting the whole dimensionless
            # Quantity at once yields the same float values as float() per element.
            normalized_orders.append(
                np.asarray(normalized_echelle.to_value(u.dimensionless_unscaled), dtype=float))

        normalized_spectra[filename] = np.concatenate(normalized_orders)

    return normalized_spectra


In [7]:
def CrossCorrelate(filename_rv_df,normalized_Spectra):
    '''
    Input: filename_rv_df is the same dataframe produced by Find_and_download_all_rv_obs;
                          its first three columns are read positionally as
                          (HIRES ID, HIRES filename, RV)
           normalized_Spectra is the output of ContinuumNormalize

    Output:  {HIRES ID: (shifted wavelength array, normalized flux array)}

    Description: THIS FUNCTION ASSUMES YOU DID .dropna() ON THE DATAFRAME
    Uses PyAstronomy's crosscorrRV function to cross-correlate every spectrum with
    the solar atlas over the velocity window [-|RV|, |RV|] in steps of |RV|/100,
    then shifts the HIRES wavelength solution by the best-matching velocity.
    '''
    # Atlas columns: wvnum, wvlen, crf, tel, c, n. Only the wavelength and the
    # "c" column are used as the template; both are reversed exactly as before.
    _, atlas_wl, _, _, atlas_flux, _ = np.genfromtxt("../Atlases/solarAtlas.txt",skip_header=1,unpack=True)
    atlas_wl, atlas_flux = atlas_wl[::-1], atlas_flux[::-1]

    wl_grid = fits.getdata('http://caltech-ipac.github.io/hiresprv/_static/keck_rwav.fits')
    wl_solution = np.append(wl_grid[0],[wl_grid[i] for i in range(1,16)])
    # float64 copy used for the shift so the result does not depend on the
    # on-disk dtype of the wavelength solution.
    wl_float = np.asarray(wl_solution, dtype=np.float64)

    speed_of_light = 299_792.458 #UNITS: km/s

    crossCorrelatedspectra = {} #Key: HIRES ID Values: (shifted wavelength, normalized flux)
    # Positional column access (instead of row[0], row[1], row[2]) avoids the
    # deprecated integer lookup on a label-indexed Series.
    for hires_id, filename, rv_value in filename_rv_df.iloc[:, :3].itertuples(index=False, name=None):
        RV = abs(rv_value)
        normalizedFlux = normalized_Spectra[filename]

        if RV == 0:
            # A zero-width window would hand crosscorrRV a step of 0; the only
            # candidate velocity in that window is 0, i.e. no shift.
            z = 0.0
        else:
            rv, cc = pyasl.crosscorrRV(wl_solution, normalizedFlux, atlas_wl, atlas_flux,
                                       -1*RV, RV, RV/100., skipedge=25)
            argRV = rv[np.argmax(cc)]  #UNITS: km/s
            # z = v_0/c
            z = argRV/speed_of_light   #UNITS: None

        shifted_wl = wl_float + wl_float*z #UNITS: Angstroms

        #Making the key the HIRES ID so I easily convert it back to the Spoc ID later
        crossCorrelatedspectra[hires_id] = (shifted_wl,normalizedFlux)

    return crossCorrelatedspectra

In [8]:
def Interpolate(spectraDic):
    '''
    Input: spectraDic is the output of the function CrossCorrelate, i.e.
           {HIRES ID: (wavelength array, normalized flux array)}

    Output:  (fluxDF, interpolate_over)
             fluxDF is a DataFrame with one column per HIRES ID holding the flux
             interpolated onto the common wavelength grid;
             interpolate_over is that common grid as a list of wavelengths.

    Raises: ValueError if spectraDic is empty.

    Description: THIS FUNCTION ASSUMES YOU DID .dropna() ON THE DATAFRAME
    The common grid is made of the wavelengths of the first spectrum that lie
    inside the range covered by every spectrum. Besides returning the results,
    the grid is written to interpolated_wl.csv and the fluxes to
    fluxes_for_HIRES.csv in the current working directory.
    '''
    if not spectraDic:
        raise ValueError("spectraDic is empty; there is nothing to interpolate")

    #Interpolate the spectra with each other to get the same wavelength scale for all of them.
    first_spectra = np.asarray(next(iter(spectraDic.values()))[0])

    #Assumption: every wavelength array is sorted, so index 0 is its minimum and
    #            the last index is its maximum.
    #Largest of the minima / smallest of the maxima = range shared by all spectra.
    maxMinVal = max(entry[0][0] for entry in spectraDic.values())
    minMaxVal = min(entry[0][-1] for entry in spectraDic.values())

    #wavelength range
    inside_overlap = (first_spectra >= maxMinVal) & (first_spectra <= minMaxVal)
    interpolate_over = list(first_spectra[inside_overlap])

    fluxDic = {}
    for HIRES_ID, entry in spectraDic.items():
        wl, flux_norm = entry[0], entry[1]
        # Each spectrum is interpolated from ITS OWN wavelength array (the
        # previous code referenced an undefined name here).
        fluxDic[HIRES_ID] = np.interp(interpolate_over,wl,flux_norm)

    #Saving
    np.savetxt("interpolated_wl.csv",interpolate_over,delimiter=",")
    fluxDF = pd.DataFrame(fluxDic)
    fluxDF.to_csv("fluxes_for_HIRES.csv",index_label=False,index=False)
    return fluxDF, interpolate_over
    

                                       Running

In [9]:
import time 

In [10]:
# Stage 1 - RV: build/run the RV scripts for every HIRES ID in the crossmatch
# table and record, per star, the selected RV observation.
rv_start = time.time()
name_filename_rv_dic = Find_and_download_all_rv_obs(crossMatchedNames["HIRES"].to_numpy(),idl,state)
name_filename_rv_df = pd.DataFrame(name_filename_rv_dic)
# Persist the table so the next stage can be started from disk.
name_filename_rv_df.to_csv("HIRES_Filename_rv.csv",index_label=False,index=False)
# Wall-clock duration in seconds (reported in minutes by the timing cell).
rv_time_elap = time.time() - rv_start

status= ok
msg= Script running in background. Consult monitor for status.


In [11]:
# Stage 2 - Download spectra for every star that has a filename and an RV.
download_start = time.time()
# Reload the table written by the RV stage; rows with missing values are dropped
# because the functions below assume .dropna() was applied.
name_filename_rv_df = pd.read_csv("HIRES_Filename_rv.csv").dropna() 
# NOTE: this rebinds the global `data` (previously pointing at ./RVOutput).
# Find_and_download_all_rv_obs reads `data` as a global, so re-running the RV
# stage after this cell would look for RV curves in ./SpectraOutput.
data = Download('prv.cookies', './SpectraOutput') # For downloading spectra
spectraForStars = DownloadSpectra(name_filename_rv_df,data)
# Wall-clock duration in seconds.
download_time_elap = time.time() - download_start

NameError: name 'download_time_elap' is not defined

In [36]:
# Stage 3 - Continuum normalize every downloaded spectrum.
# `data` must still be the Download instance that points at the spectra directory,
# because ContinuumNormalize re-opens each FITS file to read its ELAPTIME header.
normalization_start = time.time()
normalizedSpectraDic = ContinuumNormalize(spectraForStars,data)
# Wall-clock duration in seconds.
normalization_time_elap = time.time() - normalization_start 

In [37]:
# Stage 4 - Cross-correlate each normalized spectrum with the solar atlas and
# shift its wavelength axis accordingly.
correlated_start = time.time()
crossCorrelatedSpectra = CrossCorrelate(name_filename_rv_df,normalizedSpectraDic)
# Wall-clock duration in seconds.
correlated_time_elap = time.time() - correlated_start

In [38]:
# Stage 5 - Put every spectrum on one shared wavelength grid.
# Interpolate also writes interpolated_wl.csv and fluxes_for_HIRES.csv.
interpolate_start = time.time()
flux_df, interpolate_array = Interpolate(crossCorrelatedSpectra)
# Wall-clock duration in seconds.
interpolate_time_elap = time.time() - interpolate_start

In [40]:
# Timing summary: each *_time_elap value is a duration in seconds measured by the
# stage cells above, converted to minutes here. Every stage cell must have been
# run in this kernel session, otherwise the corresponding name is undefined.
print(f"It took {rv_time_elap/60} minutes to complete the RV func")
print(f"It took {download_time_elap/60} minutes to complete the download spectra func")
print(f"It took {normalization_time_elap/60} minutes to complete the normalization func")
print(f"It took {correlated_time_elap/60} minutes to complete the cross-correlation func")
print(f"It took {interpolate_time_elap/60} minutes to complete the interpolate func")

It took 11.568602812290191 minutes to complete the RV func
It took 36.94154184261958 minutes to complete the download spectra func
It took 57.14264868895213 minutes to complete the normalization func
It took 123.83144724369049 minutes to complete the cross-correlation func
It took 1.183903447786967 minutes to complete the interpolate func
