In [1]:
# Scientific imports

%matplotlib inline
from astropy.io import fits
import matplotlib.pyplot as plt
import numpy as np
from astroquery.mast import Observations
from astroquery.mast import Catalogs

# General imports

import csv, math, os, os.path, sys
import pandas as pd
import seaborn as sb
from sklearn import metrics
from IPython.display import display

sb.set()

In [2]:
# So we need to check whether there is _any_ variable whose name includes "epoch", "phase", etc. in the headers (be it TEPOCH, KEPOCH, etc.)
# %load header_check.py
def areAllHeadersPresent(heads):
    """
    Check that every keyword required for plotting a Light Curve can be looked
    up in ``heads``, and alert the user about each one that is missing.

    Only the single object passed in is inspected.

    Parameters
    ----------
    heads : mapping
        Should be of the form: heads=fits.getheader("file.fits"). Any object
        that raises KeyError when subscripted with a missing keyword works.

    Returns
    -------
    bool
        True if every required keyword was found, False otherwise.
    """

    # List of needed headers
    neededHeaders = ['TEFF', 'LOGG', 'TESSMAG',
                     'TPERIOD', 'TDUR', 'TEPOCH', 'TDEPTH',
                     'TIME', 'PHASE', 'LC_INIT', 'MODEL_INIT']

    # Generic Flag
    allgood = True

    for header in neededHeaders:
        try:
            heads[header]
        except KeyError:
            # Only a failed lookup counts as "missing"; any other error
            # (bad input type, interrupt, ...) is allowed to propagate.
            print("Header {} not present!".format(header))
            allgood = False
    return allgood


In [3]:
# Build the list of light-curve FITS files found anywhere below the current directory

fits_directories = [walk_entry[0] for walk_entry in os.walk('.')]
fitsList = []

for fits_root, fits_dirs, fits_files in os.walk('.'):
    for fits_file in fits_files:
        fullpath = os.path.join(fits_root, fits_file)
        # Compare on the lower-cased path so the extension match ignores case
        extension = os.path.splitext(fullpath.lower())[1]
        if not extension.endswith('.fits'):
            continue
        fitsList.append(fullpath)

print("Number of FITS files: {}".format(len(fitsList)))

Number of FITS files: 28


This says 28 files, whereas my last notebook said 27. The difference is that I manually downloaded the FITS file used in [This Notebook](./20191012%20-%20Light%20Curve%20Tutorial%201.ipynb) so that I could run my checking program on it, as I *know* that it has the correct parameters.

In [4]:
# This is the one we downloaded
# fits.getheader(fitsList[1])

In [5]:
# Run the check on the header that getheader() returns for the second file in fitsList (no extension argument given)
areAllHeadersPresent(fits.getheader(fitsList[1]))

Header TPERIOD not present!
Header TDUR not present!
Header TEPOCH not present!
Header TDEPTH not present!
Header TIME not present!
Header PHASE not present!
Header LC_INIT not present!
Header MODEL_INIT not present!


False

So, obviously, this is wrong, and as such my checking function is incorrect! Time to work out why, and how to fix it.

In [6]:
# Display the table column definitions that getdata() finds in the same file
fits.getdata(fitsList[1]).columns

ColDefs(
    name = 'TIME'; format = 'D'; unit = 'BJD - 2457000, days'; disp = 'D14.7'
    name = 'TIMECORR'; format = 'E'; unit = 'd'; disp = 'E14.7'
    name = 'CADENCENO'; format = 'J'; disp = 'I10'
    name = 'PHASE'; format = 'E'; unit = 'days'; disp = 'E14.7'
    name = 'LC_INIT'; format = 'E'; unit = 'dimensionless'; disp = 'E14.7'
    name = 'LC_INIT_ERR'; format = 'E'; unit = 'dimensionless'; disp = 'E14.7'
    name = 'LC_WHITE'; format = 'E'; unit = 'dimensionless'; disp = 'E14.7'
    name = 'LC_DETREND'; format = 'E'; unit = 'dimensionless'; disp = 'E14.7'
    name = 'MODEL_INIT'; format = 'E'; unit = 'dimensionless'; disp = 'E14.7'
    name = 'MODEL_WHITE'; format = 'E'; unit = 'dimensionless'; disp = 'E14.7'
)

So we can see that `fits.getheader(<file.fits>)` and `fits.getdata(<file.fits>).columns` give different header information, and both need to be checked to confirm the presence of these required(?) headers

In [7]:
# Short name for the path of the manually downloaded file; reused by later cells
X = fitsList[1]
# NOTE: only the last expression in a cell is displayed, so the column listing
# on the next line produces no visible output here.
fits.getdata(X).columns
fits.getheader(X)

SIMPLE  =                    T / conforms to FITS standards                     
BITPIX  =                    8 / array data type                                
NAXIS   =                    0 / number of array dimensions                     
EXTEND  =                    T / file contains extensions                       
NEXTEND =                    4 / number of standard extensions                  
EXTNAME = 'PRIMARY '           / name of extension                              
EXTVER  =                    1 / extension version number (not format version)  
SIMDATA =                    F / file is based on simulated data                
ORIGIN  = 'NASA/Ames'          / institution responsible for creating this file 
DATE    = '2018-10-06'         / file creation date.                            
TSTART  =    1354.105280000743 / observation start time in TJD                  
TSTOP   =    1381.518180460247 / observation stop time in TJD                   
DATE-OBS= '2018-08-23T14:30:

In [8]:
with fits.open(fitsList[1], mode="readonly") as hdulist:

    print(hdulist)

    # Name the three sources of information once, then read everything from them
    primary_header = hdulist[0].header
    tce_header = hdulist[1].header
    tce_table = hdulist[1].data

    # Stellar parameters live in the primary header: effective temperature,
    # surface gravity, and TESS magnitude.
    star_teff, star_logg, star_tmag = (
        primary_header[keyword] for keyword in ('TEFF', 'LOGG', 'TESSMAG')
    )

    # Fit parameters for the first TCE are stored in the FITS header of the
    # first extension.
    period, duration, epoch, depth = (
        tce_header[keyword] for keyword in ('TPERIOD', 'TDUR', 'TEPOCH', 'TDEPTH')
    )

    # Columns of interest for the first TCE signal come from the binary FITS
    # table in the first extension: timestamps in TBJD, phase, initial fluxes,
    # and the corresponding model fluxes.
    times, phases, fluxes_init, model_fluxes_init = (
        tce_table[column] for column in ('TIME', 'PHASE', 'LC_INIT', 'MODEL_INIT')
    )

[<astropy.io.fits.hdu.image.PrimaryHDU object at 0x7fce21653668>, <astropy.io.fits.hdu.table.BinTableHDU object at 0x7fce2164e5c0>, <astropy.io.fits.hdu.table.BinTableHDU object at 0x7fce21649cc0>, <astropy.io.fits.hdu.table.BinTableHDU object at 0x7fce216752b0>]


- `fits.getheader(f, ext=0)` gives us our Stellar Parameters (so, in the above code, `hdulist[0].header['XXXX']`)
- `fits.getheader(f, ext=1)` gives us our Fit Parameters (so, in the above code, `hdulist[1].header['XXXX']`)
- `fits.getdata(f).columns` gives us our Fit Values (so, in the above code, `hdulist[1].data['XXXX']`)

In [11]:
# So we need to check whether there is _any_ variable whose name includes "epoch", "phase", etc. in the headers (be it TEPOCH, KEPOCH, etc.)
# %load data-header-check.py
def _allKeysPresent(container, keys, missingMsg, log):
    """Return True if every key can be looked up in container; print missingMsg for each miss when log is truthy."""
    allPresent = True
    for key in keys:
        try:
            container[key]
        except KeyError:
            # Only a failed lookup counts as "missing"; other errors propagate.
            if log:
                print(missingMsg.format(key))
            allPresent = False
    return allPresent


def dataAndHeaderCheck(f, log=False):
    """
    Check that everything required for plotting a Light Curve is present in a
    FITS file, and (optionally) alert the user about which items are missing.

    Three groups are checked, each against a different part of the file:
    stellar parameters against ``fits.getheader(f, ext=0)``, fit parameters
    against ``fits.getheader(f, ext=1)``, and fit data against
    ``fits.getdata(f).columns``.

    Parameters
    ----------
    f : path to a FITS file (local, cloud, etc)
    log : displays the printouts IFF truthy

    Returns
    -------
    bool
        True only if all three groups are complete.
    """

    # List of needed headers
    stellar_params = ['TEFF', 'LOGG', 'TESSMAG']

    fit_params = ['TPERIOD', 'TDUR', 'TEPOCH', 'TDEPTH']

    fit_data = ['TIME', 'PHASE', 'LC_INIT', 'MODEL_INIT']

    # FITS Headers
    fh0 = fits.getheader(f, ext=0)
    fh1 = fits.getheader(f, ext=1)

    # FITS Columns
    fc = fits.getdata(f).columns

    if log:
        print("Testing to see if all relevant information is present...")

    # First, the Stellar Parameters block
    allStellar = _allKeysPresent(fh0, stellar_params, "\tHeader {} not present!", log)
    if allStellar and log:
        print("\tAll Stellar Parameters present")

    # Next, the Fit Parameters block
    allFitPara = _allKeysPresent(fh1, fit_params, "\tFit Parameter {} not present!", log)
    if allFitPara and log:
        print("\tAll Fit Parameters present")

    # Lastly, the Fit Data block
    allFitData = _allKeysPresent(fc, fit_data, "\tFit Data {} not present!", log)
    if allFitData and log:
        print("\tAll Fit Data present")

    return allStellar and allFitPara and allFitData


In [12]:
# Re-run the check on the same file (path stored in X), this time with the function that looks at both headers and the table columns
dataAndHeaderCheck(X)

True

Ah-ha! Now we have a function that works! Let's see if any of the other files have missing info!

(I suspect some will, for reasons mentioned before, such as "**T**EPOCH" being for a TESS mission)

In [13]:
# Run the verbose check on every discovered file, labelling each result with its index in fitsList
for i, fits_path in enumerate(fitsList):
    check_passed = dataAndHeaderCheck(fits_path, True)
    print("{}:   {}".format(i, check_passed))

Testing to see if all relevant information is present...
	Header TESSMAG not present!
	Fit Parameter TPERIOD not present!
	Fit Parameter TDUR not present!
	Fit Parameter TEPOCH not present!
	Fit Parameter TDEPTH not present!
	Fit Data PHASE not present!
	Fit Data LC_INIT not present!
	Fit Data MODEL_INIT not present!
0:   False
Testing to see if all relevant information is present...
	All Stellar Parameters present
	All Fit Parameters present
	All Fit Data present
1:   True
Testing to see if all relevant information is present...
	Header TEFF not present!
	Header LOGG not present!
	Header TESSMAG not present!
	Fit Parameter TPERIOD not present!
	Fit Parameter TDUR not present!
	Fit Parameter TEPOCH not present!
	Fit Parameter TDEPTH not present!
	Fit Data TIME not present!
	Fit Data PHASE not present!
	Fit Data LC_INIT not present!
	Fit Data MODEL_INIT not present!
2:   False
Testing to see if all relevant information is present...
	All Stellar Parameters present
	Fit Parameter TPERIOD

What I want to do now is to change the above function to make sure that it can identify ANY parameter that ends in "EPOCH" or "PERIOD" or "DUR", etc, to make sure I find all parameters regardless of what mission found them (Kepler = KEPOCH, KPERIOD, etc)

_Alternatively_, I could look _only_ at data from the TESS mission for now, and expand later on. That sounds like a good idea to me; otherwise I might end up stuck on this one step for ages.