In [1]:
# Scientific imports

%matplotlib inline
from astropy.io import fits
import matplotlib.pyplot as plt
import numpy as np
from astroquery.mast import Observations
from astroquery.mast import Catalogs

# General imports

import csv, math, os, os.path, sys
import pandas as pd
import seaborn as sb
from sklearn import metrics
from IPython.display import display
sb.set()

In [2]:
# %load data-header-check.py
def dataAndHeaderCheck(f, log=False):
    """
    Check that a FITS file carries everything needed to plot a Light Curve,
    and (optionally) report which pieces are missing.

    Three groups are looked up, each in a different part of the file:
      * stellar parameters (TEFF, LOGG, TESSMAG) in the header of extension 0
      * fit parameters (TPERIOD, TDUR, TEPOCH, TDEPTH) in the header of extension 1
      * fit data (TIME, PHASE, LC_INIT, MODEL_INIT) in the columns of the
        table returned by fits.getdata(f)

    Input parameter (f) should be a path to a FITS file (local, cloud, etc)
    Input parameter (log) displays the printouts IFF it is truthy

    Returns True only when every name in all three groups is present,
    otherwise False. All groups are always checked, so with logging enabled
    every missing name is reported, not just the first one.
    """

    # FITS Headers
    fh0 = fits.getheader(f, ext=0)
    fh1 = fits.getheader(f, ext=1)

    # FITS Columns
    fc = fits.getdata(f).columns

    # One row per group:
    # (where to look, required names, "missing" message, "all present" message)
    checks = [
        (fh0, ['TEFF', 'LOGG', 'TESSMAG'],
         "\tHeader {} not present!", "\tAll Stellar Parameters present"),
        (fh1, ['TPERIOD', 'TDUR', 'TEPOCH', 'TDEPTH'],
         "\tFit Parameter {} not present!", "\tAll Fit Parameters present"),
        (fc, ['TIME', 'PHASE', 'LC_INIT', 'MODEL_INIT'],
         "\tFit Data {} not present!", "\tAll Fit Data present"),
    ]

    if log:
        print("Testing to see if all relevant information is present...")

    allPresent = True
    for container, names, missingMsg, presentMsg in checks:
        groupPresent = True
        for name in names:
            try:
                container[name]
            except KeyError:
                # Only a failed lookup counts as "missing"; anything else
                # (e.g. KeyboardInterrupt) must propagate instead of being
                # silently turned into a False result.
                groupPresent = False
                if log:
                    print(missingMsg.format(name))
        # Logical `and`, not bitwise `&`: `True & 2` is 0, which used to hide
        # this line for truthy non-bool values of `log`.
        if groupPresent and log:
            print(presentMsg)
        allPresent = allPresent and groupPresent

    return allPresent


So we need to check whether the headers contain _any_ keyword that includes a name such as "epoch" or "phase" (be they TEPOCH, KEPOCH, etc.), rather than looking only for one exact spelling.

In [3]:
# Collect every FITS file (and every directory visited) below the current
# working directory.
fitsList = []
fits_directories = []

# A single os.walk pass fills both lists; walking the tree a second time just
# to list the directories is unnecessary work on large trees.
for fits_root, fits_dirs, fits_files in os.walk('.'):
    fits_directories.append(fits_root)
    for fits_file in fits_files:
        fullpath = os.path.join(fits_root, fits_file)
        # Compare the extension case-insensitively so ".FITS" is found too
        if os.path.splitext(fullpath)[1].lower() == '.fits':
            fitsList.append(fullpath)
print("Number of FITS files: {}".format(len(fitsList)))

Number of FITS files: 28


Below is a small helper, built on Python's `difflib`, that scores how similar two strings are (case-insensitively).

In [4]:
from difflib import SequenceMatcher

def checkSimilar(a, b):
    """
    Score how alike two strings are, ignoring case.

    Both inputs are upper-cased and handed to difflib.SequenceMatcher; the
    value returned is its ratio(), a float between 0 and 1 where 1.0 means
    the upper-cased strings are identical.
    """
    left, right = a.upper(), b.upper()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()

Using [the astropy FITS documentation](https://docs.astropy.org/en/stable/io/fits/) as the source, we can see that `list(X.keys())` returns a list of all of the keyword names in a header `X`.

For example, `fits.getheader(fitsList[0], ext=0)[0]` is the same as `fits.getheader(fitsList[0], ext=0)['SIMPLE']`, i.e. the `[0]` is the same as `['SIMPLE']`, and using either will give us the _value_ of the item, but not the actual item _name_ itself; `list(X.keys())` gives us this list

In [5]:
def fitsSearch(terms, fList, threshold=0.73):
    """
    Print the header keywords of each FITS file that resemble any search term.

    For every path in fList the headers of extension 0 and extension 1 are
    read, and each keyword is scored against each term with checkSimilar.
    Every (keyword, term) pair scoring at or above the threshold is printed
    as "<keyword> = <score>" beneath a "fitsList[<index>]:" line for that file.

    Parameters
    ----------
    terms : iterable of str
        Words to look for, e.g. ["time", "period", "epoch"].
    fList : sequence of paths to FITS files
        Each file is read with fits.getheader for ext=0 and ext=1.
    threshold : float, optional
        Minimum similarity score for a keyword to be reported.
        Defaults to 0.73, the value previously hard-coded in the loop.

    Returns
    -------
    None. Results are only printed.
    """
    for index, path in enumerate(fList):
        # `index` is the position of the file within fList
        print("fitsList[{}]:".format(index))

        # Read both headers up front, before any matches are printed
        headers = [fits.getheader(path, ext=ext) for ext in (0, 1)]

        for header in headers:
            # .keys() yields the keyword names; indexing would give the values
            for key in list(header.keys()):
                for term in terms:
                    strCheck = checkSimilar(key, term)
                    if strCheck >= threshold:
                        print ("\t{} = {}".format(key, strCheck))

In [6]:
# Scan the ext=0 and ext=1 headers of every file in fitsList for keywords
# that resemble "time", "period" or "epoch".
fitsSearch(["time", "period", "epoch"], fitsList)

fitsList[0]:
fitsList[1]:
	TPERIOD = 0.9230769230769231
	TEPOCH = 0.9090909090909091
fitsList[2]:
	ORBEPOCH = 0.7692307692307693
fitsList[3]:
fitsList[4]:
fitsList[5]:
fitsList[6]:
fitsList[7]:
	TPERIOD = 0.9230769230769231
	TEPOCH = 0.9090909090909091
fitsList[8]:
fitsList[9]:
fitsList[10]:
fitsList[11]:
fitsList[12]:
fitsList[13]:
fitsList[14]:
fitsList[15]:
fitsList[16]:
fitsList[17]:
	TPERIOD = 0.9230769230769231
	TEPOCH = 0.9090909090909091
fitsList[18]:
fitsList[19]:
fitsList[20]:
fitsList[21]:
fitsList[22]:
fitsList[23]:
fitsList[24]:
	TPERIOD = 0.9230769230769231
	TEPOCH = 0.9090909090909091
fitsList[25]:
fitsList[26]:
fitsList[27]:


In [7]:
# Display the full ext=0 header of the second file in fitsList so its
# keywords can be inspected by eye.
fits.getheader(fitsList[1], ext=0)

SIMPLE  =                    T / conforms to FITS standards                     
BITPIX  =                    8 / array data type                                
NAXIS   =                    0 / number of array dimensions                     
EXTEND  =                    T / file contains extensions                       
NEXTEND =                    4 / number of standard extensions                  
EXTNAME = 'PRIMARY '           / name of extension                              
EXTVER  =                    1 / extension version number (not format version)  
SIMDATA =                    F / file is based on simulated data                
ORIGIN  = 'NASA/Ames'          / institution responsible for creating this file 
DATE    = '2018-10-06'         / file creation date.                            
TSTART  =    1354.105280000743 / observation start time in TJD                  
TSTOP   =    1381.518180460247 / observation stop time in TJD                   
DATE-OBS= '2018-08-23T14:30: