In [98]:
# %load master-import.py
#!/usr/bin/env python3

################################
# Scientific imports
###
%matplotlib inline
from astropy.io import fits
import matplotlib.pyplot as plt
import numpy as np
from astroquery.mast import Observations
from astroquery.mast import Catalogs

###
# General imports
###
import csv, math, io, os, os.path, sys, random
import pandas as pd
import seaborn as sb
import sklearn
from sklearn import metrics
from IPython.display import display

# Default figure size used by every plot in this notebook.
plt.rcParams["figure.figsize"] = (20,9)
# Switch on seaborn's default plot styling.
sb.set()

###
# Global Variables
###
# Lists
# fitsList is filled by Initialise() with the paths returned by MakingAList();
# starlist/planetlist/eblist/beblist are filled by LoadList(); dataset is
# replaced by the DataFrame that MakeData() returns.
fitsList=[]
starlist=[]
planetlist=[]
eblist=[]
beblist=[]
dataset = pd.DataFrame(columns=['id', 'vals', 'isplanet'])

# List Holder
# LoadList() rebuilds this as {"s": starlist, "p": planetlist, "eb": eblist, "beb": beblist}.
alllists = {}

# Keep track of current LC and its TIC identifier
# GetRandomLC() updates both entries: "number" is the index into fitsList and
# "id" is the third hyphen-separated field of that path with leading zeros stripped.
lastRandom={
    "number": 0,
    "id": 0
}

################################
# Functions
###
# Function for Reading which LC datafiles we have into a list
def MakingAList(prnt=False):
    """Collect the paths of all FITS files found under ``SIM_DATA/``.

    The tree is walked once, recursively, and every file whose extension is
    ``.fits`` (case-insensitive) is kept.  Directories and file names are
    visited in sorted order so that the position of a file in the returned
    list is reproducible between runs and machines (``os.walk`` itself gives
    no ordering guarantee).

    Parameters
    ----------
    prnt : bool, optional
        When true, print how many FITS files were found.

    Returns
    -------
    list of str
        Paths of the form ``SIM_DATA/<...>/<name>.fits``; empty when the
        directory does not exist or holds no FITS files.
    """
    fl = []

    for dirpath, dirnames, filenames in os.walk("SIM_DATA/"):
        # Sorting in place makes os.walk descend into sub-directories in a
        # stable order as well.
        dirnames.sort()
        for filename in sorted(filenames):
            if os.path.splitext(filename)[1].lower() == '.fits':
                fl.append(os.path.join(dirpath, filename))

    if prnt:
        print("Number of FITS files: {}".format(len(fl)))
    return fl

# Chooses a random number
def GetRandomLC(n = None):
    """Pick a light-curve index and record it in ``lastRandom``.

    Parameters
    ----------
    n : int, optional
        Index into ``fitsList``.  It is used as-is when it is an ``int`` in
        ``[0, len(fitsList))``; any other value (``None``, out of range, not
        an ``int``) is replaced by a uniformly random valid index.

    Returns
    -------
    int
        The index that was selected.

    Raises
    ------
    IndexError
        If ``fitsList`` is empty, since there is nothing to select.

    Side effects
    ------------
    ``lastRandom["number"]`` is set to the selected index and
    ``lastRandom["id"]`` to the third hyphen-separated field of the file
    name with its leading zeros removed.
    """
    total = len(fitsList)
    if total == 0:
        raise IndexError("fitsList is empty; populate it (e.g. with Initialise()) first")

    if not (isinstance(n, int) and 0 <= n < total):
        # randrange excludes its upper bound.  randint(0, total) includes it
        # and could therefore hand back `total`, one past the end of the list.
        n = random.randrange(total)

    # Parse the file name only, so a hyphen in a directory name cannot shift
    # the field that holds the identifier.
    filename = os.path.basename(fitsList[n])
    lastRandom["number"] = n
    lastRandom["id"] = str(filename.split("-")[2].lstrip("0"))
    return n

def DrawACurve(n=None):
    """Plot the PDCSAP flux of one light curve from ``fitsList`` against time.

    Parameters
    ----------
    n : int, optional
        Index into ``fitsList``.  It is handed to ``GetRandomLC``, which
        falls back to a random index when ``n`` is ``None`` or not a valid
        index.

    Side effects
    ------------
    ``GetRandomLC`` updates ``lastRandom``; the figure is titled with
    ``CurrentLC()`` and shown with ``plt.show()``.  Nothing is returned.
    """
    fitsFile = fitsList[GetRandomLC(n)]

    # The file is opened exactly once; only the two columns that are plotted
    # are read from the table in the first extension.
    with fits.open(fitsFile, mode="readonly") as hdulist:
        tess_bjds = hdulist[1].data['TIME']
        pdcsap_fluxes = hdulist[1].data['PDCSAP_FLUX']

    # Start figure and axis.
    fig, ax = plt.subplots()

    # Plot the timeseries in black circles.
    ## Using the [1:-1] identifier to cut off the leading and trailing zeroes
    ax.plot(tess_bjds[1:-1], pdcsap_fluxes[1:-1], 'k.', markersize=1)

    # Let's label the axes and define a title for the figure.
    fig.suptitle(CurrentLC())
    ax.set_ylabel("PDCSAP Flux (e-/s)")
    ax.set_xlabel("Time (TBJD)")

    # Adjust the left margin so the y-axis label shows up.
    fig.subplots_adjust(left=0.15)
    plt.show()
    
def LoadListGeneral(f):
    """Read the leading token of every data line in ``./SIM_DATA/unpacked/<f>``.

    Each line is parsed with ``csv.reader`` and only its first comma-separated
    field is considered.  Lines whose first field starts with ``#`` are
    treated as comments; blank and whitespace-only lines are skipped.  For
    the remaining lines the first whitespace-separated token is collected.

    Parameters
    ----------
    f : str
        File name inside ``./SIM_DATA/unpacked/``.

    Returns
    -------
    list of str
        The collected tokens in file order.  An empty list is returned (after
        printing ``FNF``) when the file does not exist, so that membership
        tests against the result keep working.
    """
    lst = []
    try:
        # newline='' is what the csv module asks for when it is given a file object.
        with open('./SIM_DATA/unpacked/{}'.format(f), newline='') as df:
            for lineholder in csv.reader(df):
                if not lineholder:
                    # csv.reader yields an empty list for a blank line.
                    continue
                line = lineholder[0]
                if line.startswith("#"):
                    # Commented line: the first character is '#'.
                    continue
                tokens = line.split()
                if tokens:
                    lst.append(tokens[0])
    except FileNotFoundError:
        print("FNF")
    return lst

def LoadList(itemtype="all"):
    """Load one category list, or all of them, into the module-level lists.

    ``itemtype`` is matched case-insensitively against the selectors below;
    ``"all"`` selects every category.  Each selected category is read with
    ``LoadListGeneral`` and a message naming its data file is printed.  When
    at least one category matched, ``alllists`` is rebuilt from the four
    module-level lists; otherwise a usage hint is printed.  Returns ``None``.
    """
    global alllists

    # One row per category: accepted selectors, the module-level list to
    # fill, the data file to read and the message printed after loading.
    sources = (
        (("s", "star"),
         "starlist", "tsop301_star_data.txt", "Loading star list: {}"),
        (("p", "planet"),
         "planetlist", "tsop301_planet_data.txt", "loading planet list: {}"),
        (("eb", "eclipsing binary", "eclipsingbinary"),
         "eblist", "tsop301_eb_data.txt", "loading eb list: {}"),
        (("beb", "back eclipsing binary", "backeclipsingbinary"),
         "beblist", "tsop301_backeb_data.txt", "loading beb list: {}"),
    )

    choice = itemtype.lower()
    namespace = globals()
    loaded_any = False

    for selectors, target, datafile, message in sources:
        if choice != "all" and choice not in selectors:
            continue
        loaded_any = True
        namespace[target] = LoadListGeneral(datafile)
        print(message.format(datafile))

    if not loaded_any:
        # If an invalid selection has been entered
        print("You must enter either:\n"
               "* \"S\" (or \"Star\")\n"
               "* \"P\" (or \"Planet\")\n"
               "* \"EB\" (or \"Eclipsing Binary\")\n"
               "* \"BEB\" (or \"Back Eclipsing Binary\")")
        return

    alllists = {"s": starlist, "p": planetlist, "eb": eblist, "beb": beblist}
        
def _IsInList(kind, n):
    """Return True when ``n`` is in the ``alllists`` entry stored under ``kind``.

    A missing entry (``alllists`` not built yet) or an entry that is ``None``
    or empty counts as "not a member" instead of raising.
    """
    members = alllists.get(kind)
    return bool(members) and n in members

def IsThisAStar(n):
    """Return True when ``n`` is listed in ``alllists["s"]``."""
    return _IsInList("s", n)

def IsThisAPlanet(n):
    """Return True when ``n`` is listed in ``alllists["p"]``."""
    return _IsInList("p", n)

def IsThisAEB(n):
    """Return True when ``n`` is listed in ``alllists["eb"]``."""
    return _IsInList("eb", n)

def IsThisABEB(n):
    """Return True when ``n`` is listed in ``alllists["beb"]``."""
    return _IsInList("beb", n)

# Function to tell you what an item is
def WhatIsMyLC(n):
    """Report every category that ``n`` belongs to.

    Parameters
    ----------
    n : str
        Identifier to look up with the ``IsThisA...`` helpers.

    Returns
    -------
    list of str or str
        The matching labels, in the order ``"Star"``, ``"Planet"``, ``"EB"``,
        ``"BEB"``; or the string ``"UNKNOWN"`` when no category matches.
    """
    checks = (
        ("Star", IsThisAStar),
        ("Planet", IsThisAPlanet),
        ("EB", IsThisAEB),
        ("BEB", IsThisABEB),    # was mislabelled "BRB"
    )
    lbl = [label for label, belongs in checks if belongs(n)]

    return lbl if lbl else "UNKNOWN"

# Purely for convenience
def CurrentLC():
    """Return a one-line label built from the two entries of ``lastRandom``."""
    template = "File № {} - {}"
    number = lastRandom["number"]
    identifier = lastRandom["id"]
    return template.format(number, identifier)
    
# MAKE ME BIG DATAFRAME
def MakeData(limit=101):
    """Build a DataFrame with one row per light curve in ``fitsList``.

    Parameters
    ----------
    limit : int or None, optional
        Only the first ``limit`` entries of ``fitsList`` are read (default
        101, the value that used to be hard-coded).  ``None`` reads them all.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (identifier recorded by ``GetRandomLC``), ``vals``
        (the ``PDCSAP_FLUX`` column of the first extension without its first
        and last sample) and ``isplanet`` (result of ``IsThisAPlanet``).

    Side effects
    ------------
    ``GetRandomLC`` is called for every file, so ``lastRandom`` ends up
    describing the last file that was read.
    """
    columns = ['id', 'vals', 'isplanet']

    # Rows are gathered in a plain list and turned into a DataFrame once.
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []

    # Loop for each FITS file
    for e, li in enumerate(fitsList[:limit]):
        with fits.open(li, memmap=False) as f:
            # Populate 'lastRandom' so it stores the current number and id
            GetRandomLC(e)
            rid = lastRandom["id"]

            records.append({
                'id': rid,
                'vals': f[1].data['PDCSAP_FLUX'][1:-1],
                'isplanet': IsThisAPlanet(rid),
            })

    return pd.DataFrame(records, columns=columns)

################################
# RUN ALL INITIALISERS
###
def Initialise():
    """Run the three set-up steps in order, announcing each one.

    ``fitsList`` is replaced by the result of ``MakingAList()``, the
    category lists are loaded with ``LoadList()`` and ``dataset`` is replaced
    by the result of ``MakeData()``.  Returns ``None``.
    """
    module_state = globals()

    print("Populating fitsList...")
    module_state["fitsList"] = MakingAList()

    print("Loading the s/p/eb/beb Lists")
    LoadList()

    print("Populating the DataFrame")
    module_state["dataset"] = MakeData()

In [8]:
# Populate fitsList, the star/planet/EB/BEB lists and the dataset DataFrame.
Initialise()

Populating fitsList...
Loading the s/p/eb/beb Lists
Loading star list: tsop301_star_data.txt
loading planet list: tsop301_planet_data.txt
loading eb list: tsop301_eb_data.txt
loading beb list: tsop301_backeb_data.txt
Populating the DataFrame


In [9]:
# Display the 'vals' column: one array of PDCSAP_FLUX samples per file.
dataset['vals']

0      [179952.0, 179979.34, 179668.58, 179833.98, 17...
1      [21610.824, 21618.32, 21631.969, 21623.95, 216...
2      [108729.734, 108670.54, 108613.875, 108629.734...
3      [56039.613, 56027.977, 55994.34, 55974.965, 56...
4      [18019.508, 17992.531, 17918.803, 18050.266, 1...
                             ...                        
96     [6429.8755, 6417.58, 6425.976, 6428.759, 6433....
97     [33687.33, 33695.26, 33713.566, 33670.67, 3367...
98     [8813.81, 8802.651, 8807.448, 8780.694, 8820.8...
99     [3135.4832, 3158.8599, 3145.7952, 3146.2546, 3...
100    [1875865.1, 1876130.9, 1877120.5, 1875987.9, 1...
Name: vals, Length: 101, dtype: object

In [101]:
from tqdm.notebook import tqdm, trange
import time    # to be used in loop iterations

# Loop with a progress bar: trange(100) iterates like range(100) while
# drawing the bar; the short sleep stands in for real work per iteration.
for _ in trange(100):
    time.sleep(0.01)

  0%|          | 0/100 [00:00<?, ?it/s]

In [102]:
def MakeDataBar(start=1370, stop=1400):
    """Build the light-curve DataFrame for a slice of ``fitsList`` with a progress bar.

    Parameters
    ----------
    start, stop : int or None, optional
        Slice bounds into ``fitsList`` (defaults 1370 and 1400, the values
        that used to be hard-coded).  They follow normal slice rules, so
        bounds beyond the end of the list simply yield fewer (or no) rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``vals`` (``PDCSAP_FLUX`` of the first extension
        without its first and last sample) and ``isplanet``.

    Side effects
    ------------
    Prints the index and path of every file before opening it, and calls
    ``GetRandomLC`` for every file, which updates ``lastRandom``.
    """
    columns = ['id', 'vals', 'isplanet']

    # Resolve the slice to real fitsList indices.  The identifier has to be
    # looked up with the index of the file actually being read; passing the
    # position inside the slice (0, 1, 2, ...) would pair each flux array
    # with the id of an unrelated file at the start of fitsList.
    indices = range(*slice(start, stop).indices(len(fitsList)))

    # Rows are gathered in a list and turned into a DataFrame once;
    # DataFrame.append was removed in pandas 2.0.
    records = []

    # Loop for each FITS file
    for idx in tqdm(indices):
        li = fitsList[idx]
        print("{}: Current File is {}".format(idx, li))
        with fits.open(li, memmap=False) as f:
            # Populate 'lastRandom' so it stores the current number and id
            GetRandomLC(idx)
            rid = lastRandom["id"]

            records.append({
                'id': rid,
                'vals': f[1].data['PDCSAP_FLUX'][1:-1],
                'isplanet': IsThisAPlanet(rid),
            })

    return pd.DataFrame(records, columns=columns)
# Run the progress-bar variant; as the last expression of the cell, the
# DataFrame it returns is what gets displayed.
MakeDataBar()

0it [00:00, ?it/s]

Unnamed: 0,id,vals,isplanet


The error seems to be in file `SIM_DATA/tess2018191215100-s0001-0000000177165931-0001-a_lc.fits` (index 1380 in `fitsList`, i.e. the 1381st file).

In [54]:
# errFile is the light curve suspected of causing the error; tstFile is a
# second light curve used for comparison (presumably a known-good one — confirm).
errFile="SIM_DATA/tess2018191215100-s0001-0000000177165931-0001-a_lc.fits"
tstFile="SIM_DATA/tess2018191215100-s0001-0000000364110868-0001-a_lc.fits"
# Print a summary of the HDUs in the suspect file.
fits.info(errFile)

Filename: SIM_DATA/tess2018191215100-s0001-0000000177165931-0001-a_lc.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU      44   ()      
  1  LIGHTCURVE    1 BinTableHDU    158   20340R x 20C   [D, E, J, E, E, E, E, E, E, J, D, E, D, E, D, E, D, E, E, E]   


In [55]:
# Same HDU summary for the comparison file.
fits.info(tstFile)

Filename: SIM_DATA/tess2018191215100-s0001-0000000364110868-0001-a_lc.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU      44   ()      
  1  LIGHTCURVE    1 BinTableHDU    167   20340R x 20C   [D, E, J, E, E, E, E, E, E, J, D, E, D, E, D, E, D, E, E, E]   
  2  APERTURE      1 ImageHDU        49   (11, 11)   int32   


In [83]:
# Print the PDCSAP_FLUX column from the first extension of the comparison file.
# NOTE(review): only tstFile is read here; the same read is not attempted on errFile.
with fits.open(tstFile) as hdu:
    print(hdu[1].data['PDCSAP_FLUX'])

[    0.    52433.04  52440.36  ... 52402.484 52398.266     0.   ]


In [97]:
# Look up the TTYPE8 card (name of the 8th table column) in the header of the
# first extension of each file, to compare the two.
fits.getheader(tstFile, ext=1)['TTYPE8'];
fits.getheader(errFile, ext=1)['TTYPE8']

'PDCSAP_FLUX'

'PDCSAP_FLUX'

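So both files name their 8th column `PDCSAP_FLUX`, yet for some reason `MakeDataBar` still doesn't like this curve. The only difference visible above is that the failing file has no `APERTURE` extension (and 158 rather than 167 header cards in its `LIGHTCURVE` extension).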