In [1]:
cd ..

/home/astro/phrdhx/automated_exocomet_hunt


In [2]:
import sys
sys.path.append("/home/astro/phrdhx/automated_exocomet_hunt")
import numpy as np
import pandas as pd
import math
import os
import kplr
import data
import warnings
warnings.filterwarnings("ignore")
from astropy.io import fits
from astropy.table import Table, unique
from astropy.stats import sigma_clip, sigma_clipped_stats
from scipy.optimize import curve_fit
from astropy.timeseries import LombScargle
from analysis_tools_cython import *

In [3]:
def import_lightcurve(file_path, drop_bad_points=False,
                      ok_flags=[5]):
    """Load a Kepler or TESS FITS light curve and clean it.

    The mission is detected from the path: a path containing 'kplr' uses the
    'SAP_QUALITY' column, a path containing 'tess' uses 'QUALITY'.

    Parameters
    ----------
    file_path : str
        Path to the FITS file. Must contain 'kplr' or 'tess'.
    drop_bad_points : bool
        If True, remove every row index returned by ``get_quality_indices``
        for a flag number (1-based position in its result) that is not
        listed in ``ok_flags``.
    ok_flags : list of int
        Flag numbers that are tolerated. Flags deemed to be OK are:
        5 - reaction wheel zero crossing, matters for short cadence.

    Returns
    -------
    astropy.table.Table or None
        Table with columns (TIME, PDCSAP_FLUX, quality column). Rows with a
        NaN time or flux are removed and isolated flux spikes are replaced by
        the mean of their two neighbours. ``None`` is returned (after printing
        a message) when the file does not exist.

    Raises
    ------
    ValueError
        If the path contains neither 'kplr' nor 'tess'.
    """

    try:
        hdulist = fits.open(file_path)
    except FileNotFoundError:
        print("Import failed: file not found")
        return

    # The context manager closes the FITS file once the columns have been
    # copied into the Table, so the file handle is not leaked.
    with hdulist:
        if 'kplr' in file_path:
            quality_col = 'SAP_QUALITY'
        elif 'tess' in file_path:
            quality_col = 'QUALITY'
        else:
            # Previously this case fell through and failed later with an
            # UnboundLocalError on `table`.
            raise ValueError(
                "Cannot determine mission: file_path must contain 'kplr' or 'tess'")

        table = Table(hdulist[1].data)['TIME', 'PDCSAP_FLUX', quality_col]

    if quality_col == 'QUALITY':
        print(len(table), "length at import")

    if drop_bad_points:
        bad_points = []
        q_ind = get_quality_indices(table[quality_col])

        for j, q in enumerate(q_ind):  # j+1 = flag number, q = row indices with that flag
            if j+1 not in ok_flags:
                bad_points += q.tolist()

        table.remove_rows(bad_points)
        print(len(table),"length after drop_bad_points")

    # Delete rows whose time or flux is NaN. Columns are read directly
    # instead of building a Row object for every element.
    time = table['TIME']
    flux = table['PDCSAP_FLUX']
    nan_rows = [i for i in range(len(table))
                if math.isnan(flux[i]) or math.isnan(time[i])]

    table.remove_rows(nan_rows)

    # Smooth data by replacing overly 'spikey' points: a point is a spike when
    # its distance from the mean of its two neighbours exceeds three times the
    # difference between those neighbours.
    flux = table['PDCSAP_FLUX']  # re-fetch after row removal; edits write through to the table
    spikes = [i for i in range(1, len(table)-1)
              if abs(flux[i] - 0.5*(flux[i-1] + flux[i+1]))
              > 3*abs(flux[i+1] - flux[i-1])]

    # Spikes are detected on the unmodified data, then replaced in order.
    for i in spikes:
        flux[i] = 0.5*(flux[i-1] + flux[i+1])

    print(len(table),"length at end")

    return table

def import_XRPlightcurve(file_path,sector,clip=4,drop_bad_points=True,ok_flags=[9],return_type='astropy'):
    """Load a pickled XRP light curve, flag MAD outliers and clean it.

    Parameters
    ----------
    file_path : str
        Path to the pickled light curve.
    sector : int or str
        Light-curve sector. Combined with the camera (``lc[4]``) to select the
        ``"{sector}-{camera}"`` column of the table from ``data.load_mad()``.
    clip : float
        Sigma passed to ``sigma_clip`` on the MAD series (default 4).
    drop_bad_points : bool
        If True (default), remove every row index returned by
        ``get_quality_indices`` for a flag number (1-based position in its
        result) that is not listed in ``ok_flags``.
    ok_flags : list of int
        Flag numbers that are tolerated. Flag 17 (value ``2**16``) is the one
        this function sets on cadences rejected by the MAD sigma clip.
    return_type : str
        'astropy' (default) returns an astropy Table; 'pandas' returns a
        pandas DataFrame.

    Returns
    -------
    table : astropy.table.Table or pandas.DataFrame
        Cleaned light curve with columns time, raw flux, corrected flux,
        PCA flux, flux error, quality.
    info : list
        ``lc[0:6]`` - TIC ID, RA, DEC, TESS magnitude, Camera, Chip.
    """
    # NOTE(review): unpickling can execute arbitrary code - only load trusted files.
    lc = pd.read_pickle(file_path)

    for i in range(len(lc)):
        if isinstance(lc[i], np.ndarray):
            lc[i] = pd.Series(lc[i])
    for_df = lc[6:]  # time-series entries; lc[0:6] is returned separately as `info`
    columns = [
        "time",
        "raw flux",
        "corrected flux",
        "PCA flux",
        "flux error",
        "quality",
    ]
    df = pd.DataFrame(data=for_df).T
    df.columns = columns

    table = Table.from_pandas(df)
    print(len(table),"length at import")
    # loading Ethan Kruse bad times
    bad_times = data.load_bad_times()
    bad_times = bad_times - 2457000
    # loading MAD
    mad_df = data.load_mad()
    camera = lc[4]
    mad_arr = mad_df.loc[:len(table)-1,f"{sector}-{camera}"]
    sig_clip = sigma_clip(mad_arr,sigma=clip,masked=True)

    # True where the MAD value survived the sigma clip. The previous code
    # compared the float MAD values against this boolean array, which only
    # gave the intended result when every MAD value lay in [0, 1).
    mad_cut = ~np.ma.getmaskarray(sig_clip)
    print(len(mad_cut),"length of mad cut")

    # indices of cadences rejected by the sigma clip
    matched_ind = np.where(~mad_cut)[0]

    # Cast first (int32 so it can work with `get_quality_indices`), then set
    # flag 17 with a bitwise OR so an already-set bit is not corrupted the way
    # an arithmetic addition would.
    mad_flag = 2**(17-1)
    table['quality'] = table['quality'].astype(np.int32)
    table['quality'][matched_ind] |= mad_flag

    # Ethan Kruse bad time mask: keep only cadences outside every bad interval
    mask = np.ones_like(table['time'], dtype=bool)
    for interval in bad_times:
        outside = (table['time']<interval[0])|(table['time']>interval[1])
        mask = mask & outside

    # Apply Kruse bad mask to table
    table = table[mask]

    if drop_bad_points:
        bad_points = []
        q_ind = get_quality_indices(table['quality'])

        for j,q in enumerate(q_ind): # j+1 = flag number, q = row indices with that flag
            if j+1 not in ok_flags:
                bad_points += q.tolist()
        table.remove_rows(bad_points)

    # Delete rows whose time or corrected flux is NaN.
    # NOTE(review): NaNs are tested on 'corrected flux' while the spike
    # smoothing below works on 'raw flux' - confirm which column is intended.
    time = table['time']
    corrected = table['corrected flux']
    nan_rows = [i for i in range(len(table))
                if math.isnan(corrected[i]) or math.isnan(time[i])]

    table.remove_rows(nan_rows)
    print(len(table),"length after drop bad points")

    # Smooth data by replacing overly 'spikey' points: a point is a spike when
    # its distance from the mean of its two neighbours exceeds three times the
    # difference between those neighbours.
    raw = table['raw flux']  # column reference; edits write through to the table
    spikes = [i for i in range(1, len(table)-1)
              if abs(raw[i] - 0.5*(raw[i-1] + raw[i+1]))
              > 3*abs(raw[i+1] - raw[i-1])]

    # Spikes are detected on the unmodified data, then replaced in order.
    for i in spikes:
        raw[i] = 0.5*(raw[i-1] + raw[i+1])
    print(len(table),"length at end")

    if return_type == 'pandas':
        return table.to_pandas(), lc[0:6]

    return table, lc[0:6]


In [4]:
# Pickled light curve (relative path) that is passed to import_XRPlightcurve in a later cell.
filename = 'betapic/tesslcs_sector_6_104_2_min_cadence_targets_tesslc_270577175.pkl'
# FITS light-curve path (relative); not referenced by the cells shown here.
filename_spoc = 'tess_SPOC/4112/0759/hlsp_tess-spoc_tess_phot_0000000141120759-s0006_tess_v1_lc.fits'

In [7]:
# Import the sector-6 light curve, tolerating only quality flag 17 (the flag the
# importer sets from the MAD clip). The trailing [0] keeps the table and
# discards the accompanying info list.
table = import_XRPlightcurve(
    filename,
    sector=6,
    drop_bad_points=True,
    ok_flags=[17],
)[0]

993 length at import
993 length of mad cut
844 length after drop bad points
844 length at end


In [8]:
# Display one row per distinct value of the 'quality' column to see which
# quality values remain in the cleaned table.
unique(table,keys='quality')

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1469.0125148958496,347161.4921875,347159.51928986365,347257.51836509426,16.086396368743504,0
1471.700010107689,346899.203125,347069.55329667364,347039.16784235585,16.080735360786594,65536
