In [1]:
cd ..

/home/astro/phrdhx/automated_exocomet_hunt


In [2]:
import sys
sys.path.append("/home/astro/phrdhx/automated_exocomet_hunt")
import numpy as np
import pandas as pd
import math
import os
import kplr
import data
import warnings
warnings.filterwarnings("ignore")
from astropy.io import fits
from astropy.table import Table, unique
from astropy.stats import sigma_clip, sigma_clipped_stats
from scipy.optimize import curve_fit
from astropy.timeseries import LombScargle
from analysis_tools_cython import *

In [3]:
def import_lightcurve(file_path, drop_bad_points=False,
                      ok_flags=[5]):
    """Returns (N by 2) table, columns are (time, flux).

    Flags deemed to be OK are:
    5 - reaction wheel zero crossing, matters for short cadence
    """

    try:
        hdulist = fits.open(file_path)
    except FileNotFoundError:
        print("Import failed: file not found")
        return

    scidata = hdulist[1].data
    if 'kplr' in file_path:
        table = Table(scidata)['TIME','PDCSAP_FLUX','SAP_QUALITY']
    elif 'tess' in file_path:
        #try:
        table = Table(scidata)['TIME','PDCSAP_FLUX','QUALITY']
        print(len(table), "length at import")
        print(type((table)['QUALITY'][0]))
        #except:
        #    time = scidata.TIME
        #    flux = scidata.PDCSAP_FLUX
        #    quality = scidata.QUALITY
        #    table = Table([time,flux,quality],names=('TIME','PDCSAP_FLUX','QUALITY'))


    if drop_bad_points:
        bad_points = []
        if 'kplr' in file_path:
            q_ind = get_quality_indices(table['SAP_QUALITY'])
        elif 'tess' in file_path:
            q_ind = get_quality_indices(table['QUALITY'])
        
        for j,q in enumerate(q_ind): # j=index, q=quality
            if j+1 not in ok_flags:
                bad_points += q.tolist() # adds bad_points by value of q (the quality indices) and converts to list
    

        # bad_points = [i for i in range(len(table)) if table[i][2]>0]
        table.remove_rows(bad_points)
        print(len(table),"length after drop_bad_points")

    # Delete rows containing NaN values. 
    ## if flux or time columns are NaN's, remove them.
    nan_rows = [ i for i in range(len(table)) if
            math.isnan(table[i][1]) or math.isnan(table[i][0]) ]

    table.remove_rows(nan_rows)

    # Smooth data by deleting overly 'spikey' points.
    ## if flux - 0.5*(difference between neihbouring points) > 3*(distance between neighbouring points), spike identified
    spikes = [ i for i in range(1,len(table)-1) if \
            abs(table[i][1] - 0.5*(table[i-1][1]+table[i+1][1])) \
            > 3*abs(table[i+1][1] - table[i-1][1])]

    ## flux smoothened out by changing those points to 0.5*distance between neighbouring points
    for i in spikes:
        table[i][1] = 0.5*(table[i-1][1] + table[i+1][1])
        
    print(len(table),"length at end")

    return table

def import_XRPlightcurve(file_path,sector,clip=4,drop_bad_points=True,ok_flags=[9],return_type='astropy'):
    """
    file_path: path to file
    sector = lightcurve sector
    drop_bad_points: Removing outlier points. Default False
    mad_plots: plots MAD comparisons
    q: lightcurve quality, default 0 (excludes all non-zero quality)
    clip: Sigma to be clipped by (default 3)
    return_type: Default 'astropy'. Pandas DataFrame also available with 'pandas' 

    returns
        - table: Astropy table of lightcurve
        - info: additional information about the lightcurve (TIC ID, RA, DEC, TESS magnitude, Camera, Chip)
    """
    lc = pd.read_pickle(file_path)

    for i in range(len(lc)):
        if isinstance(lc[i], np.ndarray):
            lc[i] = pd.Series(lc[i])
    for_df = lc[6:]  # TIC ID, RA, DEC, TESS magnitude, Camera, Chip
    columns = [
        "time",
        "raw flux",
        "corrected flux",
        "PCA flux",
        "flux error",
        "quality",
    ]
    df = pd.DataFrame(data=for_df).T 
    df.columns = columns
    
    table = Table.from_pandas(df)
    print(len(table),"length at import")
    # loading Ethan Kruse bad times
    bad_times = data.load_bad_times()
    bad_times = bad_times - 2457000
    # loading MAD 
    mad_df = data.load_mad()
    sec = sector
    camera = lc[4]
    mad_arr = mad_df.loc[:len(table)-1,f"{sec}-{camera}"]
    sig_clip = sigma_clip(mad_arr,sigma=clip,masked=True)

    # setting zero quality only
    #table = table[table['quality'] == 0]

    # applied MAD cut to keep points within selected sigma
    #mad_cut = mad_arr.values < med_sig_clip + clip*(rms_sig_clip)
    mad_cut = mad_arr.values < ~sig_clip.mask # --> check this one. Could it be .data?
    print(len(mad_cut),"length of mad cut")
    
    # return indices of values above MAD threshold
    matched_ind = np.where(~mad_cut) # indices of MAD's above threshold

    # a bit of pandas trickery to make quality = 23, but not overriding existing flags
    df = table.to_pandas()
    b = pd.Series(np.asarray(matched_ind)[0])
    sliced = df.iloc[b]
    sliced['quality'][sliced['quality'] == 0] = 2**9
    df['quality'].iloc[sliced[sliced.quality == 2**9].index] = 2**9
    
    table = Table.from_pandas(df) 
    
    
    table['quality'] = table['quality'].astype(np.int32) # int32 set so it can work with get_quality_indices
#     if table['quality'][np.array(matched_ind)] == 0:
#          # so that this doesn't overwrite other quality flags
     # set quality flag 23
    

    # Ethan Kruse bad time mask
    mask = np.ones_like(table['time'], dtype=bool)
    for i in bad_times:
        newchunk = (table['time']<i[0])|(table['time']>i[1])
        mask = mask & newchunk
        
    # Apply Kruse bad mask to table
    table = table[mask]

    if drop_bad_points:
        bad_points = []
        q_ind = get_quality_indices(table['quality'])
    
        for j,q in enumerate(q_ind): # j=index, q=quality
            if j+1 not in ok_flags:
                bad_points += q.tolist()
        table.remove_rows(bad_points)

        
    # if mad_plot:
    #     mad_plots(table=table,array=mad_arr,median=med_sig_clip,rms=rms_sig_clip,clip=clip,sector=sec,camera=camera)
    
    # completes masking of array elements representing non-zero flags (excludes quality flag 23; above MAD threshold values are excluded to get clean lightcurve)
    #table = table[table['quality'] == 0] 
    
    # Delete rows containing NaN values. 
    nan_rows = [ i for i in range(len(table)) if
            math.isnan(table[i][2]) or math.isnan(table[i][0]) ] # -> check this 

    table.remove_rows(nan_rows)
    print(len(table),"length after drop bad points")

    # Smooth data by deleting overly 'spikey' points.
    spikes = [ i for i in range(1,len(table)-1) if \
            abs(table[i][1] - 0.5*(table[i-1][1]+table[i+1][1])) \
            > 3*abs(table[i+1][1] - table[i-1][1])]

    for i in spikes:
        table[i][1] = 0.5*(table[i-1][1] + table[i+1][1])
    print(len(table),"length at end")

    if return_type == 'pandas':

        return table.to_pandas(), lc[0:6]
    else:

        return table, lc[0:6]


In [4]:
filename = 'tess_testlcs/XRP/tesslcs_sector_6_104_2_min_cadence_targets_tesslc_270577175.pkl'
filename_spoc = 'tess_SPOC/4112/0759/hlsp_tess-spoc_tess_phot_0000000141120759-s0006_tess_v1_lc.fits'

In [5]:
table = import_XRPlightcurve(filename,6, drop_bad_points=False,ok_flags=[9])[0]

993 length at import
993 length of mad cut
903 length after drop bad points
903 length at end


In [6]:
unique(table,keys='quality')

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1469.0125148958496,347161.4921875,347159.51928986365,347257.51836509426,16.086396368743504,0
1471.5333439097126,346907.34375,347089.35520181863,347011.2598491735,16.080502407518946,128
1471.512510627757,346922.1171875,308044.8395765959,307987.09446778125,15.15201898095494,164
1471.700010107689,346899.203125,347069.55329667364,347039.16784235585,16.080735360786594,512
1469.3250146366631,347009.7890625,347043.6325960738,347136.6741403208,16.082923343683778,4096
1472.1375088946793,346905.890625,347081.44201995654,347034.7448228531,16.08121145742685,4224
1474.637500883567,346940.6328125,309396.38672819966,309385.64698206505,15.186526671566195,4260


In [7]:
mad_df = data.load_mad()
sec = 6
camera = 1
mad_arr = mad_df.loc[:len(table)-1,f"{sec}-{camera}"]
sig_clip = sigma_clip(mad_arr,sigma=4,masked=True)
# setting zero quality only
#table = table[table['quality'] == 0]

# applied MAD cut to keep points within selected sigma
mad_cut = mad_arr.values < ~sig_clip.mask #(med_sig_clip + clip*(rms_sig_clip))


# return indices of values above MAD threshold
matched_ind = np.where(~mad_cut) 
#indexes = table['quality'][matched_ind][table[matched_ind]['quality'] == 0]
#table['quality'][matched_ind][table[matched_ind]['quality'] == 0][:]

In [8]:
table[matched_ind]

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1469.0125148958496,347161.4921875,347159.51928986365,347257.51836509426,16.086396368743504,0
1469.033348205488,347138.15625,347130.80441512656,347233.69674245414,16.0858834308829,0
1469.05418151656,347137.9453125,347154.0474762639,347237.91987667594,16.086005323284652,0
1469.075014829015,347115.9453125,347137.66357271076,347224.79454237444,16.08544494260152,0
1469.0958481427742,347092.8984375,347135.54636229546,347198.3862718676,16.084751257711787,0
1469.1166814577352,347085.109375,347122.48446406104,347188.17678715877,16.084545873813063,0
1469.1375147737685,347078.0546875,347115.2081440332,347179.6375294337,16.08437582534423,0
1469.1583480907243,347062.7890625,347119.95822593133,347193.874734803,16.08402671044064,0
1469.1791814084336,347063.40625,347110.70301108615,347171.5797639444,16.084105451692125,0
1469.2000147267115,347040.3671875,347099.42433376826,347159.7510696048,16.08360714588746,0


In [9]:
#table['quality'][matched_ind] = 23

In [10]:
unique(table[matched_ind],keys='quality')

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1469.0125148958496,347161.4921875,347159.51928986365,347257.51836509426,16.086396368743504,0
1469.3250146366631,347009.7890625,347043.6325960738,347136.6741403208,16.082923343683778,4096


---

In [11]:
df = table.to_pandas()

In [12]:
df.head()

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
0,1469.012515,347161.492188,347159.51929,347257.518365,16.086396,0
1,1469.033348,347138.15625,347130.804415,347233.696742,16.085883,0
2,1469.054182,347137.945312,347154.047476,347237.919877,16.086005,0
3,1469.075015,347115.945312,347137.663573,347224.794542,16.085445,0
4,1469.095848,347092.898438,347135.546362,347198.386272,16.084751,0


In [13]:
b = pd.Series(np.asarray(matched_ind)[0])

sliced = df.iloc[b]

In [14]:
sliced['quality'][sliced['quality'] != 0] = sliced['quality'][sliced['quality'] != 0] + 2**13

In [15]:
sliced['quality'][sliced['quality'] == 0] = 2**13

In [16]:
sliced[sliced.quality == 2**13]

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
0,1469.012515,347161.492188,347159.519290,347257.518365,16.086396,8192
1,1469.033348,347138.156250,347130.804415,347233.696742,16.085883,8192
2,1469.054182,347137.945312,347154.047476,347237.919877,16.086005,8192
3,1469.075015,347115.945312,347137.663573,347224.794542,16.085445,8192
4,1469.095848,347092.898438,347135.546362,347198.386272,16.084751,8192
...,...,...,...,...,...,...
477,1481.179132,347170.789062,347123.838861,347144.420392,16.087107,8192
479,1481.220799,347171.000000,347126.166004,347149.578093,16.087315,8192
572,1483.158285,347214.175781,347086.309591,347080.969881,16.086505,8192
722,1486.283258,347186.750000,347050.615707,347040.128334,16.086750,8192


In [17]:
sliced[sliced['quality'] != 2**13]

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
15,1469.325015,347009.789062,347043.632596,347136.67414,16.082923,12288
16,1469.345848,347025.140625,347127.88504,347157.948649,16.083208,12288
23,1469.491681,347032.609375,347153.80384,347183.477857,16.083633,12288
25,1469.533348,346994.34375,347116.872346,347134.980783,16.082602,12288
428,1480.158306,347072.515625,347086.563048,347105.36218,16.084302,12288
438,1480.366638,347119.148438,347135.449067,347138.415214,16.0854,12288
460,1480.824968,347164.078125,347069.248887,347142.087855,16.086442,12288


In [19]:
df['quality'].iloc[sliced[sliced.quality == 2**13].index] = 2**13

In [20]:
# df['quality'].iloc[sliced[sliced.quality != 2**13].index] final integration - need to work on this line.

In [18]:
df[df.quality == 0]

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
21,1469.450015,347016.695312,347115.735140,347158.524772,16.083107,0
22,1469.470848,347040.515625,347143.991773,347178.177125,16.083679,0
24,1469.512514,347016.851562,347121.626122,347169.274679,16.082983,0
26,1469.554181,346971.101562,347097.125073,347106.428906,16.082104,0
27,1469.575014,346971.054688,347099.939358,347112.459948,16.082127,0
...,...,...,...,...,...,...
895,1489.887389,347239.562500,347126.248822,347128.350173,16.088161,0
896,1489.908222,347239.937500,347157.423757,347153.979670,16.088591,0
897,1489.929055,347240.312500,347145.468019,347142.539587,16.088014,0
899,1489.970721,347235.718750,347105.731310,347101.932603,16.087104,0


In [19]:
    # a bit of pandas trickery to make quality = 23, but not overriding existing flags
#     df = table.to_pandas()
#     b = pd.Series(np.asarray(matched_ind)[0])
#     sliced = df.iloc[b]
#     sliced['quality'][sliced['quality'] == 0] = 23
#     df['quality'].iloc[sliced[sliced.quality == 23].index] = 23
#     table = Table.from_pandas(df) 
#     print(table[table['quality'] == 0])

In [20]:
table = Table.from_pandas(df)

In [21]:
table[table['quality'] == 0]

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1469.450014515277,347016.6953125,347115.7351398922,347158.524772206,16.083106927151103,0
1469.470847822847,347040.515625,347143.99177325075,347178.1771247882,16.083679011028078,0
1469.5125144324738,347016.8515625,347121.6261217029,347169.2746787899,16.082982640898468,0
1469.5541810346715,346971.1015625,347097.12507285277,347106.4289057238,16.082103833786896,0
1469.575014333061,346971.0546875,347099.939357953,347112.45994798635,16.082127316706632,0
1469.6166809248168,346971.1328125,347081.4644069406,347120.0160347029,16.08209505733334,0
1469.6375142184202,346948.4375,347081.0396248829,347082.60172052094,16.081448670222713,0
1469.6583475107009,346948.484375,347099.88942833594,347096.2876213006,16.08158103344184,0
1469.7000140919915,346940.96875,347071.72961948003,347090.4166851069,16.08136825547695,0
1469.7208473813894,346940.96875,347086.9376961093,347100.5152791894,16.081270998167525,0


---

## Updated Functions

In [22]:
def import_XRPlightcurve(file_path,sector,clip=4,drop_bad_points=True,ok_flags=[23],return_type='astropy'):
    """
    file_path: path to file
    sector = lightcurve sector
    drop_bad_points: Removing outlier points. Default False
    mad_plots: plots MAD comparisons
    q: lightcurve quality, default 0 (excludes all non-zero quality)
    clip: Sigma to be clipped by (default 3)
    return_type: Default 'astropy'. Pandas DataFrame also available with 'pandas' 

    returns
        - table: Astropy table of lightcurve
        - info: additional information about the lightcurve (TIC ID, RA, DEC, TESS magnitude, Camera, Chip)
    """
    lc = pd.read_pickle(file_path)

    for i in range(len(lc)):
        if isinstance(lc[i], np.ndarray):
            lc[i] = pd.Series(lc[i])
    for_df = lc[6:]  # TIC ID, RA, DEC, TESS magnitude, Camera, Chip
    columns = [
        "time",
        "raw flux",
        "corrected flux",
        "PCA flux",
        "flux error",
        "quality",
    ]
    df = pd.DataFrame(data=for_df).T 
    df.columns = columns
    
    table = Table.from_pandas(df)
    print(len(table),"length at import")
    # loading Ethan Kruse bad times
    bad_times = data.load_bad_times()
    bad_times = bad_times - 2457000
    # loading MAD 
    mad_df = data.load_mad()
    sec = sector
    camera = lc[4]
    mad_arr = mad_df.loc[:len(table)-1,f"{sec}-{camera}"]
    sig_clip = sigma_clip(mad_arr,sigma=clip,masked=True)
    med_sig_clip = np.nanmedian(sig_clip)
    rms_sig_clip = np.nanstd(sig_clip)
    # setting zero quality only
    #table = table[table['quality'] == 0]

    # applied MAD cut to keep points within selected sigma
    #mad_cut = mad_arr.values < med_sig_clip + clip*(rms_sig_clip)
    mad_cut = mad_arr.values < ~sig_clip.mask # --> check this one. Could it be .data?
    print(len(mad_cut),"length of mad cut")
    
    # return indices of values above MAD threshold
    matched_ind = np.where(~mad_cut)

    # a bit of pandas trickery to make quality = 23, but not overriding existing flags
    df = table.to_pandas()
    b = pd.Series(np.asarray(matched_ind)[0])
    sliced = df.iloc[b]
    sliced['quality'][sliced['quality'] == 0] = 23
    df['quality'].iloc[sliced[sliced.quality == 23].index] = 23
    
    table = Table.from_pandas(df) 
    
    
    table['quality'] = table['quality'].astype(np.int32) # int32 set so it can work with get_quality_indices
#     if table['quality'][np.array(matched_ind)] == 0:
#          # so that this doesn't overwrite other quality flags
     # set quality flag 23
    

    # Ethan Kruse bad time mask
    mask = np.ones_like(table['time'], dtype=bool)
    for i in bad_times:
        newchunk = (table['time']<i[0])|(table['time']>i[1])
        mask = mask & newchunk
        
    # Apply Kruse bad mask to table
    table = table[mask]

    if drop_bad_points:
        bad_points = []
        q_ind = get_quality_indices(table['quality'])
    
        for j,q in enumerate(q_ind): # j=index, q=quality
            if j+1 not in ok_flags:
                bad_points += q.tolist()
        table.remove_rows(bad_points)

        
    # if mad_plot:
    #     mad_plots(table=table,array=mad_arr,median=med_sig_clip,rms=rms_sig_clip,clip=clip,sector=sec,camera=camera)
    
    # completes masking of array elements representing non-zero flags (excludes quality flag 23; above MAD threshold values are excluded to get clean lightcurve)
    #table = table[table['quality'] == 0] 
    
    # Delete rows containing NaN values. 
    nan_rows = [ i for i in range(len(table)) if
            math.isnan(table[i][2]) or math.isnan(table[i][0]) ] # -> check this 

    table.remove_rows(nan_rows)
    print(len(table),"length after drop bad points")

    # Smooth data by deleting overly 'spikey' points.
    spikes = [ i for i in range(1,len(table)-1) if \
            abs(table[i][1] - 0.5*(table[i-1][1]+table[i+1][1])) \
            > 3*abs(table[i+1][1] - table[i-1][1])]

    for i in spikes:
        table[i][1] = 0.5*(table[i-1][1] + table[i+1][1])
    print(len(table),"length at end")

    if return_type == 'pandas':

        return table.to_pandas(), lc[0:6]
    else:

        return table, lc[0:6]

In [23]:
table = import_XRPlightcurve(filename,6,ok_flags=4096)[0]

993 length at import
993 length of mad cut


TypeError: argument of type 'int' is not iterable

In [None]:
len(table)