In [1]:
cd ..

/home/astro/phrdhx/automated_exocomet_hunt


In [2]:
import pandas as pd
from astropy.io import fits
from astropy.table import Table, unique
from astropy.stats import sigma_clip, sigma_clipped_stats
from scipy.optimize import curve_fit
from astropy.timeseries import LombScargle
import numpy as np
from analysis_tools_cython import *

import math
import sys,os
import kplr
import data
import warnings
warnings.filterwarnings("ignore")


In [3]:
def import_lightcurve(file_path, drop_bad_points=False,
                      ok_flags=[5]):
    """Returns (N by 2) table, columns are (time, flux).

    Flags deemed to be OK are:
    5 - reaction wheel zero crossing, matters for short cadence
    """

    try:
        hdulist = fits.open(file_path)
    except FileNotFoundError:
        print("Import failed: file not found")
        return

    scidata = hdulist[1].data
    if 'kplr' in file_path:
        table = Table(scidata)['TIME','PDCSAP_FLUX','SAP_QUALITY']
    elif 'tess' in file_path:
        #try:
        table = Table(scidata)['TIME','PDCSAP_FLUX','QUALITY']
        print(len(table), "length at import")
        print(type((table)['QUALITY'][0]))
        #except:
        #    time = scidata.TIME
        #    flux = scidata.PDCSAP_FLUX
        #    quality = scidata.QUALITY
        #    table = Table([time,flux,quality],names=('TIME','PDCSAP_FLUX','QUALITY'))


    if drop_bad_points:
        bad_points = []
        if 'kplr' in file_path:
            q_ind = get_quality_indices(table['SAP_QUALITY'])
        elif 'tess' in file_path:
            q_ind = get_quality_indices(table['QUALITY'])
        
        for j,q in enumerate(q_ind): # j=index, q=quality
            if j+1 not in ok_flags:
                bad_points += q.tolist() # adds bad_points by value of q (the quality indices) and converts to list
    

        # bad_points = [i for i in range(len(table)) if table[i][2]>0]
        table.remove_rows(bad_points)
        print(len(table),"length after drop_bad_points")

    # Delete rows containing NaN values. 
    ## if flux or time columns are NaN's, remove them.
    nan_rows = [ i for i in range(len(table)) if
            math.isnan(table[i][1]) or math.isnan(table[i][0]) ]

    table.remove_rows(nan_rows)

    # Smooth data by deleting overly 'spikey' points.
    ## if flux - 0.5*(difference between neihbouring points) > 3*(distance between neighbouring points), spike identified
    spikes = [ i for i in range(1,len(table)-1) if \
            abs(table[i][1] - 0.5*(table[i-1][1]+table[i+1][1])) \
            > 3*abs(table[i+1][1] - table[i-1][1])]

    ## flux smoothened out by changing those points to 0.5*distance between neighbouring points
    for i in spikes:
        table[i][1] = 0.5*(table[i-1][1] + table[i+1][1])
        
    print(len(table),"length at end")

    return table

def import_XRPlightcurve(file_path,sector,clip=4,drop_bad_points=True,ok_flags=[23],return_type='astropy'):
    """
    file_path: path to file
    sector = lightcurve sector
    drop_bad_points: Removing outlier points. Default False
    mad_plots: plots MAD comparisons
    q: lightcurve quality, default 0 (excludes all non-zero quality)
    clip: Sigma to be clipped by (default 3)
    return_type: Default 'astropy'. Pandas DataFrame also available with 'pandas' 

    returns
        - table: Astropy table of lightcurve
        - info: additional information about the lightcurve (TIC ID, RA, DEC, TESS magnitude, Camera, Chip)
    """
    lc = pd.read_pickle(file_path)

    for i in range(len(lc)):
        if isinstance(lc[i], np.ndarray):
            lc[i] = pd.Series(lc[i])
    for_df = lc[6:]  # TIC ID, RA, DEC, TESS magnitude, Camera, Chip
    columns = [
        "time",
        "raw flux",
        "corrected flux",
        "PCA flux",
        "flux error",
        "quality",
    ]
    df = pd.DataFrame(data=for_df).T 
    df.columns = columns
    
    table = Table.from_pandas(df)
    print(len(table),"length at import")
    # loading Ethan Kruse bad times
    bad_times = data.load_bad_times()
    bad_times = bad_times - 2457000
    # loading MAD 
    mad_df = data.load_mad()
    sec = sector
    camera = lc[4]
    mad_arr = mad_df.loc[:len(table)-1,f"{sec}-{camera}"]
    sig_clip = sigma_clip(mad_arr,sigma=clip,masked=True)
    med_sig_clip = np.nanmedian(sig_clip)
    rms_sig_clip = np.nanstd(sig_clip)
    # setting zero quality only
    #table = table[table['quality'] == 0]

    # applied MAD cut to keep points within selected sigma
    #mad_cut = mad_arr.values < med_sig_clip + clip*(rms_sig_clip)
    mad_cut = mad_arr.values < ~sig_clip.mask # --> check this one. Could it be .data?
    print(len(mad_cut),"length of mad cut")
    
    # return indices of values above MAD threshold
    matched_ind = np.where(~mad_cut)

    # a bit of pandas trickery to make quality = 23, but not overriding existing flags
    df = table.to_pandas()
    b = pd.Series(np.asarray(matched_ind)[0])
    sliced = df.iloc[b]
    sliced['quality'][sliced['quality'] == 0] = 23
    df['quality'].iloc[sliced[sliced.quality == 23].index] = 23
    
    table = Table.from_pandas(df) 
    
    
    table['quality'] = table['quality'].astype(np.int32) # int32 set so it can work with get_quality_indices
#     if table['quality'][np.array(matched_ind)] == 0:
#          # so that this doesn't overwrite other quality flags
     # set quality flag 23
    

    # Ethan Kruse bad time mask
    mask = np.ones_like(table['time'], dtype=bool)
    for i in bad_times:
        newchunk = (table['time']<i[0])|(table['time']>i[1])
        mask = mask & newchunk
        
    # Apply Kruse bad mask to table
    table = table[mask]

    if drop_bad_points:
        bad_points = []
        q_ind = get_quality_indices(table['quality'])
    
        for j,q in enumerate(q_ind): # j=index, q=quality
            if j+1 not in ok_flags:
                bad_points += q.tolist()
        table.remove_rows(bad_points)

        
    # if mad_plot:
    #     mad_plots(table=table,array=mad_arr,median=med_sig_clip,rms=rms_sig_clip,clip=clip,sector=sec,camera=camera)
    
    # completes masking of array elements representing non-zero flags (excludes quality flag 23; above MAD threshold values are excluded to get clean lightcurve)
    #table = table[table['quality'] == 0] 
    
    # Delete rows containing NaN values. 
    nan_rows = [ i for i in range(len(table)) if
            math.isnan(table[i][2]) or math.isnan(table[i][0]) ] # -> check this 

    table.remove_rows(nan_rows)
    print(len(table),"length after drop bad points")

    # Smooth data by deleting overly 'spikey' points.
    spikes = [ i for i in range(1,len(table)-1) if \
            abs(table[i][1] - 0.5*(table[i-1][1]+table[i+1][1])) \
            > 3*abs(table[i+1][1] - table[i-1][1])]

    for i in spikes:
        table[i][1] = 0.5*(table[i-1][1] + table[i+1][1])
    print(len(table),"length at end")

    if return_type == 'pandas':

        return table.to_pandas(), lc[0:6]
    else:

        return table, lc[0:6]


In [4]:
filename = 'tess_testlcs/XRP/tesslcs_sector_6_104_2_min_cadence_targets_tesslc_270577175.pkl'
filename_spoc = 'tess_SPOC/4112/0759/hlsp_tess-spoc_tess_phot_0000000141120759-s0006_tess_v1_lc.fits'

In [5]:
table = import_XRPlightcurve(filename,6, drop_bad_points=True,ok_flags=[23])[0]

993 length at import
993 length of mad cut
835 length after drop bad points
835 length at end


In [6]:
table

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1469.0125148958496,347161.4921875,347159.51928986365,347257.51836509426,16.086396368743504,0
1469.033348205488,347138.15625,347130.80441512656,347233.69674245414,16.0858834308829,0
1469.05418151656,347137.9453125,347154.0474762639,347237.91987667594,16.086005323284652,0
1469.075014829015,347115.9453125,347137.66357271076,347224.79454237444,16.08544494260152,0
1469.0958481427742,347092.8984375,347135.54636229546,347198.3862718676,16.084751257711787,0
1469.1166814577352,347085.109375,347122.48446406104,347188.17678715877,16.084545873813063,0
1469.1375147737685,347078.0546875,347115.2081440332,347179.6375294337,16.08437582534423,0
1469.1583480907243,347062.7890625,347119.95822593133,347193.874734803,16.08402671044064,0
1469.1791814084336,347063.40625,347110.70301108615,347171.5797639444,16.084105451692125,0
1469.2000147267115,347040.3671875,347099.42433376826,347159.7510696048,16.08360714588746,0


In [19]:
unique(table,keys='quality')

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1439.0123416701815,221182.74890551923,221211.80105512697,221263.02040823636,12.787467805853597,0


In [20]:
mad_df = data.load_mad()
sec = 6
camera = 1
mad_arr = mad_df.loc[:len(table)-1,f"{sec}-{camera}"]
sig_clip = sigma_clip(mad_arr,sigma=4,masked=True)
# setting zero quality only
#table = table[table['quality'] == 0]

# applied MAD cut to keep points within selected sigma
mad_cut = mad_arr.values < ~sig_clip.mask #(med_sig_clip + clip*(rms_sig_clip))


# return indices of values above MAD threshold
matched_ind = np.where(~mad_cut) 
#indexes = table['quality'][matched_ind][table[matched_ind]['quality'] == 0]
#table['quality'][matched_ind][table[matched_ind]['quality'] == 0][:]

In [21]:
table[matched_ind]

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1439.0123416701817,221182.74890551923,221211.80105512697,221263.02040823633,12.787467805853597,0
1439.0331752568609,221171.39868890672,221201.8378187784,221270.73399950768,12.787269722961717,0
1439.054008844922,221179.58573301148,221191.1975442419,221247.45746674927,12.787267038082167,0
1439.0748424344265,221155.9932618547,221235.21641180315,221334.37203765972,12.787883650617045,0
1439.0956760254082,221132.4007906979,221220.9417019505,221288.9386897132,12.78768438092573,0
1439.1165096178709,221138.52104207914,221217.10244967323,221310.26865355176,12.787688258958331,0
1439.1373432117891,221144.6412934604,221232.51307927538,221245.09989543504,12.787890213338494,0
1439.1581768071128,221124.10802720996,221200.86158885594,221173.03159981713,12.787279865834934,0
1439.1790104037595,221116.32307873032,221219.77861942875,221269.0850760681,12.787482721609782,0
1439.1998440016257,221108.5381302507,221205.07098785276,221250.21042293808,12.787082078356002,0


In [117]:
#table['quality'][matched_ind] = 23

In [118]:
unique(table,keys='quality')

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1438.012329056425,221078.6842700523,221324.4525547316,221345.40382970945,12.788868021199995,0


---

In [119]:
df = table.to_pandas()

In [120]:
df.head()

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
0,1438.012329,221078.68427,221324.452555,221345.40383,12.788868,0
1,1438.033163,221083.576744,221288.979645,221329.284113,12.78847,0
2,1438.053996,221118.152122,221293.997549,221348.890213,12.788887,0
3,1438.07483,221133.134442,221265.201048,221363.262429,12.788677,0
4,1438.095663,221146.083634,221244.616909,221354.17075,12.78847,0


In [99]:
b = pd.Series(np.asarray(matched_ind)[0])

sliced = df.iloc[b]

In [100]:
sliced['quality'][sliced['quality'] == 0] = 23

In [101]:
sliced[sliced.quality == 23]

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
1,1438.012329,221078.684270,221324.452555,221345.403830,12.788868,23
2,1438.033163,221083.576744,221288.979645,221329.284113,12.788470,23
3,1438.053996,221118.152122,221293.997549,221348.890213,12.788887,23
4,1438.074830,221133.134442,221265.201048,221363.262429,12.788677,23
5,1438.095663,221146.083634,221244.616909,221354.170750,12.788470,23
...,...,...,...,...,...,...
1175,1463.845848,219520.650596,221171.672556,220979.847935,12.794123,23
1176,1463.866681,219246.903526,221196.761115,220968.295139,12.794545,23
1177,1463.887515,218920.393831,221174.546991,220810.693980,12.794349,23
1178,1463.908348,218554.926627,221167.539721,220700.846204,12.794144,23


In [102]:
df['quality'].iloc[sliced[sliced.quality == 23].index] = 23

In [103]:
df[df.quality == 0]

Unnamed: 0,time,raw flux,corrected flux,PCA flux,flux error,quality
21,1438.429001,221290.278806,221192.199557,221363.019752,12.788688,0
22,1438.449835,221289.605325,221207.178949,221396.737955,12.788686,0
26,1438.533169,221273.973760,221193.136700,221343.160147,12.788284,0
28,1438.574836,221287.221120,221205.898210,221359.388627,12.788469,0
29,1438.595670,221286.784330,221200.022581,221345.521085,12.788677,0
...,...,...,...,...,...,...
988,1459.950006,221338.368693,221185.352443,221111.253381,12.793133,0
989,1459.970840,221329.155595,221173.489364,221122.569160,12.792925,0
990,1459.991673,221346.791574,221192.038654,221139.252273,12.793329,0
991,1460.012506,221338.029781,221184.388910,221142.003497,12.793123,0


In [104]:
    # a bit of pandas trickery to make quality = 23, but not overriding existing flags
#     df = table.to_pandas()
#     b = pd.Series(np.asarray(matched_ind)[0])
#     sliced = df.iloc[b]
#     sliced['quality'][sliced['quality'] == 0] = 23
#     df['quality'].iloc[sliced[sliced.quality == 23].index] = 23
#     table = Table.from_pandas(df) 
#     print(table[table['quality'] == 0])

In [105]:
table = Table.from_pandas(df)

In [107]:
table[table['quality'] == 0]

time,raw flux,corrected flux,PCA flux,flux error,quality
float64,float64,float64,float64,float64,int32
1438.4290011650496,221290.27880613596,221192.19955721652,221363.0197522273,12.788687857218777,0
1438.449834772096,221289.60532537883,221207.17894939962,221396.73795460607,12.788685769210739,0
1438.533169186452,221273.9737597984,221193.13669998615,221343.16014675813,12.788283970462171,0
1438.5748363836651,221287.22112002788,221205.898209846,221359.38862695597,12.7884692110973,0
1438.5956699795415,221286.78433043204,221200.02258073117,221345.5210849503,12.788677118888094,0
1438.616503573578,221292.50775003113,221199.78897288622,221336.87490197984,12.788677118888094,0
1438.6373371657987,221270.12248772744,221204.09313154564,221352.52546138366,12.788284567054674,0
1438.6790043450208,221264.7158293244,221201.3552949717,221336.97946293312,12.788060544612533,0
1438.699837932203,221271.88336673199,221207.35769633824,221391.5247066196,12.788256825473804,0
1438.7206715179295,221268.4560057365,221248.09116174176,221327.57750792304,12.788861757263104,0


---