## Import

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
import seaborn as sns
from astropy.timeseries import LombScargle
from scipy.stats import median_abs_deviation as MAD
from scipy.signal import find_peaks
from tqdm import tqdm

In [2]:
FILENAME = r'data/all_mid_time_data_with_OC.csv'
df = pd.read_csv(FILENAME)
df.tail(3)

Unnamed: 0,Planet,source,Tmid,Tmid_err,period,period_err,epoch,O-C_(days),O-C_err_(days)
2092,Kepler-307c,Holczer+16,2456366.0,,13.08428,3e-06,107,-0.0125,0.006944
2093,Kepler-307c,Holczer+16,2456379.0,,13.08428,3e-06,108,-0.008125,0.005556
2094,Kepler-307c,Holczer+16,2456405.0,,13.08428,3e-06,110,-0.013611,0.004167


## Outlier rejection (2-sigma clipping)

In [3]:
# Remove excessively large TTV error and then 2 sigma clipping
# def outlier_reject(df, errthres=3, sigma=2):
#     OCerr_mu = df.groupby('Planet')['O-C_err_(days)'].transform('mean')
#     mask_err = df['O-C_err_(days)'] / OCerr_mu <= errthres
#     df_masked = df.loc[mask_err].reset_index(drop=True)
    
#     OC_mu = df_masked.groupby('Planet')['O-C_(days)'].transform('median')
#     OC_std = df_masked.groupby('Planet')['O-C_(days)'].transform('std')
#     mask_sigma = (df_masked['O-C_(days)'] - OC_mu).abs() <= sigma * OC_std
#     df_masked2 = df_masked.loc[mask_sigma].reset_index(drop=True)
    
#     return df_masked2

# df_clip = outlier_reject(df)
# df_clip.tail(3)

def outlier_reject_iter(df, errthres=3, sigma=3, max_iter=100):
    # 1) One-time large-error filter
    OCerr_mu = df.groupby('Planet')['O-C_err_(days)'].transform('mean')
    mask_err = df['O-C_err_(days)'] / OCerr_mu <= errthres
    df_masked = df.loc[mask_err].reset_index(drop=True)

    # 2) Iterative sigma clipping only
    for _ in range(max_iter):
        OC_mu = df_masked.groupby('Planet')['O-C_(days)'].transform('median')
        OC_std = df_masked.groupby('Planet')['O-C_(days)'].transform('std')

        mask_sigma = (df_masked['O-C_(days)'] - OC_mu).abs() <= sigma * OC_std
        df_new = df_masked.loc[mask_sigma].reset_index(drop=True)

        # stop when no further points are removed
        if len(df_new) == len(df_masked):
            break

        df_masked = df_new

    return df_masked

df_clip = outlier_reject_iter(df)
df_clip.tail(3)

Unnamed: 0,Planet,source,Tmid,Tmid_err,period,period_err,epoch,O-C_(days),O-C_err_(days)
2033,Kepler-307c,Holczer+16,2456366.0,,13.08428,3e-06,107,-0.0125,0.006944
2034,Kepler-307c,Holczer+16,2456379.0,,13.08428,3e-06,108,-0.008125,0.005556
2035,Kepler-307c,Holczer+16,2456405.0,,13.08428,3e-06,110,-0.013611,0.004167


In [4]:
# Import big dataset for later insertion of final parameters (WIP)
FILENAME = r'data/target_list_filtered.xlsx'
df_targets = pd.read_excel(FILENAME)
df_targets.tail(2)

Unnamed: 0,KOI_inner,KOI_outer,name_inner,name_outer,T0_inner,T0_inner_err,T0_outer,T0_outer_err,P_inner,P_inner_err,...,peri_outer_err,peri_convention,peri_offset,mutual_inclination,mutual_inclination_err,st_mass,st_mass_error,parameter_reference,parameter_reference_doi,Notes
12,,,TOI-2015b,TOI-2015c,2459713.0,0.00251,,,3.346493,4.7e-05,...,1.886108,"PyTTV, REBOUND",0,,,0.33,0.02,Berkaoui+25,10.1051/0004-6361/202452916,"ew_converted, High mutual inclination"
13,,,TOI-2015b,TOI-2015c,2459713.0,0.00251,,,3.348004,5.6e-05,...,2.49892,"PyTTV, REBOUND",0,,,0.33,0.02,Berkaoui+25,10.1051/0004-6361/202452916,"ew_converted, High mutual inclination"


## Periodicity search with Lomb-Scargle periodogram

In [5]:
def lomb_scargle_analysis(df_full, planet_name, TTV_position, expected_Pttv=None):  
    # Take only specific planet
    df = df_full[df_full.Planet == planet_name]
    
    # Columns extraction as Series
    Tmid = df['Tmid']
    Porb = df['period'].mode().iloc[0]
    OC = df['O-C_(days)']
    OCerr = df['O-C_err_(days)']
    
    # Frequency grid and setup
    baseline = Tmid.max() - Tmid.min()
    fmin, fmax = 1 / (10*baseline), 1 / (2.01 * Porb)
    Nfreq = max(np.int64(10 * baseline * fmax), 5000)  # VanderPlas 2017 for max
    freq = np.linspace(fmin, fmax, Nfreq)

    # Lomb–Scargle (1-harmonic model)
    ls = LombScargle(Tmid, OC, OCerr, nterms=1, normalization="standard")
    power = ls.power(freq, normalization="standard")

    # False Alarm Probability / FAP at 5%, 1%, 0.1%
    fap5 = ls.false_alarm_level(0.05, minimum_frequency=fmin, maximum_frequency=fmax)
    fap1 = ls.false_alarm_level(0.01, minimum_frequency=fmin, maximum_frequency=fmax)
    fap01 = ls.false_alarm_level(0.001, minimum_frequency=fmin, maximum_frequency=fmax)
    fap_threshold = float(np.atleast_1d(fap01)[0])
    
    # Find power peaks with FAP < 0.1%
    peaks_idx, _ = find_peaks(power, height=fap_threshold)
    flag_insufficient_power = 1 if (peaks_idx.size == 0) else 0
    
    if (flag_insufficient_power == 1):
        power_best = power.max()
        idx_best = np.argmax(power)
        f_best = freq[idx_best]

    elif (flag_insufficient_power == 0):
        peak_freqs = freq[peaks_idx]
        peak_periods = 1/peak_freqs
        # Peak at fitted period closest to expected TTV period
        peaks_idx_best = np.argmin(np.abs(peak_periods - expected_Pttv))
        idx_best = peaks_idx[peaks_idx_best]
        f_best = freq[idx_best]
        power_best = power[idx_best]
    
    # Sinusoidal model and FAP
    model = ls.model(Tmid, f_best)
    fap = ls.false_alarm_probability(power_best, minimum_frequency=fmin, maximum_frequency=fmax)

    # BICs for linear vs sinusoidal determination
    offset = ls.model_parameters(f_best)[0]
    chi2_0 = np.sum(((OC - offset) / OCerr)**2)
    chi2_1 = np.sum(((OC - model) / OCerr)**2)
    n = len(OC)
    k0, k1 = 1, 3  # Parameters estimated by model; offset + sin/cos terms

    bic0 = k0 * np.log(n) + chi2_0
    bic1 = k1 * np.log(n) + chi2_1

    # Best frequency -> period and amplitude 
    period = 1/f_best
    try:
        f_best_under, f_best_over = freq[idx_best-1], freq[idx_best+1]
        period_over, period_under = 1/f_best_under, 1/f_best_over
        period_err = max(period_over-period, period-period_under)
    except IndexError:
        period_err = np.nan

    # Sinusoidal amplitudes
    A1, A2 = ls.model_parameters(f_best)[1:]
    amp = np.sqrt(A1**2 + A2**2)
    amp_err = np.nan
    phi = np.arctan2(A2, A1) / (2*np.pi)
    
    # TTV strength
    delta_BIC = bic0 - bic1
    scatter = MAD(OC, nan_policy='omit') / OCerr.median()
    if (delta_BIC >= 10) and (scatter >= 3):
        TTV_strength = 'strong'
    elif (delta_BIC >= 10) and (scatter >= 2):
        TTV_strength = 'weak'
    elif (delta_BIC < 10) or (scatter < 2):
        TTV_strength = 'no_TTV'

    # == PLOTTING TIME! ==
    t_fit = np.linspace(Tmid.min(), Tmid.max(), 2000)
    model_fit = ls.model(t_fit, f_best)
    
    # Plot
    fig, ax = plt.subplots(1, 2, figsize=(12, 3.5))
    P = 1/freq
    Pmin, Pmax = P.min(), P.max()

    # Plot GLS power spectrum
    ax[0].plot(1/freq, power, lw=0.5, color='black')

    # Plot FAP
    ax[0].axvline(period, lw=1, color='black', ls='-.',
                  label=f'{planet_name} superperiod = {1/f_best:.2f} d\n(FAP = {fap*100:.2g}%)')
    colors = ['red', 'royalblue', 'yellowgreen']
    for i, fap_ in enumerate([fap5, fap1, fap01]):
        ax[0].axhline(fap_, ls='--', lw=0.7, color=colors[i])

    # Plot expected Pttv
    ax[0].axvline(expected_Pttv, lw=1, color='fuchsia', ls='-.',
                  label=f'Expected superperiod = {expected_Pttv:.2f} d')
    ax[0].legend(loc='upper right')
        
    ax[0].axhline(power_best, lw=0.7, color='black', ls='-.')
    
    ax[0].set_xlabel("Superperiod [days]"); ax[0].set_ylabel("Lomb-Scargle Power")
    ax[0].set_xscale("log")
    ax[0].set_xlim(Pmin / 1.05, Pmax * 1.05); ax[0].set_ylim(0, power.max() * 1.5) 

    # Plot sinusoidal fit
    day2min = 1440
    uses_Holczer = df.source.eq('Holczer+16').any()
    uses_Berkaoui = df.source.eq('Berkaoui+25').any()
    uses_Kokori = not (uses_Holczer or uses_Berkaoui)
    
    # Source list
    if uses_Kokori == True:
        source_list = [f'Kokori+25 ({s})' for s in ('literature', 'space', 'exoclock')]
        colors_list = [f'C{i}' for i in range(3)]
    elif uses_Holczer == True:
        source_list = ['Holczer+16']
        colors_list = ['black']
    elif uses_Berkaoui == True:
        source_list = ['Berkaoui+25']
        colors_list = ['black']
    
    for i, source in enumerate(source_list):
        msk = (df.source == source)
        color = colors_list[i]
        ax[1].errorbar(Tmid[msk] - 2454900, (OC[msk] - offset) * day2min, OCerr[msk] * day2min,
                       fmt='.', color=color, zorder=1, label=source)
    
    ax[1].plot(t_fit - 2454900, (model_fit - offset) * day2min, label=f'Sinusoidal fit', color='red', lw=1, zorder=2)

    # Find ylim
    ylim = np.max([amp, (OC - offset).abs().max()]) * day2min * 1.5
    ylim = (-ylim, +ylim)
    
    # Labels
    ax[1].set_xlabel("t [BJD-2454900]"); ax[1].set_ylabel("O-C [minutes]")
    ax[1].set_ylim(ylim) 
    ax[1].legend(loc='upper left')
    
    plt.tight_layout()
    EXPORT_FILENAME = rf'fit_pics/{planet_name}_GLSfit.jpg'
    plt.savefig(EXPORT_FILENAME, dpi=300, bbox_inches="tight")
    plt.close(fig)
    del fig, ax

    output = [period, period_err, amp, amp_err,
             bic0, bic1, delta_BIC, fap, Nfreq,
             TTV_strength, scatter]

    return output
    
def expected_Pttv(planet):
    row = df_final.loc[
        (df_final['name_inner'] == planet) |
        (df_final['name_outer'] == planet)
    ]
    Pttvs = row['expected_Pttv'].iloc[0] if not row.empty else None
    return Pttvs

def get_TTVposition(planet):
    if (df_final['name_inner'] == planet).any():
        return 'inner'
    if (df_final['name_outer'] == planet).any():
        return 'outer'
    return None

In [6]:
# Inititate final dataframe
df_final = df_targets.copy()
df_final_newcols = ['Pttv_inner', 'Pttv_inner_err', 'Attv_inner', 'Attv_inner_err',
                    'bic0_inner', 'bic1_inner', 'delta_bic_inner', 'fap_inner', 'f_searched_inner',
                    'TTV_strength_inner', 'scatter_inner',
                    'Pttv_outer', 'Pttv_outer_err', 'Attv_outer', 'Attv_outer_err',
                    'bic0_outer', 'bic1_outer', 'delta_bic_outer', 'fap_outer', 'f_searched_outer',
                    'TTV_strength_outer', 'scatter_outer']

df_final[df_final_newcols] = np.nan
strcols = ['TTV_strength_inner', 'TTV_strength_outer']
df_final[strcols] = None
df_final[strcols] = df_final[strcols].astype(object)

all_planets = df_clip['Planet'].dropna().astype(str).unique()
for planet in tqdm(all_planets):
    exp_Pttv = expected_Pttv(planet)
    pos = get_TTVposition(planet)
    
    newcol_values = lomb_scargle_analysis(df_clip, planet_name=planet, TTV_position=pos,
                                          expected_Pttv=exp_Pttv)
    
    cols2insert = [
        f'Pttv_{pos}', f'Pttv_{pos}_err',
        f'Attv_{pos}', f'Attv_{pos}_err',
        f'bic0_{pos}', f'bic1_{pos}', f'delta_bic_{pos}',
        f'fap_{pos}', f'f_searched_{pos}',
        f'TTV_strength_{pos}', f'scatter_{pos}',
    ]
    mask = (df_final[f'name_{pos}'] == planet)
    df_final.loc[mask, cols2insert] = newcol_values

EXPORT_FILENAME = r'data\target_list_GLSfitted.csv'
df_final.to_csv(EXPORT_FILENAME, index=False)
df_final.head(3)

100%|██████████████████████████████████████████████████████████████████████████████████| 21/21 [00:12<00:00,  1.68it/s]


Unnamed: 0,KOI_inner,KOI_outer,name_inner,name_outer,T0_inner,T0_inner_err,T0_outer,T0_outer_err,P_inner,P_inner_err,...,Pttv_outer_err,Attv_outer,Attv_outer_err,bic0_outer,bic1_outer,delta_bic_outer,fap_outer,f_searched_outer,TTV_strength_outer,scatter_outer
0,137.01,137.02,Kepler-18c,Kepler-18d,2454961.0,0.000502,2455704.0,0.00026,7.641562,5.7e-07,...,0.472211,0.002859,,1568.83874,695.276533,873.562208,1.237263e-27,5000.0,no_TTV,1.846272
1,248.01,248.02,Kepler-49b,Kepler-49c,2454967.0,0.001562,2454970.0,0.003099,7.203853,1.73e-06,...,1.296243,0.013972,,582.297603,337.441149,244.856454,5.230233e-13,5000.0,no_TTV,1.385
2,277.02,277.01,Kepler-36b,Kepler-36c,2454961.0,0.027837,2454956.0,0.013792,13.849118,3.056e-05,...,1.248697,0.080939,,30764.833358,4529.031425,26235.801933,6.278113e-30,5000.0,strong,20.2625
