## Split the dataset into single-planet and two-planet systems

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from natsume.common import get_MMR, get_NormalizedResonanceDistance

In [3]:
# Import cleaned/merged dataset.
# The relative path uses forward slashes so it resolves on Windows, macOS and
# Linux alike; a backslash is only treated as a separator on Windows.
FILENAME = 'data_cleaned/merged_GLSfitted.csv'

# Read the CSV and drop the columns this notebook does not use.
df = (
    pd.read_csv(FILENAME)
      .drop(columns=['pl_letter', 'mass_sini', 'mass_sini_error', 'radius', 'radius_error'])
)

df.head(3)

Unnamed: 0,name_exoclock,name_exoplanet.eu,star_name,T0_(BJD_TDB),T0_unc.,P_(days),P_unc.,mass,mass_error,orbital_period,...,Attv2,Nbest,scatter,bic0,bic1,bic2,delta_bic,fap,sampled_f,TTV_strength
0,,55 Cnc Ab,55 Cnc A,,,,,0.84,0.23,14.65314,...,,,,,,,,,,
1,,55 Cnc Ac,55 Cnc A,,,,,0.1714,0.0055,44.373,...,,,,,,,,,,
2,,55 Cnc Ad,55 Cnc A,,,,,3.878,0.068,5574.2,...,,,,,,,,,,


In [4]:
# Keep only the rows whose planet_count is 1 and drop the orbital-period
# columns, which are not carried into the single-planet table.
df_1p = (
    df[df.planet_count == 1]
      .drop(columns=['orbital_period', 'orbital_period_error'])
)

# Forward slashes keep the export path portable: with a backslash, non-Windows
# systems would write a file literally named 'data_cleaned\merged_1planet.csv'
# into the working directory instead of into the data_cleaned folder.
EXPORT_FILENAME = 'data_cleaned/merged_1planet.csv'
df_1p.to_csv(EXPORT_FILENAME, index=False)
df_1p.head(3)

Unnamed: 0,name_exoclock,name_exoplanet.eu,star_name,T0_(BJD_TDB),T0_unc.,P_(days),P_unc.,mass,mass_error,eccentricity,...,Attv2,Nbest,scatter,bic0,bic1,bic2,delta_bic,fap,sampled_f,TTV_strength
5,CoRoT-1b,CoRoT-1 b,CoRoT-1,2455112.0,6.7e-05,1.508968,5.8e-08,1.03,0.12,0.071,...,0.001136,2.0,0.558962,-1054.658397,-1052.437831,-1054.619929,-0.038468,,19395.0,no_TTV
6,CoRoT-11b,CoRoT-11 b,CoRoT-11,2456068.0,0.00034,2.994278,4.5e-07,2.33,0.27,0.072,...,0.002477,2.0,0.565121,-324.10276,-327.046854,-336.983841,12.881082,,9174.0,no_TTV
7,CoRoT-19b,CoRoT-19 b,CoRoT-19,2456551.0,0.00045,3.897138,8.8e-07,1.11,0.06,0.047,...,,1.0,0.370604,-223.703462,-227.203312,-217.081188,3.499851,1.0,6455.0,no_TTV


In [5]:
# Restrict to the rows whose planet_count is 2.
is_two_planet = df['planet_count'] == 2
df_2p = df.loc[is_two_planet]

# Order rows by star name (ascending) and, within a star, by orbital period
# from longest to shortest.
df_2p = df_2p.sort_values(by=['star_name', 'orbital_period'], ascending=[True, False])

# Label each row by its rank within its star: the first row (longest period)
# is the outer planet, the second the inner one. This relies on every
# star_name having exactly 2 rows; any additional row would get no label.
rank_within_star = df_2p.groupby('star_name').cumcount()
df_2p['planet_pos'] = rank_within_star.map({0: 'outer', 1: 'inner'})

# Period ratio P_outer / P_inner: computed once per star from a
# star x position table, then broadcast back onto each of that star's rows.
periods_by_pos = df_2p.pivot(index='star_name', columns='planet_pos', values='orbital_period')
ratio_by_star = periods_by_pos.eval('outer / inner')
df_2p['period_ratio'] = ratio_by_star.reindex(df_2p['star_name']).to_numpy()

# Find approximate MMR
# Candidate mean-motion resonances: label -> period ratio.
# NOTE: with the default cutoff of 4.1 in closest_allowed_MMR, '5:1' can never
# be selected (every ratio below 4.1 is closer to 4:1 than to 5:1); it only
# becomes reachable when a larger max_ratio is passed.
allowed_MMR = {
    '2:1': 2/1, '3:2': 3/2, '4:3': 4/3, '5:4': 5/4,
    '3:1': 3/1, '5:3': 5/3, '7:5': 7/5,
    '4:1': 4/1, '5:2': 5/2,
    '5:1': 5/1, '7:3': 7/3}
# Parallel arrays so the nearest ratio can be looked up with a single argmin.
vals = np.array(list(allowed_MMR.values()))
keys = np.array(list(allowed_MMR.keys()))

def closest_allowed_MMR(x, max_ratio=4.1):
    """Return the label of the allowed MMR whose ratio is nearest to ``x``.

    "Nearest" is the smallest absolute difference between ``x`` and the ratios
    in ``allowed_MMR``; no tolerance is applied, so any ratio below the cutoff
    receives a label. On an exact tie the entry listed first wins.

    Parameters
    ----------
    x : float or None
        Period ratio to classify. Missing values (NaN, None, pd.NA) are
        accepted and yield NaN.
    max_ratio : float, default 4.1
        Exclusive upper cutoff: ratios at or above this value are left
        unlabelled.

    Returns
    -------
    str or float
        A key of ``allowed_MMR`` (e.g. ``'3:2'``), or ``np.nan`` when ``x`` is
        missing or ``x >= max_ratio``.
    """
    # pd.isna (unlike np.isnan) also handles None / pd.NA without raising.
    if pd.isna(x) or x >= max_ratio:
        return np.nan
    return keys[np.argmin(np.abs(vals - x))]

# Label every row with the nearest allowed resonance for its period ratio
# (NaN where closest_allowed_MMR declines to assign one).
df_2p['MMR'] = df_2p['period_ratio'].apply(closest_allowed_MMR)

# Get j and N
# get_MMR is called only on string labels; its result is spread across the
# two new columns. Rows without a label get NaN in both.
df_2p[['j', 'N']] = (
    df_2p['MMR']
      .apply(lambda x: pd.Series(get_MMR(x)) if isinstance(x, str) else pd.Series([np.nan, np.nan]))
)

# Get Delta
# NOTE(review): the literal 1 is presumably the inner period, with
# period_ratio standing in for the outer period in units of the inner one —
# confirm against the natsume get_NormalizedResonanceDistance signature.
df_2p['Delta'] = get_NormalizedResonanceDistance(1, df_2p.period_ratio, df_2p.j, df_2p.N)

# Forward slashes keep the export path portable: with a backslash, non-Windows
# systems would write a file literally named 'data_cleaned\merged_2planet.csv'
# into the working directory instead of into the data_cleaned folder.
EXPORT_FILENAME = 'data_cleaned/merged_2planet.csv'
df_2p.to_csv(EXPORT_FILENAME, index=False)
df_2p.head(3)

Unnamed: 0,name_exoclock,name_exoplanet.eu,star_name,T0_(BJD_TDB),T0_unc.,P_(days),P_unc.,mass,mass_error,orbital_period,...,delta_bic,fap,sampled_f,TTV_strength,planet_pos,period_ratio,MMR,j,N,Delta
13,,GJ 1132 c,GJ 1132,,,,,0.008316,0.001381,8.929,...,,,,,outer,5.481509,,,,
12,GJ1132b,GJ 1132 b,GJ 1132,2458041.0,9.3e-05,1.62893,8.6e-07,0.00522,0.00072,1.628931,...,-7.567203,1e-06,8764.0,no_TTV,inner,5.481509,,,,
32,,HAT-P-11 c,HAT-P-11,,,,,3.06,0.4,3299.0,...,,,,,outer,674.945446,,,,
