## Import

In [175]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [176]:
# Import three relevant datasets (this cell: the exoplanet.eu catalogue).
# The relative path uses forward slashes so it resolves on Windows, Linux and
# macOS alike; a backslash is only a path separator on Windows.
CAT_FILENAME = 'data/exoplanet.eu_catalog_08-01-26_13_03_00.csv'
df_cat = pd.read_csv(CAT_FILENAME)
df_cat.head(3)

Unnamed: 0,name,planet_status,mass,mass_error_min,mass_error_max,mass_sini,mass_sini_error_min,mass_sini_error_max,radius,radius_error_min,...,star_sp_type,star_age,star_age_error_min,star_age_error_max,star_teff,star_teff_error_min,star_teff_error_max,star_detected_disc,star_magnetic_field,star_alternate_names
0,109 Psc b,Confirmed,5.743,0.289,1.011,6.383,0.078,0.078,1.152,,...,G5 IV,6.9,0.6,0.6,5600.0,80.0,80.0,,,HD 10697
1,112 Psc b,Confirmed,,0.005,0.004,0.033,0.005,0.004,,,...,G0IV,,,,5986.0,105.437,105.437,,,HD 12235
2,112 Psc c,Confirmed,9.866,1.781,3.19,,,,,,...,G0IV,,,,5986.0,105.437,105.437,,,HD 12235


In [177]:
# Mid-transit time table from the ExoClock release (whitespace-delimited text).
# Forward slashes keep the relative path portable across operating systems.
MID_FILENAME = 'data/ExoClock IV Ephemerides_20260107/mid_time_data/all_mid_time_data.txt'
# The separator is a regex, so it must be a raw string: in a normal string
# literal '\s' is an invalid escape sequence and triggers a warning on
# recent Python versions.
df_mid = pd.read_csv(MID_FILENAME, sep=r'\s+')
df_mid.head(3)

Unnamed: 0,Planet,Tmid_(BJD_TDB),Tmid_unc.,source,ID
0,55Cnce,2455607.0,0.00087,literature,2011ApJ...737L..18W
1,55Cnce,2458871.0,0.00066,space,55Cnce_9945_2020-01-22_TESS
2,55Cnce,2458871.0,0.0023,space,55Cnce_9946_2020-01-22_TESS


In [178]:
# Ephemerides catalogue (T0 and period per planet) from the ExoClock release
# (whitespace-delimited text).
# Forward slashes keep the relative path portable across operating systems.
T0P_FILENAME = 'data/ExoClock IV Ephemerides_20260107/catalogue_of_ephemerides.txt'
# The separator is a regex, so it must be a raw string: in a normal string
# literal '\s' is an invalid escape sequence and triggers a warning on
# recent Python versions.
df_t0p = pd.read_csv(T0P_FILENAME, sep=r'\s+')
df_t0p.tail(3)

Unnamed: 0,Planet,T0_(BJD_TDB),T0_unc.,P_(days),P_unc.
617,XO-6b,2459424.0,7e-05,3.764992,4.6e-07
618,XO-7b,2459567.0,7.2e-05,2.864133,4.7e-07
619,piMenc,2459385.0,0.00015,6.267821,1.3e-06


## Clean the exoplanet.eu dataset, then merge it with the ExoClock ephemerides (T0, P)

In [179]:
# List the catalogue's column labels so the relevant subset can be chosen below.
catalogue_columns = df_cat.columns
print(catalogue_columns)

Index(['name', 'planet_status', 'mass', 'mass_error_min', 'mass_error_max',
       'mass_sini', 'mass_sini_error_min', 'mass_sini_error_max', 'radius',
       'radius_error_min', 'radius_error_max', 'orbital_period',
       'orbital_period_error_min', 'orbital_period_error_max',
       'semi_major_axis', 'semi_major_axis_error_min',
       'semi_major_axis_error_max', 'eccentricity', 'eccentricity_error_min',
       'eccentricity_error_max', 'inclination', 'inclination_error_min',
       'inclination_error_max', 'angular_distance', 'discovered', 'updated',
       'omega', 'omega_error_min', 'omega_error_max', 'tperi',
       'tperi_error_min', 'tperi_error_max', 'tconj', 'tconj_error_min',
       'tconj_error_max', 'tzero_tr', 'tzero_tr_error_min',
       'tzero_tr_error_max', 'tzero_tr_sec', 'tzero_tr_sec_error_min',
       'tzero_tr_sec_error_max', 'lambda_angle', 'lambda_angle_error_min',
       'lambda_angle_error_max', 'impact_parameter',
       'impact_parameter_error_min', 'impa

In [184]:
# Select the identifier columns plus every parameter of interest together with
# its lower/upper error columns.
# Copy once here: new columns are added to this selection below, and a single
# copy is enough to detach it from df_cat (the copy used to be repeated on
# every loop iteration).
df_cat_relevant = df_cat[['name', 'mass', 'mass_error_min', 'mass_error_max',
                          'mass_sini', 'mass_sini_error_min', 'mass_sini_error_max',
                          'radius', 'radius_error_min', 'radius_error_max',
                          'eccentricity', 'eccentricity_error_min', 'eccentricity_error_max',
                          'inclination', 'inclination_error_min', 'inclination_error_max',
                          'omega', 'omega_error_min', 'omega_error_max',
                          'alternate_names', 'star_name']].copy()

# Pick error as the larger (in absolute value) of the two one-sided errors.
# max(axis=1) skips NaN, so the combined error is NaN only when both are missing.
params = ['mass', 'mass_sini', 'radius', 'eccentricity', 'omega', 'inclination']
for p in params:
    df_cat_relevant[f'{p}_error'] = df_cat_relevant[[f'{p}_error_min', f'{p}_error_max']].abs().max(axis=1)

# Keep the identifiers, then one (value, error) column pair per parameter, in `params` order.
df_cat_relevant = df_cat_relevant[['name', 'alternate_names', 'star_name', *[col for p in params for col in (f'{p}', f'{p}_error')]]]

# Build a shared '_key' column on fresh copies of both tables so they can be
# joined on a consistently formatted planet name.

# Left (ExoClock) key: drop a host-star letter A/B that sits between an
# alphanumeric character and the final lowercase planet letter,
# e.g. 'KELT-4Ab' -> 'KELT-4b'.
df_left = df_t0p.assign(
    _key=df_t0p['Planet'].str.replace(r'([A-Za-z0-9])([AB])([a-z])$', r'\1\3', regex=True)
)

# Right (exoplanet.eu) key: remove spaces first, then drop an A/B that
# directly precedes the final lowercase planet letter.
# NOTE(review): unlike the left pattern, this one does not require an
# alphanumeric character before the star letter - confirm this is intended.
df_right = df_cat_relevant.assign(
    _key=(
        df_cat_relevant['name']
        .str.replace(' ', '', regex=False)
        .str.replace(r'([AB])([a-z])$', r'\2', regex=True)
    )
)

# Manual aliases for the planets the automatic key formatting cannot pair up.
# Each entry rewrites a key on the exoplanet.eu side (dict key) to the value
# expected on the other side of the merge (dict value).
#
# Planets for which no alias is defined (author's notes):
#   GJ436b         - exists in exoplanet.eu / RV
#   HD209458b      - exists in exoplanet.eu / RV
#   KELT-4Ab       - exists in exoplanet.eu / RV
#   Kepler-76b     - exists in exoplanet.eu / Other
#   Kepler-854b    - false positive planet
#   TIC257060897b  - exists but not in TEPCat
rename_map = {
    # EPIC designations
    'K2-236b': 'EPIC211945201b',
    'K2-267b': 'EPIC246851721b',
    # HD / HR designations
    'TOI-1098b': 'HD110082b',
    'nu2Lupb': 'HD136352c',
    'TOI-1430b': 'HD235088b',
    'TOI-282c': 'HD28109c',
    'TOI-282d': 'HD28109d',
    'TOI-396b': 'HR858b',
    # KELT designations
    'WASP-122b': 'KELT-14b',
    'MASCARA-3b': 'KELT-24b',
    # KOI designations
    'Kepler-448b': 'KOI-12b',
    'Kepler-13b': 'KOI-13b',
    'Kepler-89c': 'KOI-94c',
    'Kepler-89d': 'KOI-94d',
    'Kepler-89e': 'KOI-94e',
    # TOI candidate numbers
    'TOI-150b': 'TOI-150.01',
    'TOI-216c': 'TOI-216.01',
    'TOI-216b': 'TOI-216.02',
    # WASP designations
    'HAT-P-10b': 'WASP-11b',
    'KELT-22b': 'WASP-173b',
}

# Keys absent from the map are left unchanged by replace().
df_right = df_right.assign(_key=df_right['_key'].replace(rename_map))

df_right.head(3)

Unnamed: 0,name,alternate_names,star_name,mass,mass_error,mass_sini,mass_sini_error,radius,radius_error,eccentricity,eccentricity_error,omega,omega_error,inclination,inclination_error,_key
0,109 Psc b,HD 10697 b,109 Psc,5.743,1.011,6.383,0.078,1.152,,0.104,0.009,112.816,5.448,86.116,20.53,109Pscb
1,112 Psc b,HD 12235 b,112 Psc,,0.005,0.033,0.005,,,0.376,0.254,279.492,67.524,,,112Pscb
2,112 Psc c,HD 12235 c,112 Psc,9.866,3.19,,,,,0.174,0.154,79.772,31.067,47.738,12.651,112Pscc


In [185]:
# Test to see output: dump both key tables so the keys can be inspected by hand
df_left.to_csv("df_left_test.csv", index=False)
df_right.to_csv("df_right_test.csv", index=False)

# Merge on the formatted key. The inner join drops ExoClock planets that have
# no catalogue match.
df_matches = df_left.merge(df_right, on='_key', how='inner')

# One key can hit several catalogue rows, because the key formatting collapses
# different names onto the same string. Without this step such a planet would
# silently appear more than once in the export.
# Resolution rule: a catalogue row counts as an exact match when its name with
# spaces removed equals the ExoClock planet name. If a planet has exactly one
# exact match, every other candidate row for that planet is discarded.
planet = df_matches['Planet']
is_exact = df_matches['name'].str.replace(' ', '', regex=False).eq(planet)
n_exact = is_exact.groupby(planet).transform('sum')
superseded = (n_exact == 1) & ~is_exact
df_matches = df_matches[~superseded]

# Anything still duplicated could not be resolved safely: keep the rows, but
# make the problem visible instead of exporting it silently.
ambiguous = sorted(df_matches.loc[df_matches['Planet'].duplicated(keep=False), 'Planet'].unique())
if ambiguous:
    print(f'WARNING: {len(ambiguous)} planet(s) match several catalogue rows: {ambiguous}')

# Drop the helper key and the catalogue-side 'name', 'alternate_names' cols
df_t0p_cat = (
    df_matches
    .drop(columns=['_key', 'name', 'alternate_names'])
    .reset_index(drop=True)
)

# Export and show (forward slash keeps the relative path portable)
EXPORT_FILENAME = 'data_cleaned/merged_exoclock_exoplanet.eu.csv'
df_t0p_cat.to_csv(EXPORT_FILENAME, index=False)
df_t0p_cat.tail(3)

Unnamed: 0,Planet,T0_(BJD_TDB),T0_unc.,P_(days),P_unc.,star_name,mass,mass_error,mass_sini,mass_sini_error,radius,radius_error,eccentricity,eccentricity_error,omega,omega_error,inclination,inclination_error
616,XO-6b,2459424.0,7e-05,3.764992,4.6e-07,XO-6,4.47,0.12,,,2.17,0.2,0.0,,,,85.06,0.07
617,XO-7b,2459567.0,7.2e-05,2.864133,4.7e-07,XO-7,0.726,0.038,,,1.346,0.02,0.038,0.033,,,83.23,0.01
618,piMenc,2459385.0,0.00015,6.267821,1.3e-06,pi Men,0.01142,0.0012,,,0.16719,0.004532,0.15,0.15,,,87.553,0.18


In [186]:
# Sanity check: ExoClock planets that are absent from the merged table,
# i.e. those the inner merge dropped for lack of a catalogue match.
removed_left = sorted(set(df_t0p['Planet']).difference(df_t0p_cat['Planet']))
removed_left

['Kepler-854b', 'TIC257060897b']