# ExoNAMD API v1.0


## Summary

The observer wants to compute the relative and/or absolute NAMD of:

- a given multiplanetary system;
- a subset of multiplanetary systems;
- all the known ones.

This tool handles all of the above cases.


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np

import pandas as pd
# import modin.pandas as pd
import swifter
import warnings

from exonamd.catalog import download_nasa_confirmed_planets
from exonamd.utils import ROOT
from exonamd.utils import fetch_aliases
from exonamd.utils import update_host
from exonamd.utils import update_planet
from exonamd.solve import solve_values
from exonamd.interp import interp_eccentricity
from exonamd.interp import interp_mass
from exonamd.interp import interp_inclination
from exonamd.interp import interp_sma


warnings.filterwarnings("ignore")
pd.options.display.max_columns = 20
pd.options.display.max_rows = 30
pd.options.mode.copy_on_write = True
swifter.set_defaults(
    npartitions=None,
    dask_threshold=1,
    scheduler="processes",
    progress_bar=False,
    progress_bar_desc=None,
    allow_dask_on_strings=False,
    force_parallel=False,
)

### Task 1: getting the data

This task retrieves the parameters of confirmed systems from the NASA Exoplanet Archive database.


In [3]:
df, df_old = download_nasa_confirmed_planets(
    sy_pnum=1,
    from_scratch=False,
)

### Task 2: dealing with the aliases


Fetch aliases


In [4]:
aliases = fetch_aliases(df["hostname"].unique())

Fetched 3/3 entries on try 1


Curate aliases


In [5]:
df["hostname"] = df.swifter.apply(update_host, args=(aliases,), axis=1)
df["pl_name"] = df.swifter.apply(update_planet, args=(aliases,), axis=1)

Pandas Apply:   0%|          | 0/6 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/6 [00:00<?, ?it/s]

In [6]:
def check_name(names):
    if len(set(name[:3] for name in names)) > 1:
        return False
    return True


name_ok = df.groupby("hostname")["pl_name"].apply(check_name)

for hostname in name_ok[~name_ok].index:
    print(f"Inconsistent planet names for {hostname}")

### Task 3: computing missing values (if any) from simple equations


In [7]:
df[
    [
        "pl_orbsmax",
        "pl_ratdor",
        "st_rad",
        "pl_rade",
        "pl_ratror",
        "pl_orbper",
        "st_mass",
    ]
] = df.swifter.apply(solve_values, axis=1, result_type="expand")

Pandas Apply:   0%|          | 0/6 [00:00<?, ?it/s]

### Task 4: storing the curated database


In [8]:
if df_old is not None:
    df_new = df.copy()
    df = pd.concat([df_new, df_old], ignore_index=True)
    df = df.drop_duplicates(keep="last")

df.to_csv(os.path.join(ROOT, "data", "exo.csv"), index=False)

In [9]:
# df = pd.read_csv(os.path.join(ROOT, "data", "exo.csv"))

Drop columns that are no longer needed


In [10]:
df.drop(
    columns=[
        "pl_ratdor",
        "st_rad",
        "pl_ratror",
        "pl_orbper",
        "st_mass",
    ],
    inplace=True,
)

### Task 5: input missing values (if any) by interpolation


Use nanmedian to thin down the data


In [11]:
cols = [
    c
    for c in df.columns
    if c not in ["hostname", "pl_name", "default_flag", "rowupdate"]
]

medians = df.groupby("pl_name")[cols].transform(np.nanmedian)
df.loc[df["default_flag"] == 1, cols] = medians.loc[df["default_flag"] == 1]
df = df[df["default_flag"] == 1]
df.drop(columns="default_flag", inplace=True)

Instantiate flags


In [12]:
df["flag"] = "0"

Solve missing eccentricity values


In [13]:
df[
    [
        "pl_orbeccen",
        "pl_orbeccenerr1",
        "pl_orbeccenerr2",
        "flag",
    ]
] = df.swifter.apply(interp_eccentricity, axis=1, result_type="expand")

Pandas Apply:   0%|          | 0/2430 [00:00<?, ?it/s]

Solve missing planetary mass values


In [14]:
df[
    [
        "pl_bmasse",
        "pl_bmasseerr1",
        "pl_bmasseerr2",
        "flag",
    ]
] = df.swifter.apply(interp_mass, axis=1, result_type="expand")

Pandas Apply:   0%|          | 0/2430 [00:00<?, ?it/s]

Drop columns that are no longer needed


In [15]:
df = df.drop(columns=["pl_rade", "pl_radeerr1", "pl_radeerr2"])

Remove systems where at least one planet has no mass or semi-major axis (if any)


In [16]:
mask = (
    df.groupby("hostname")[["pl_bmasse", "pl_orbsmax"]]
    .transform(lambda x: x.isnull().any())
    .any(axis=1)
)
df = df[~mask]

Solve missing values in inclinations


In [17]:
df[
    [
        "pl_orbincl",
        "pl_orbinclerr1",
        "pl_orbinclerr2",
        "flag",
    ]
] = df.swifter.apply(interp_inclination, args=(df,), axis=1, result_type="expand")

Pandas Apply:   0%|          | 0/2208 [00:00<?, ?it/s]

Solve missing values in semi-major axis uncertainties


In [18]:
df[
    [
        "pl_orbsmaxerr1",
        "pl_orbsmaxerr2",
        "flag",
    ]
] = df.swifter.apply(interp_sma, axis=1, result_type="expand")

Pandas Apply:   0%|          | 0/2208 [00:00<?, ?it/s]

### Task 6: storing the curated+interpolated database


In [19]:
df.to_csv(os.path.join(ROOT, "data", "exo_interp.csv"), index=False)

In [20]:
df = pd.read_csv(os.path.join(ROOT, "data", "exo_interp.csv"))

### Task 7: compute the parameters for the NAMD calculation

In [21]:
len(df.hostname.unique()), len(df.pl_name.unique()), len(df)

# problem: there are less unique planet names than the length of the dataframe
# find duplicates

df[df.duplicated(subset=["hostname", "pl_name"], keep=False)].sort_values(
    by=["hostname", "pl_name"]
)

Unnamed: 0,hostname,pl_name,rowupdate,sy_pnum,pl_orbsmax,pl_orbsmaxerr1,pl_orbsmaxerr2,pl_bmasse,pl_bmasseerr1,pl_bmasseerr2,pl_orbeccen,pl_orbeccenerr1,pl_orbeccenerr2,pl_orbincl,pl_orbinclerr1,pl_orbinclerr2,pl_trueobliq,pl_trueobliqerr1,pl_trueobliqerr2,flag
6,TOI-1338 A,TOI-1338 A b,2024-08-07,2,0.420509,0.0086,-0.0088,11.3,2.1,-2.1,0.0331,0.0022,-0.0021,90.494,0.013,-0.014,,,,0
100,TOI-1338 A,TOI-1338 A b,2024-08-07,2,0.420509,0.0086,-0.0088,11.3,2.1,-2.1,0.0331,0.0022,-0.0021,90.494,0.013,-0.014,,,,0
9,TOI-1338 A,TOI-1338 A c,2024-08-07,2,0.724598,0.016,-0.016,75.4,4.0,-3.6,0.037,0.032,-0.026,97.0,6.7,-6.8,,,,0
101,TOI-1338 A,TOI-1338 A c,2024-08-07,2,0.724598,0.016,-0.016,75.4,4.0,-3.6,0.037,0.032,-0.026,97.0,6.7,-6.8,,,,0
1,TOI-1408,TOI-1338 A b,2024-08-07,2,0.420509,0.0086,-0.0088,11.3,2.1,-2.1,0.0331,0.0022,-0.0021,90.494,0.013,-0.014,,,,0
4,TOI-1408,TOI-1338 A b,2024-08-07,2,0.420509,0.0086,-0.0088,11.3,2.1,-2.1,0.0331,0.0022,-0.0021,90.494,0.013,-0.014,,,,0
10,TOI-1408,TOI-1408 b,2024-08-07,2,0.05778,0.0001,-0.0001,593.0,4.0,-4.0,0.0023,0.0005,-0.0005,82.4,0.2,-0.2,,,,0
96,TOI-1408,TOI-1408 b,2024-08-07,2,0.05778,0.0001,-0.0001,593.0,4.0,-4.0,0.0023,0.0005,-0.0005,82.4,0.2,-0.2,,,,0
11,TOI-1408,TOI-1408 c,2024-08-07,2,0.03587,8e-05,-8e-05,7.6,0.2,-0.2,0.1353,0.0001,-0.0001,82.6,0.3,-0.3,,,,0
97,TOI-1408,TOI-1408 c,2024-08-07,2,0.03587,8e-05,-8e-05,7.6,0.2,-0.2,0.1353,0.0001,-0.0001,82.6,0.3,-0.3,,,,0


In [22]:
df

Unnamed: 0,hostname,pl_name,rowupdate,sy_pnum,pl_orbsmax,pl_orbsmaxerr1,pl_orbsmaxerr2,pl_bmasse,pl_bmasseerr1,pl_bmasseerr2,pl_orbeccen,pl_orbeccenerr1,pl_orbeccenerr2,pl_orbincl,pl_orbinclerr1,pl_orbinclerr2,pl_trueobliq,pl_trueobliqerr1,pl_trueobliqerr2,flag
0,TOI-1408,TOI-421 b,2024-08-07,2,0.055400,0.00100,-0.00100,6.700000,0.600000,-0.600000,0.130000,0.0500,-0.0500,85.680,0.360,-0.460,,,,0
1,TOI-1408,TOI-1338 A b,2024-08-07,2,0.420509,0.00860,-0.00880,11.300000,2.100000,-2.100000,0.033100,0.0022,-0.0021,90.494,0.013,-0.014,,,,0
2,TOI-1408,TOI-421 b,2024-08-07,2,0.055400,0.00100,-0.00100,6.700000,0.600000,-0.600000,0.130000,0.0500,-0.0500,85.680,0.360,-0.460,,,,0
3,TOI-1408,TOI-421 c,2024-08-07,2,0.117000,0.00180,-0.00180,14.100000,1.400000,-1.400000,0.190000,0.0400,-0.0400,88.353,0.078,-0.084,,,,0
4,TOI-1408,TOI-1338 A b,2024-08-07,2,0.420509,0.00860,-0.00880,11.300000,2.100000,-2.100000,0.033100,0.0022,-0.0021,90.494,0.013,-0.014,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2203,GJ 3929,GJ 3929 b,2022-08-01,2,0.025445,0.00069,-0.00069,1.480000,0.420000,-0.435000,0.000000,0.0000,0.0000,89.046,0.119,-0.154,,,,01+1-
2204,K2-275,K2-275 b,2021-09-20,2,0.033850,0.00720,-0.00425,5.912893,1.869622,-1.358232,0.310663,0.0000,0.0000,88.019,1.431,-2.864,,,,01+-2+-
2205,Kepler-2000,Kepler-2000 c,2017-05-08,2,0.123400,0.00000,0.00000,2.693291,1.625605,-1.084042,0.000000,0.0000,0.0000,89.950,0.000,0.000,,,,01+1-2+-3+3-4+4-
2206,Kepler-1996,Kepler-1996 c,2017-05-08,2,0.348200,0.00000,0.00000,5.733366,2.223574,-1.519825,0.000000,0.0000,0.0000,89.950,0.000,0.000,,,,01+1-2+-3+3-4+4-


### Task 8: compute the NAMD