# ExoNAMD API v1.0


## Summary

The observer wants to compute the relative and/or absolute NAMD of:

- a given multiplanetary system;
- a subset of multiplanetary systems;
- all the known ones.

This tool handles all of the above cases.


In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from datetime import datetime
from datetime import timedelta
import warnings

from exonamd.utils import ROOT
from exonamd.utils import fetch_aliases
from exonamd.utils import update_host
from exonamd.utils import update_planet
from exonamd.solve import solve_values
from exonamd.interp import interp_eccentricity
from exonamd.interp import interp_mass
from exonamd.interp import interp_inclination
from exonamd.interp import interp_sma


# Notebook-wide settings: silence all warnings and configure pandas
# (display limits for wide/long frames, copy-on-write mode).
warnings.filterwarnings(action="ignore")
pd.set_option("display.max_columns", 20)
pd.set_option("display.max_rows", 30)
pd.set_option("mode.copy_on_write", True)

### Task 1: getting the data

This task retrieves the parameters of confirmed systems from the NASA Exoplanet Archive database, and stores them in a local database.


In [22]:
# Download planetary-system rows from the archive's TAP endpoint into `df`.
# With from_scratch=False only rows whose `rowupdate` is later than the newest
# locally stored date (minus one day) are requested; `df_old` keeps the local
# table so it can be merged with the new rows when the database is stored.

# Define the URL for the API
url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"

# Define the date you want to filter by
# True: request everything updated after 1990-01-01;
# False: request only rows updated since the local copy was last refreshed.
from_scratch = False
if from_scratch:
    # Presumably earlier than any `rowupdate` in the archive, so nothing is
    # filtered out -- confirm if older rows ever matter.
    latest = datetime.strptime("1990-01-01", "%Y-%m-%d")
else:
    # df_old = pd.read_csv(os.path.join(ROOT, "data", "exo.csv"))
    df_old = pd.read_csv(os.path.join("../exonamd/", "data", "exo.csv"))
    # Most recent update date already present in the local table
    latest = df_old["rowupdate"].max()
    latest = datetime.strptime(latest, "%Y-%m-%d")
    # Step back one day: the query filter is a strict `>`, so this re-requests
    # rows carrying the newest stored date as well.
    latest = latest - timedelta(days=1)

# Convert the date to a string in the format 'YYYY-MM-DD'
latest = latest.strftime("%Y-%m-%d")

# Define the multiplicity you want to filter by
# (the query keeps systems with sy_pnum strictly greater than this value)
multiplicity = 1

# Define the SQL query to retrieve the required data
query = f"""
SELECT 
    hostname, 
    pl_name, 
    default_flag,
    rowupdate,
    sy_pnum, 
    st_rad,
    st_mass,
    pl_orbper,
    pl_orbsmax, 
    pl_orbsmaxerr1, 
    pl_orbsmaxerr2, 
    pl_rade,
    pl_radeerr1,
    pl_radeerr2,
    pl_bmasse, 
    pl_bmasseerr1, 
    pl_bmasseerr2, 
    pl_orbeccen, 
    pl_orbeccenerr1, 
    pl_orbeccenerr2, 
    pl_orbincl, 
    pl_orbinclerr1, 
    pl_orbinclerr2,
    pl_trueobliq,
    pl_trueobliqerr1,
    pl_trueobliqerr2,
    pl_ratdor,
    pl_ratror
FROM ps
WHERE
    sy_pnum > '{multiplicity}'
    AND rowupdate > '{latest}'
"""

# Define the parameters for the request
params = {
    "query": query,
    "format": "json",
}

# Make the request to the API
response = requests.get(url, params=params)

if response.status_code != 200:
    # Stop here: continuing would leave `df` undefined (or stale from an
    # earlier run of this cell) and the failure would only surface later.
    raise RuntimeError(f"Error: {response.status_code}")

# Parse the JSON response
data = response.json()
# Convert the JSON data to a pandas DataFrame
df = pd.DataFrame(data)

Deal with None values


In [23]:
# Both spellings of "no value" in the table (None and the empty string)
# are mapped to NaN.
missing_markers = {None: np.nan, "": np.nan}
df = df.replace(missing_markers)

### Task 2: dealing with the aliases


Fetch aliases


In [24]:
# Query aliases once per distinct hostname rather than once per table row.
host_names = df["hostname"].unique()
aliases = fetch_aliases(host_names)

Fetched 3/3 entries on try 1


Curate aliases


In [25]:
# Rewrite the name columns row by row with the alias table.
# The hostname column is replaced first, so update_planet is applied to rows
# that already carry the curated hostname.
alias_args = (aliases,)
df["hostname"] = df.apply(update_host, axis=1, args=alias_args)
df["pl_name"] = df.apply(update_planet, axis=1, args=alias_args)

In [26]:
# Sanity check on the curated names: all planets of one host are expected to
# share the same first three characters; print every host where they do not.
for hostname in df["hostname"].unique():
    planet_names = df.loc[df["hostname"] == hostname, "pl_name"]
    prefixes = {planet_name[:3] for planet_name in planet_names}
    if len(prefixes) > 1:
        print(f"Inconsistent name for {hostname}")

### Task 3: computing missing values (if any) from simple equations


In [27]:
# solve_values is applied to each row and its expanded result is written back
# positionally into the columns listed here, so the list order must match the
# order of the values the helper returns.
solved_columns = [
    "pl_orbsmax",
    "pl_ratdor",
    "st_rad",
    "pl_rade",
    "pl_ratror",
    "pl_orbper",
    "st_mass",
]
solved_values = df.apply(solve_values, axis=1, result_type="expand")
df[solved_columns] = solved_values

### Task 4: storing the curated database


In [28]:
# Destination of the curated table
exo_csv = os.path.join("../exonamd/", "data", "exo.csv")

if not from_scratch:
    # Incremental run: put the freshly curated rows in front of the stored
    # ones, then discard rows that are identical in every column.
    df_new = df.copy()
    merged = pd.concat([df_new, df_old], ignore_index=True)
    df = merged.drop_duplicates(keep="last")

df.to_csv(exo_csv, index=False)

In [43]:
# df = pd.read_csv(os.path.join("../exonamd/", "data", "exo.csv"))

Drop columns that are no longer needed


In [44]:
# These columns are not referenced again in this notebook after being stored
# in exo.csv, so they are removed in place.
obsolete_columns = [
    "pl_ratdor",
    "st_rad",
    "pl_ratror",
    "pl_orbper",
    "st_mass",
]
df.drop(columns=obsolete_columns, inplace=True)

### Task 5: impute missing values (if any) by interpolation


Use nanmedian to collapse the multiple entries of each planet into a single row


In [45]:
# Collapse the table to the default row(s) of each planet.
# For every planet, each column other than the four identifier/bookkeeping
# columns below is overwritten, on the rows flagged default_flag == 1, with the
# NaN-ignoring median over all rows of that planet. Only those flagged rows
# survive; a planet without any flagged row disappears from the table.
bookkeeping_columns = ("hostname", "pl_name", "default_flag", "rowupdate")
keep_indices = []

for planet in df["pl_name"].unique():
    df_planet = df[df["pl_name"] == planet]
    is_default = (df_planet["default_flag"] == 1).to_numpy()
    default_index = df_planet.index[is_default]
    value_columns = [
        c for c in df_planet.columns if c not in bookkeeping_columns
    ]
    for col in value_columns:
        # df_planet was taken before any write, so the median is computed
        # from the unmodified rows.
        df.loc[default_index, col] = np.nanmedian(df_planet[col].values)
    keep_indices.extend(default_index)

is_kept = df.index.isin(keep_indices)
df.drop(df.index[~is_kept], inplace=True)
df.drop(columns="default_flag", inplace=True)

Instantiate flags


In [46]:
# Start every row with the string flag "0"; the interp_* steps later in this
# notebook write their returned flag value back into this column.
df["flag"] = "0"

Solve missing eccentricity values


In [47]:
# Apply interp_eccentricity to each row and write its expanded result back
# positionally into the eccentricity value, its two error columns and the flag.
eccentricity_columns = [
    "pl_orbeccen",
    "pl_orbeccenerr1",
    "pl_orbeccenerr2",
    "flag",
]
eccentricity_values = df.apply(
    interp_eccentricity, axis=1, result_type="expand"
)
df[eccentricity_columns] = eccentricity_values

Solve missing planetary mass values


In [48]:
# Apply interp_mass to each row and write its expanded result back
# positionally into the mass value, its two error columns and the flag.
mass_columns = [
    "pl_bmasse",
    "pl_bmasseerr1",
    "pl_bmasseerr2",
    "flag",
]
mass_values = df.apply(interp_mass, axis=1, result_type="expand")
df[mass_columns] = mass_values

Drop columns that are no longer needed


In [49]:
# The radius columns are not used by any later step in this notebook.
radius_columns = ["pl_rade", "pl_radeerr1", "pl_radeerr2"]
df = df.drop(columns=radius_columns)

Remove systems where at least one planet has no mass or semi-major axis (if any)


In [50]:
# A system is discarded as a whole as soon as one of its planets lacks a mass
# or a semi-major axis.
required_columns = ["pl_bmasse", "pl_orbsmax"]
row_has_gap = df[required_columns].isnull().any(axis=1)

remove_indices = []
for hostname in df["hostname"].unique():
    in_system = df["hostname"] == hostname
    if row_has_gap[in_system].any():
        # Collect every row of the affected host, not only the incomplete one
        remove_indices.extend(df.index[in_system.to_numpy()])

df.drop(index=remove_indices, inplace=True)

Solve missing values in inclinations


In [51]:
# Apply interp_inclination to each row; the full table is handed over as an
# extra positional argument (presumably so the helper can look at the other
# planets -- confirm against exonamd.interp). The expanded result is written
# back positionally into the inclination value, its two errors and the flag.
inclination_columns = [
    "pl_orbincl",
    "pl_orbinclerr1",
    "pl_orbinclerr2",
    "flag",
]
inclination_values = df.apply(
    interp_inclination, axis=1, args=(df,), result_type="expand"
)
df[inclination_columns] = inclination_values

Solve missing values in semi-major axis uncertainties


In [52]:
# Apply interp_sma to each row and write its expanded result back positionally
# into the two semi-major-axis error columns and the flag.
sma_error_columns = [
    "pl_orbsmaxerr1",
    "pl_orbsmaxerr2",
    "flag",
]
sma_error_values = df.apply(interp_sma, axis=1, result_type="expand")
df[sma_error_columns] = sma_error_values

### Task 6: storing the curated+interpolated database


In [53]:
# Write the curated and interpolated table next to exo.csv, without the index.
interp_csv = os.path.join("../exonamd/", "data", "exo_interp.csv")
df.to_csv(interp_csv, index=False)