# ExoNAMD API v1.0

## Summary

The observer wants to compute the relative and/or absolute NAMD of:
- a given multiplanetary system;
- a subset of multiplanetary systems;
- or all the known ones.

This tool handles all of the above.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from datetime import datetime
from datetime import timedelta

pd.options.display.max_columns = 20
pd.options.display.max_rows = 30
pd.options.mode.copy_on_write = True
import warnings

warnings.filterwarnings("ignore")

from exonamd.utils import ROOT
from exonamd.utils import fetch_aliases
from exonamd.utils import update_host
from exonamd.utils import update_planet

### Task 1: getting the data

This task retrieves the parameters of confirmed systems from the NASA Exoplanet Archive database, and stores them in a local database.

In [3]:
# Define the URL for the API
url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"

# Define the date you want to filter by
from_scratch = True  # change to False in production
if from_scratch:
    latest = datetime.strptime("1990-01-01", "%Y-%m-%d")  # Example date
else:
    df_old = pd.read_csv(os.path.join(ROOT, "data", "exo.csv"))
    latest = df_old["rowupdate"].max()
    latest = datetime.strptime(latest, "%Y-%m-%d")  # Example date
    latest = latest - timedelta(days=1)

# Convert the date to a string in the format 'YYYY-MM-DD'
latest = latest.strftime("%Y-%m-%d")

# Define the multiplicity you want to filter by
multiplicity = 5

# Define the SQL query to retrieve the required data
query = f"""
SELECT 
    hostname, 
    pl_name, 
    default_flag,
    rowupdate,
    sy_pnum, 
    st_rad,
    st_raderr1,
    st_raderr2,
    st_mass,
    st_masserr1,
    st_masserr2,
    pl_orbper,
    pl_orbpererr1,
    pl_orbpererr2,
    pl_orbsmax, 
    pl_orbsmaxerr1, 
    pl_orbsmaxerr2, 
    pl_rade,
    pl_radeerr1,
    pl_radeerr2,
    pl_bmasse, 
    pl_bmasseerr1, 
    pl_bmasseerr2, 
    pl_orbeccen, 
    pl_orbeccenerr1, 
    pl_orbeccenerr2, 
    pl_orbincl, 
    pl_orbinclerr1, 
    pl_orbinclerr2,
    pl_trueobliq,
    pl_trueobliqerr1,
    pl_trueobliqerr2,
    pl_ratdor,
    pl_ratdorerr1,
    pl_ratdorerr2,
    pl_ratror,
    pl_ratrorerr1,
    pl_ratrorerr2
FROM ps
WHERE
    sy_pnum > '{multiplicity}'
    AND rowupdate > '{latest}'
"""

# Define the parameters for the request
params = {
    "query": query,
    "format": "json",
}

# Make the request to the API
response = requests.get(url, params=params)

if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    # Convert the JSON data to a pandas DataFrame
    df = pd.DataFrame(data)
else:
    print(f"Error: {response.status_code}")

### Task 2: dealing with the aliases

Fetch aliases

In [4]:
aliases = fetch_aliases(df["hostname"].unique())

Fetched 13/13 entries on try 1


Curate aliases

In [5]:
# # For testing
# df2 = df.copy()
# df2.loc[df2['hostname'] == 'Kepler-20', 'hostname'] = 'KOI-70'
# df2.loc[df2['hostname'] == 'Kepler-80', 'hostname'] = 'KIC 4852528'
# df2['hostname'] = df2.apply(update_host, args=(aliases, True), axis=1)
df["hostname"] = df.apply(update_host, args=(aliases, False), axis=1)

# # For testing
# df2 = df.copy()
# df2.loc[df2['pl_name'] == 'Kepler-20 c', 'pl_name'] = 'KOI-70.01'
# df2.loc[df2['pl_name'] == 'Kepler-11 b', 'pl_name'] = 'KOI-157 b'
# df2['pl_name'] = df2.apply(update_planet, args=(aliases, True), axis=1)
df["pl_name"] = df.apply(update_planet, args=(aliases, False), axis=1)

In [6]:
# Double check that the names are consistent

for hostname in df["hostname"].unique():
    df_host = df[df["hostname"] == hostname]
    names = df_host["pl_name"]
    if len(set([name[:3] for name in names])) > 1:
        print(f"Inconsistent name for {hostname}")

In [7]:
df.to_csv(os.path.join(ROOT, "data", "task2.csv"), index=False)

### Task 3: computing missing values (if any) from simple equations

In [8]:
from exonamd.utils import solve_a_rs
from exonamd.utils import solve_rprs
from exonamd.utils import solve_a_period


def complete_values(row):

    print(row["hostname"], row["pl_name"])

    sma = row["pl_orbsmax"]
    ars = row["pl_ratdor"]
    rstar = row["st_rad"]
    rplanet = row["pl_rade"]
    rprs = row["pl_ratror"]
    period = row["pl_orbper"]
    mstar = row["st_mass"]

    # Rank groups
    a_rs_ = np.isnan(sma) + np.isnan(ars) + np.isnan(rstar)
    rprs_ = np.isnan(rplanet) + np.isnan(rprs) + np.isnan(rstar)
    a_period_ = np.isnan(period) + np.isnan(sma) + np.isnan(mstar)
    solve_order = np.argsort([a_rs_, rprs_, a_period_])
    for i in solve_order:
        if i == 0:
            # Solve semi-major axis -- stellar radius system of equations.
            solution = solve_a_rs(sma, rstar, ars)
            sma, rstar, ars = solution
        elif i == 1:
            # Solve planet radius -- stellar radius system of equations.
            solution = solve_rprs(rplanet, rstar, rprs)
            rplanet, rstar, rprs = solution
        elif i == 2:
            # Solve period-sma-mstar system of equations.
            solution = solve_a_period(period, sma, mstar)
            period, sma, mstar = solution
        print(i, solution)
    print(sma, ars, rstar, rplanet, rprs, period, mstar)
    return sma, ars, rstar, rplanet, rprs, period, mstar

In [9]:
df[
    [
        "pl_orbsmax",
        "pl_ratdor",
        "st_rad",
        "pl_rade",
        "pl_ratror",
        "pl_orbper",
        "st_mass",
    ]
] = df.apply(complete_values, axis=1, result_type="expand")

Kepler-20 Kepler-20 d
0 (0.3411, 0.93, 84.36)
1 (2.59, 0.93, 0.02568)
2 (77.6114427, 0.3411, 0.88)
0.3411 84.36 0.93 2.59 0.02568 77.6114427 0.88
Kepler-20 Kepler-20 d
2 (77.6115987, 0.349066, 0.94)
0 (0.349066, nan, nan)
1 (2.492, nan, nan)
0.349066 nan nan 2.492 nan 77.6115987 0.94
Kepler-20 Kepler-20 d
0 (0.3411, 0.93, 83.25)
1 (2.62, 0.93, 0.02592)
2 (77.6115987, 0.3411, 0.88)
0.3411 83.25 0.93 2.62 0.02592 77.6115987 0.88
Kepler-20 Kepler-20 d
0 (nan, 0.89, nan)
1 (2.488, 0.89, 0.025628883688403728)
2 (nan, nan, nan)
nan nan 0.89 2.488 0.025628883688403728 nan nan
Kepler-20 Kepler-20 d
0 (0.3453, 0.94, 78.3)
1 (2.75, 0.94, 0.0267)
2 (77.61184, 0.3453, 0.91)
0.3453 78.3 0.94 2.75 0.0267 77.61184 0.91
Kepler-20 Kepler-20 d
0 (nan, 0.91, nan)
1 (nan, 0.91, nan)
2 (77.61145505, 0.0001797350796571417, 0.96)
0.0001797350796571417 nan 0.91 nan nan 77.61145505 0.96
Kepler-20 Kepler-20 d
2 (77.611455, 0.3474, 0.93)
0 (0.3474, 0.92, 81.19801182603071)
1 (2.606, 0.92, 0.02596904056596109)
0.

### Task 4: storing the curated database

In [10]:
if not from_scratch:
    df_new = df.copy()
    df = pd.concat([df_new, df_old], ignore_index=True)
    df = df.drop_duplicates(keep="last")

df.to_csv(os.path.join(ROOT, "data", "exo.csv"), index=False)

In [11]:
df[df["pl_trueobliq"].notnull()][
    ["pl_name", "pl_trueobliq", "pl_trueobliqerr1", "pl_trueobliqerr2"]
]

Unnamed: 0,pl_name,pl_trueobliq,pl_trueobliqerr1,pl_trueobliqerr2


### Task 5: input missing values (if any) by interpolation

In [12]:
# code here

### Task 6: storing the curated+interpolated database

In [13]:
# code here