### NexSci / NASA Exoplanet Archive Info

- [TAP User Guide](https://exoplanetarchive.ipac.caltech.edu/docs/TAP/usingTAP.html)
- [API Examples](https://exoplanetarchive.ipac.caltech.edu/docs/program_interfaces.html)
- [Table Schema](https://exoplanetarchive.ipac.caltech.edu/docs/API_PS_columns.html)

Tables to pull from:
- [PSCompPars](https://exoplanetarchive.ipac.caltech.edu/cgi-bin/TblView/nph-tblView?app=ExoTbls&config=PSCompPars)


In [11]:
import requests

# Query the archive's synchronous TAP endpoint for one planet's radius and mass.
url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
query = "select pl_name, pl_rade, pl_bmasse from pscomppars where pl_name='HD 108236 c'"
params = {"query": query, "format": "json"}

# Without a timeout, requests can wait on a stalled connection indefinitely.
response = requests.get(url, params=params, timeout=60)
# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
response.raise_for_status()
planet_data = response.json()

print(planet_data)


[{'pl_name': 'HD 108236 c', 'pl_rade': 2.071, 'pl_bmasse': 4.94}]


In [12]:
import pandas as pd
import requests
from io import StringIO

# Set up the TAP request: every planet whose system has more than 3 planets.
url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
query = "select hostname, pl_name, pl_rade, pl_bmasse, pl_orbper from pscomppars where sy_pnum > 3"
params = {"query": query, "format": "csv"}

# Make the request. Without a timeout, requests can wait on a stalled
# connection indefinitely.
response = requests.get(url, params=params, timeout=60)
# Stop here on an HTTP error; otherwise the error body would be parsed below
# as if it were CSV data.
response.raise_for_status()

# Read the CSV payload into a DataFrame
df = pd.read_csv(StringIO(response.text))

print(df)


       hostname       pl_name  pl_rade  pl_bmasse  pl_orbper
0    Kepler-235  Kepler-235 e     2.22       5.56  46.183669
1    Kepler-279  Kepler-279 c     4.30      49.40  35.736000
2    Kepler-251  Kepler-251 d     2.77       8.10  30.133001
3      HD 23472    HD 23472 d     0.75       0.55   3.976640
4    Kepler-279  Kepler-279 d     3.10      37.50  54.414000
..          ...           ...      ...        ...        ...
537  Kepler-245  Kepler-245 d     3.03      21.60  36.277108
538  Kepler-186  Kepler-186 d     1.40       2.54  13.342996
539  Kepler-215  Kepler-215 b     1.62       3.26   9.360672
540  Kepler-245  Kepler-245 b     2.57     146.78   7.490190
541     KOI-351     KOI-351 c     1.19       1.81   8.719375

[542 rows x 5 columns]


In [13]:
# Order rows by host and orbital period, then number each host's planets
# 1, 2, 3, ... in that order.
multi_df = (
    df.sort_values(by=['hostname', 'pl_orbper'])
    .assign(planet_order=lambda ordered: ordered.groupby('hostname').cumcount() + 1)
)

#Make a new column classifying each planet
def classify_planet(mass, radius):
    """Assign a coarse planet category from mass and radius.

    Parameters
    ----------
    mass : float or None
        Planet mass in Earth masses (the notebook passes ``pl_bmasse``).
    radius : float or None
        Planet radius in Earth radii (the notebook passes ``pl_rade``).

    Returns
    -------
    str
        One of "Unknown", "sub-Neptune", "Terrestrial",
        "Low-mass sub-Neptune", "Super-Earth", "Neptune-like", "Gas Giant"
        or "Brown Dwarf".
    """
    # Missing values read from CSV arrive as NaN, not None. Every comparison
    # against NaN is False, so a plain `is None` test would let a missing mass
    # fall through all branches and be labelled "Brown Dwarf".
    if pd.isna(mass) or pd.isna(radius):
        return "Unknown"

    # Radius-based override: this radius band wins over the mass thresholds
    # below as long as the mass stays under 20.
    if 1.9 < radius < 2.9 and mass < 20:
        return "sub-Neptune"

    # Mass-based logic; only the lightest bin is further split by radius.
    if mass < 2:
        if radius <= 1.6:
            return "Terrestrial"
        return "Low-mass sub-Neptune"
    if mass < 10:
        return "Super-Earth"
    if mass < 60:
        return "Neptune-like"
    if mass < 300:
        return "Gas Giant"
    return "Brown Dwarf"


# Label every planet row with its category.
multi_df['pl_type'] = multi_df.apply(
    lambda planet: classify_planet(planet.pl_bmasse, planet.pl_rade),
    axis='columns',
)

# Keep only the columns used downstream, in presentation order.
final_df = multi_df.loc[
    :,
    ['hostname', 'pl_name', 'planet_order', 'pl_orbper', 'pl_rade', 'pl_bmasse', 'pl_type'],
]

# Summarise how many planets and host stars were retrieved, then preview.
print(f"Found {len(final_df)} planets in {final_df['hostname'].nunique()} multi-planet systems (more than 3 planets)")
print(final_df.head())

Found 542 planets in 122 multi-planet systems (more than 3 planets)
    hostname   pl_name  planet_order    pl_orbper  pl_rade  pl_bmasse  \
439   55 Cnc  55 Cnc e             1     0.736547    1.875     7.9900   
344   55 Cnc  55 Cnc b             2    14.651600   13.900   263.9785   
56    55 Cnc  55 Cnc c             3    44.398900    8.510    54.4738   
345   55 Cnc  55 Cnc f             4   259.880000    7.590    44.8120   
438   55 Cnc  55 Cnc d             5  5574.200000   13.000  1232.4930   

          pl_type  
439   Super-Earth  
344     Gas Giant  
56   Neptune-like  
345  Neptune-like  
438   Brown Dwarf  


In [14]:
# Put each host's planets in planet_order, then collapse their categories
# into a single comma-separated string per host.
df_sorted = final_df.sort_values(by=['hostname', 'planet_order'])
pattern_df = (
    df_sorted.groupby('hostname')['pl_type']
    .apply(','.join)
    .reset_index()
    .rename(columns={'pl_type': 'planet_type_sequence'})
)

print(pattern_df.head())

         hostname                               planet_type_sequence
0          55 Cnc  Super-Earth,Gas Giant,Neptune-like,Neptune-lik...
1  Barnard's star    Terrestrial,Terrestrial,Terrestrial,Terrestrial
2          DMPP-1   Super-Earth,Super-Earth,Super-Earth,Neptune-like
3         GJ 3293  Super-Earth,Neptune-like,sub-Neptune,Neptune-like
4        GJ 667 C  sub-Neptune,Super-Earth,Super-Earth,Super-Eart...


In [15]:
# Count how many host stars share each planet-type sequence.
pattern_counts = (
    pattern_df['planet_type_sequence']
    .value_counts()
    .rename_axis('planet_type_sequence')
    .reset_index(name='num_systems')
)

print(pattern_counts.head())

                                planet_type_sequence  num_systems
0    Super-Earth,sub-Neptune,sub-Neptune,sub-Neptune            6
1    Super-Earth,Super-Earth,sub-Neptune,sub-Neptune            4
2    Terrestrial,sub-Neptune,sub-Neptune,sub-Neptune            3
3  Terrestrial,sub-Neptune,Neptune-like,Neptune-like            2
4    Terrestrial,Terrestrial,Super-Earth,Super-Earth            2


In [16]:
# Keep the sequences that occur in at least two systems.
common_patterns = pattern_counts.loc[pattern_counts['num_systems'].gt(1)]
print(common_patterns)

                                 planet_type_sequence  num_systems
0     Super-Earth,sub-Neptune,sub-Neptune,sub-Neptune            6
1     Super-Earth,Super-Earth,sub-Neptune,sub-Neptune            4
2     Terrestrial,sub-Neptune,sub-Neptune,sub-Neptune            3
3   Terrestrial,sub-Neptune,Neptune-like,Neptune-like            2
4     Terrestrial,Terrestrial,Super-Earth,Super-Earth            2
5     Terrestrial,Terrestrial,Terrestrial,Terrestrial            2
6       Gas Giant,sub-Neptune,sub-Neptune,Terrestrial            2
7     Terrestrial,Super-Earth,Super-Earth,Terrestrial            2
8     Super-Earth,Super-Earth,Super-Earth,Super-Earth            2
9     Terrestrial,Super-Earth,Terrestrial,Super-Earth            2
10    sub-Neptune,sub-Neptune,sub-Neptune,sub-Neptune            2
11   Super-Earth,sub-Neptune,Super-Earth,Neptune-like            2
12  Super-Earth,Super-Earth,sub-Neptune,Neptune-li...            2


In [17]:
# Number of systems covered by the shared sequences.
total = common_patterns.loc[:, 'num_systems'].sum()
print(total)

33


In [18]:
# Select the host stars whose sequence is one of the shared ones.
shared_sequences = common_patterns['planet_type_sequence']
is_shared = pattern_df['planet_type_sequence'].isin(shared_sequences)
df_with_common_patterns = pattern_df[is_shared]

print(df_with_common_patterns.head(50))
print(df_with_common_patterns.count())

           hostname                               planet_type_sequence
1    Barnard's star    Terrestrial,Terrestrial,Terrestrial,Terrestrial
10        HD 134606  Super-Earth,Super-Earth,sub-Neptune,Neptune-li...
16         HD 20781  Terrestrial,sub-Neptune,Neptune-like,Neptune-like
18         HD 20794    Super-Earth,sub-Neptune,sub-Neptune,sub-Neptune
19        HD 215152    Terrestrial,Terrestrial,Super-Earth,Super-Earth
38       Kepler-100   Super-Earth,sub-Neptune,Super-Earth,Neptune-like
44       Kepler-132    Terrestrial,Super-Earth,Super-Earth,Terrestrial
45       Kepler-138    Terrestrial,Super-Earth,Super-Earth,Terrestrial
46      Kepler-1388    sub-Neptune,sub-Neptune,sub-Neptune,sub-Neptune
50      Kepler-1542    Terrestrial,Terrestrial,Terrestrial,Terrestrial
54       Kepler-172    sub-Neptune,sub-Neptune,sub-Neptune,sub-Neptune
55       Kepler-176      Gas Giant,sub-Neptune,sub-Neptune,Terrestrial
58      Kepler-1987    Super-Earth,Super-Earth,sub-Neptune,sub-Neptune
62    

In [19]:
import json

# Build the list handed to json.dump(): one entry per shared planet-type
# sequence, each wrapped as {"pattern_<n>": {...}}.
data = []

for patternNumber, p in enumerate(common_patterns['planet_type_sequence'], start=1):
    patternProperties = {
        "name": f"Pattern #{patternNumber}",
        # Ordered list of planet categories. The sequence string was built
        # with ','.join(...), so it has to be split on ',' as well; any other
        # delimiter leaves the whole string as a single list element.
        "pattern": p.split(','),
        # Host stars whose planets follow this sequence.
        "hostnames": df_with_common_patterns[
            df_with_common_patterns['planet_type_sequence'] == p
        ]['hostname'].tolist(),
        # Kept as a list (as originally written) so the JSON layout read by
        # any existing consumer does not change.
        "hostname_count": common_patterns[
            common_patterns['planet_type_sequence'] == p
        ]['num_systems'].tolist(),
    }

    data.append({f"pattern_{patternNumber}": patternProperties})

# Write to a JSON file
filename = "PatternData_NexSci.json"
with open(filename, 'w') as f:
    json.dump(data, f, indent=2)



In [20]:
import json

# Build the mapping handed to json.dump(): host star name -> {"planets": [...]},
# covering only the hosts whose planet-type sequence is shared.
star_system_data = {}

for star in df_with_common_patterns['hostname']:
    system_planets = []

    # Planets of this host, in the row order of final_df.
    for planet_name in final_df.loc[final_df['hostname'] == star, 'pl_name'].tolist():
        # Look the planet's rows up once and reuse them for every field.
        # Each field is stored as a list, exactly as .tolist() returns it.
        planet_rows = final_df[final_df['pl_name'] == planet_name]
        system_planets.append({
            "name": planet_name,
            "radius": planet_rows['pl_rade'].tolist(),  # in Earth Radii
            "mass": planet_rows['pl_bmasse'].tolist(),  # in Earth Masses
            "orbital_period": planet_rows['pl_orbper'].tolist(),
            "category": planet_rows['pl_type'].tolist(),
            "order": planet_rows['planet_order'].tolist(),
        })

    # The host name is the key, so it is not repeated inside the record.
    star_system_data[star] = {"planets": system_planets}

# Write to a JSON file
filename = "StarSystemData_NexSci.json"
with open(filename, 'w') as f:
    json.dump(star_system_data, f, indent=2)