In [1]:
import requests
import pandas as pd

In [2]:
# Taking a look at the data base
composite_url = 'https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?table=compositepars'
api_key = "&apikey="

In [3]:
# Printing url
print(composite_url + api_key)

https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?table=compositepars&apikey=


In [4]:
# Pulling necessary columns from NASA CSV and cleaning data with null values
nasa_original_df = pd.read_csv(composite_url)
nasa_filter_df = nasa_original_df[["fpl_hostname","fpl_letter","fpl_name","fpl_orbper","fpl_bmasse","fpl_rade","fpl_dens","fpl_eqt","fpl_insol","fst_optmag","fst_teff","fst_mass","fst_rad","fst_age"]]
nasa_clean_df = nasa_filter_df.dropna(how='any')
nasa_clean_df.count()

fpl_hostname    332
fpl_letter      332
fpl_name        332
fpl_orbper      332
fpl_bmasse      332
fpl_rade        332
fpl_dens        332
fpl_eqt         332
fpl_insol       332
fst_optmag      332
fst_teff        332
fst_mass        332
fst_rad         332
fst_age         332
dtype: int64

In [5]:
# Pulling reference data in as a CSV and reading it as a DataFrame
phl = "PHL Habitable Exoplanets.csv"
phl_df = pd.read_csv(phl)
phl_df.head()

Unnamed: 0,Name,Type,Mass (ME),Radius (RE),Flux (SE),Teq (K),Period (days),Distance (ly),ESI
0,Teegarden's Star b,M-Warm Terran,1.05,,1.15,264,4.9,12.0,0.95
1,TOI-700 d,M-Warm Terran,,1.14,0.87,246,37.4,101.0,0.93
2,K2-72 e,M-Warm Terran,,1.29,1.11,261,24.2,217.0,0.9
3,TRAPPIST-1 d,M-Warm Subterran,0.41,0.77,1.14,263,4.0,41.0,0.9
4,Kepler-1649 c,M-Warm Terran,,1.06,0.75,237,19.5,301.0,0.9


In [6]:
# Converting Name column in reference data to a list for for loop check
phl_names = phl_df["Name"].tolist()
print(phl_names)

["Teegarden's Star b", 'TOI-700 d', 'K2-72 e', 'TRAPPIST-1 d', 'Kepler-1649 c', 'Proxima Cen b', 'GJ 1061 d', 'GJ 1061 c', 'Ross 128 b', 'GJ 273 b', 'TRAPPIST-1 e', 'Kepler-442 b', 'Wolf 1061 c', 'GJ 667 C c', 'GJ 667 C f', 'Kepler-1229 b', 'TRAPPIST-1 f', 'Kepler-62 f', "Teegarden's Star c", 'Kepler-186 f', 'GJ 667 C e', 'tau Cet f', 'TRAPPIST-1 g', 'GJ 682 b', 'Kepler-452 b', 'Kepler-62 e', 'Kepler-1652 b', 'Kepler-1544 b', 'Kepler-296 e', 'Kepler-283 c', 'K2-296 b', 'Kepler-1410 b', 'K2-3 d', 'Kepler-1638 b', 'Kepler-296 f', 'Kepler-440 b', 'Kepler-705 b', 'Kepler-1653 b', 'GJ 832 c', 'Kepler-1606 b', 'Kepler-1090 b', 'Kepler-61 b', 'K2-18 b', 'Kepler-443 b', 'Kepler-1701 b', 'Kepler-22 b', 'LHS 1140 b', 'Kepler-1552 b', 'K2-9 b', 'Kepler-1540 b', 'GJ 180 c', 'Kepler-1632 b', 'Kepler-298 d', 'GJ 163 c', 'HD 40307 g', 'K2-288 B b', 'GJ 3293 d', 'GJ 229 A c', 'Kepler-174 d', 'GJ 357 d']


In [7]:
# Checking to see how many exoplanets in reference data remain in NASA data
phl_name_list = []
for x in phl_names:
    if not (nasa_clean_df.loc[nasa_clean_df["fpl_name"] == x].empty):
        phl_name_list.append(x)
print(len(phl_name_list))

22


In [8]:
# Merging reference data and NASA data
phl_esi_df = phl_df[["Name","ESI"]]
nasa_merge_df = pd.merge(nasa_clean_df, phl_esi_df,left_on="fpl_name",right_on="Name",how="left")

# Pulling necessary columns from merged DF
nasa_clean_merge_df = nasa_merge_df[["fpl_hostname","fpl_name","fpl_orbper","fpl_bmasse","fpl_rade","fpl_dens","fpl_eqt","fpl_insol","fst_optmag","fst_teff","fst_mass","fst_rad","fst_age","ESI"]]
nasa_clean_merge_df.head(5)

Unnamed: 0,fpl_hostname,fpl_name,fpl_orbper,fpl_bmasse,fpl_rade,fpl_dens,fpl_eqt,fpl_insol,fst_optmag,fst_teff,fst_mass,fst_rad,fst_age,ESI
0,HAT-P-9,HAT-P-9 b,3.922811,238.05467,15.614,0.342,1540.0,931.0,12.297,6350.0,1.28,1.34,1.6,
1,HAT-P-25,HAT-P-25 b,3.652815,180.84527,12.722,0.483,1182.0,325.0,13.19,5519.0,1.01,0.92,3.2,
2,HAT-P-29,HAT-P-29 b,5.72319,279.6904,13.115,0.79,1271.0,433.0,11.9,6087.0,1.45,1.3,2.2,
3,HAT-P-32,HAT-P-32 b,2.150008,216.1244,22.194,0.108,1836.0,1841.0,11.289,6001.0,1.13,1.37,2.7,
4,Kepler-117,Kepler-117 b,18.795923,29.875,8.059,0.3,984.0,157.541,14.247,6150.0,1.13,1.61,5.3,


In [9]:
nasaphldf = nasa_clean_merge_df.rename(columns={"fpl_hostname":"Host Name", "fpl_name":"Exoplanet Name","fpl_orbper":"Orbital Period (days)","fpl_bmasse":"Planet Mass [Earth mass]","fpl_rade":"Planet Radius [Earth radii]","fpl_dens":"Planet Density (g/cm**3)","fpl_eqt":"Planet Equilibrium Temperature [K]","fpl_insol":"Insolation Flux [Earth flux]","fst_optmag":"Optical Magnitude [mag]","fst_teff":"Effective Temperature [K]","fst_mass":"Stellar Mass [Solar mass]","fst_rad":"Stellar Radius [Solar radii]","fst_age":"Stellar Age [Gyr]","ESI":"Earth Similarity Index (ESI)"})
nasaphldf.set_index("Exoplanet Name",inplace=True)
nasaphldf.head(50)

Unnamed: 0_level_0,Host Name,Orbital Period (days),Planet Mass [Earth mass],Planet Radius [Earth radii],Planet Density (g/cm**3),Planet Equilibrium Temperature [K],Insolation Flux [Earth flux],Optical Magnitude [mag],Effective Temperature [K],Stellar Mass [Solar mass],Stellar Radius [Solar radii],Stellar Age [Gyr],Earth Similarity Index (ESI)
Exoplanet Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HAT-P-9 b,HAT-P-9,3.922811,238.05467,15.614,0.342,1540.0,931.0,12.297,6350.0,1.28,1.34,1.6,
HAT-P-25 b,HAT-P-25,3.652815,180.84527,12.722,0.483,1182.0,325.0,13.19,5519.0,1.01,0.92,3.2,
HAT-P-29 b,HAT-P-29,5.72319,279.6904,13.115,0.79,1271.0,433.0,11.9,6087.0,1.45,1.3,2.2,
HAT-P-32 b,HAT-P-32,2.150008,216.1244,22.194,0.108,1836.0,1841.0,11.289,6001.0,1.13,1.37,2.7,
Kepler-117 b,Kepler-117,18.795923,29.875,8.059,0.3,984.0,157.541,14.247,6150.0,1.13,1.61,5.3,
Kepler-117 c,Kepler-117,50.790391,584.78,12.341,1.74,704.0,41.863,14.247,6150.0,1.13,1.61,5.3,
Kepler-120 b,Kepler-120,6.312501,8.5,2.15,4.7,576.0,42.692,14.719,4096.0,0.72,0.53,3.63,
Kepler-120 c,Kepler-120,12.794585,2.96,1.53,4.54,455.0,16.625,14.719,4096.0,0.72,0.53,3.63,
Kepler-125 b,Kepler-125,4.164389,6.21,2.37,2.56,594.0,33.184,14.768,3810.0,0.53,0.51,3.8,
Kepler-186 b,Kepler-186,3.886791,1.24,1.07,5.56,579.0,37.372,14.599,3755.0,0.54,0.52,4.0,


In [10]:
# Requesting confirmed planets url for facility list
confirmed_url = 'https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?table=exoplanets'
facility = pd.read_csv(confirmed_url)
fo_ryan = facility[['pl_name','pl_facility','pl_disc']]
fo_ryan.head()

KeyError: "['pl_disc'] not in index"