In [None]:
"""
Exoplanet Habitability Analysis: Searching for Earth 2.0
NASA Exoplanet Archive | Codédex February 2026 Challenge
Alan Geirnaert
"""

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# shared dark-theme palette: every chart below pulls its colors from here
COLORS = dict(
    bg_dark='#0a0a0f',
    accent_blue='#00d4ff',
    accent_purple='#a855f7',
    accent_pink='#ec4899',
    accent_green='#10b981',
    accent_yellow='#fbbf24',
    accent_orange='#f97316',
    accent_red='#ef4444',
    text_light='#e2e8f0',
    goldilocks='#22c55e',
    earth_blue='#3b82f6',
)

print("good to go")

In [None]:
# Load the raw archive export. Lines starting with '#' are treated as comments
# and skipped (comment='#').
df_raw = pd.read_csv(
    './exoplanets.csv',
    comment='#',
    low_memory=False,
)
print(f"Rows in the archive: {len(df_raw):,}")
# 39k rows, but that's not 39k planets: the same planet shows up once per
# research team that measured it, so the rows have to be consolidated first.

# columns we care about, grouped by what they describe
planet_metrics = [
    'pl_rade',      # planet radius (Earth radii)
    'pl_bmasse',    # planet mass (Earth masses)
    'pl_eqt',       # equilibrium temperature (K)
    'pl_insol',     # insolation flux (Earth flux)
    'pl_orbper',    # orbital period (days)
    'pl_orbsmax',   # semi-major axis (AU)
    'pl_orbeccen',  # orbital eccentricity
]
stellar_metrics = [
    'st_teff',      # star temperature (K)
    'st_rad',       # star radius (Solar radii)
    'st_mass',      # star mass (Solar masses)
    'st_lum',       # star luminosity (log Solar)
]
system_metrics = [
    'sy_dist',      # distance from us (parsecs)
]

# order matters: it decides the column order of the consolidated table
numeric_cols = [*planet_metrics, *stellar_metrics, *system_metrics]

categorical_cols = [
    'hostname',
    'st_spectype',
    'discoverymethod',
    'disc_facility',
]

# strategy: for numeric stuff, take the median of all measurements.
# for categorical, just grab the first non-null value.
# for discovery year, take the earliest one (= when it was actually first found).

def _non_null(group, col):
    """Return the non-null values of `col`, or an empty Series if the column is absent."""
    if col in group.columns:
        return group[col].dropna()
    return pd.Series(dtype=object)


def consolidate(group, numeric=None, categorical=None):
    """Collapse all archive rows for one planet into a single summary record.

    Intended to be used via ``df_raw.groupby('pl_name').apply(consolidate)``;
    the planet name is read from ``group.name``, which groupby sets.

    Parameters
    ----------
    group : pd.DataFrame
        All rows belonging to one planet.
    numeric : list of str, optional
        Columns summarised by their median. Defaults to the module-level
        ``numeric_cols``.
    categorical : list of str, optional
        Columns summarised by their first non-null value. Defaults to the
        module-level ``categorical_cols``.

    Returns
    -------
    pd.Series
        ``pl_name``, then ``<col>`` and ``<col>_count`` for every numeric
        column, every categorical column, ``disc_year`` (earliest year as int)
        and ``source_row_count``. Every key is always present: a column that
        is missing from the input, or entirely null, yields NaN (count 0), so
        downstream cells never hit a KeyError on a missing field.
    """
    num_cols = numeric_cols if numeric is None else numeric
    cat_cols = categorical_cols if categorical is None else categorical

    result = {'pl_name': group.name}

    for col in num_cols:
        vals = _non_null(group, col)
        result[col] = vals.median() if len(vals) > 0 else np.nan
        # how many independent measurements the median is based on
        result[f'{col}_count'] = len(vals)

    for col in cat_cols:
        non_null = _non_null(group, col)
        result[col] = non_null.iloc[0] if len(non_null) > 0 else np.nan

    years = _non_null(group, 'disc_year')
    result['disc_year'] = int(years.min()) if len(years) > 0 else np.nan

    result['source_row_count'] = len(group)
    return pd.Series(result)

# one consolidated row per planet name; the group index is dropped because
# consolidate() already carries the name in its own 'pl_name' field
df = (
    df_raw
    .groupby('pl_name')
    .apply(consolidate)
    .reset_index(drop=True)
)
print(f"After consolidation: {len(df):,} unique planets")
# 6,100 planets. that's our actual dataset.

In [None]:
# ok so the big question: which of these 6,100 planets could actually be habitable?
# I'm going to use what astronomers call the "Goldilocks" criteria. not too hot,
# not too cold, not too big, not too small. just right.

# 1. size: 0.5 to 1.6 Earth radii (small enough to be rocky, not a gas ball)
RADIUS_MIN = 0.5
RADIUS_MAX = 1.6

# 2. temperature: 180K to 310K (roughly -93°C to 37°C, the liquid water range)
TEMP_MIN = 180
TEMP_MAX = 310

# backup check using insolation flux (in units of Earth's flux)
INSOL_MIN = 0.2
INSOL_MAX = 2.0

# 3. star type: F, G, or K class (stable stars that don't fry their planets)
#    (no constant needed, this one is checked against the spectral type string)

print("Filters set. let's see how many survive.")

In [None]:
# applying filters one by one to see where we lose planets

# size of the consolidated table before any filter is applied
total_planets = df.shape[0]  # 6,100

# helper: check temp or insolation (some planets have one but not the other)
def is_habitable_temp(row):
    """Return True if the planet's temperature OR its insolation is in range.

    `row` must provide 'pl_eqt' and 'pl_insol'. Either value being inside its
    window ([TEMP_MIN, TEMP_MAX] kelvin, [INSOL_MIN, INSOL_MAX] Earth flux) is
    enough to pass; missing (NaN) values never pass on their own.

    NOTE(review): a planet whose measured temperature is out of range still
    passes when its insolation is in range. Confirm that is the intended
    meaning of "backup check".
    """
    eqt = row['pl_eqt']
    if pd.notna(eqt) and TEMP_MIN <= eqt <= TEMP_MAX:
        return True

    # only looked at when the temperature check did not already pass
    insol = row['pl_insol']
    return bool(pd.notna(insol) and INSOL_MIN <= insol <= INSOL_MAX)

def is_sunlike_star(spec_type):
    """Return True if a spectral type string starts with F, G or K.

    Parameters
    ----------
    spec_type : str or missing value
        Spectral classification such as 'G2 V'. Leading and trailing
        whitespace is ignored.

    Returns
    -------
    bool
        False for missing values, for empty or whitespace-only strings, and
        for any type whose first letter is not an upper-case F, G or K.
    """
    if pd.isna(spec_type):
        return False
    label = str(spec_type).strip()
    # startswith() is simply False on an empty string, whereas indexing [0]
    # raised IndexError on blank spectral types
    return label.startswith(('F', 'G', 'K'))

# step by step filtering, tracking how many we lose at each stage.
# the first entry uses the measured total (not a typed-in number) so the
# funnel always matches whatever is in the consolidated table.
funnel_data = [('All Confirmed Exoplanets', total_planets)]

stage1 = df[df['pl_rade'].notna()]
funnel_data.append(('With Radius Measurement', len(stage1)))  # 4,579

stage2 = stage1[(stage1['pl_rade'] >= RADIUS_MIN) & (stage1['pl_rade'] <= RADIUS_MAX)]
funnel_data.append(('Rocky Size (0.5-1.6 R⊕)', len(stage2)))  # 1,136

# keep planets that have at least one of the two temperature proxies
stage3 = stage2[stage2['pl_eqt'].notna() | stage2['pl_insol'].notna()]
funnel_data.append(('With Temperature Data', len(stage3)))  # 1,080

# .copy() so adding the flag column doesn't write into a slice of df
stage3 = stage3.copy()
stage3['is_habitable'] = stage3.apply(is_habitable_temp, axis=1)
stage4 = stage3[stage3['is_habitable']]
funnel_data.append(('Habitable Temperature', len(stage4)))  # 30

stage4 = stage4.copy()
stage4['sunlike_star'] = stage4['st_spectype'].apply(is_sunlike_star)
earth_candidates = stage4[stage4['sunlike_star']].copy()
funnel_data.append(('Sun-like Star (F, G, K)', len(earth_candidates)))  # 2

# print the funnel
for i, (stage, count) in enumerate(funnel_data):
    if i == 0:
        print(f"Starting: {count:,} planets")
    else:
        lost = funnel_data[i-1][1] - count
        print(f"  → {stage}: {count:,} ({lost:,} cut)")

# summary is derived from the data so it can't drift from the funnel above
# (with the archive snapshot used here: 2 out of 6,100, Kepler-452 b and Kepler-62 f)
n_survivors = len(earth_candidates)
survivor_names = " and ".join(earth_candidates['pl_name'].astype(str))
print(f"\n...{n_survivors}. out of {total_planets:,}, only {n_survivors} made it through.")
if n_survivors > 0:
    print(f"{survivor_names}. that's it.")

In [None]:
# visualizing that funnel. it's kind of brutal how fast the numbers drop

stages = [s[0] for s in funnel_data]
values = [s[1] for s in funnel_data]

fig = go.Figure(go.Funnel(
    y=stages, x=values,
    textposition="inside",
    textinfo="value+percent initial",
    opacity=0.85,
    marker=dict(
        # one color per funnel stage, in stage order
        color=[COLORS['accent_blue'], COLORS['accent_purple'], COLORS['accent_pink'],
               COLORS['accent_orange'], COLORS['accent_yellow'], COLORS['goldilocks']],
        line=dict(width=2, color="white")
    ),
    connector=dict(line=dict(color="white", width=1))
))

# subtitle is built from the first and last funnel counts so it always agrees
# with the bars (6,100 planets → 2 candidates for the snapshot used here)
funnel_subtitle = f"{values[0]:,} planets → {values[-1]:,} candidates"

fig.update_layout(
    title=dict(text=f"The Habitability Funnel<br><sup>{funnel_subtitle}</sup>",
               font=dict(size=24, color=COLORS['text_light'])),
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']), height=600,
)
fig.show()

# the biggest drop is at the size filter: 3,443 planets gone.
# most exoplanets we've found are gas giants. easy to detect, hard to live on.

In [None]:
# why are most planets too big? let's look at the size distribution

df_radius = df[df['pl_rade'].notna()]

rocky = df_radius[(df_radius['pl_rade'] >= RADIUS_MIN) & (df_radius['pl_rade'] <= RADIUS_MAX)]['pl_rade']
too_big = df_radius[df_radius['pl_rade'] > RADIUS_MAX]['pl_rade']
too_small = df_radius[df_radius['pl_rade'] < RADIUS_MIN]['pl_rade']

# same bin edges for all three groups so the overlaid bars line up
size_bins = dict(start=0.1, end=30, size=0.3)

fig = go.Figure()
for radii, label, color, alpha in [
    (too_big, 'Too Big (gas giants etc)', COLORS['accent_purple'], 0.7),
    (rocky, 'Rocky / Goldilocks size', COLORS['goldilocks'], 0.9),
    (too_small, 'Sub-Earth', COLORS['accent_blue'], 0.7),
]:
    fig.add_trace(go.Histogram(x=radii, name=label, marker_color=color,
                               opacity=alpha, xbins=size_bins))

# the line and the band are shapes, which take plain data values even on a log axis
fig.add_vline(x=1.0, line_dash="dash", line_color=COLORS['earth_blue'])
fig.add_vrect(x0=RADIUS_MIN, x1=RADIUS_MAX, fillcolor=COLORS['goldilocks'],
              opacity=0.1, line_width=0)

# the labels are annotations, and on a log axis an annotation's x must be
# log10 of the data value. letting add_vline/add_vrect place them would put
# "Earth" at 10 R⊕ and "Goldilocks Zone" near 40 R⊕, so they are added
# explicitly here (same anchors as the 'top' / 'top right' defaults).
fig.add_annotation(x=float(np.log10(1.0)), xref='x', y=1, yref='paper',
                   text="Earth", showarrow=False,
                   xanchor='center', yanchor='bottom')
fig.add_annotation(x=float(np.log10(RADIUS_MAX)), xref='x', y=1, yref='paper',
                   text="Goldilocks Zone", showarrow=False,
                   xanchor='right', yanchor='top')

fig.update_layout(
    title='Planet Size Distribution<br><sup>log scale, the green zone is what we want</sup>',
    xaxis_title='Radius (Earth = 1.0)', yaxis_title='Count',
    barmode='overlay',
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']), height=500, xaxis_type="log",
)
fig.show()

print(f"Rocky-sized: {len(rocky):,} | Too big: {len(too_big):,} | Too small: {len(too_small):,}")
# the purple mountain of gas giants dominates. we're swimming in Jupiters.

In [None]:
# now the real question: right size AND right temperature?
# every planet that has both a radius and an equilibrium temperature goes on
# a temperature-vs-radius scatter

df_temp = df.dropna(subset=['pl_rade', 'pl_eqt']).copy()
size_ok = df_temp['pl_rade'].between(RADIUS_MIN, RADIUS_MAX)
temp_ok = df_temp['pl_eqt'].between(TEMP_MIN, TEMP_MAX)
df_temp['in_goldilocks'] = size_ok & temp_ok

fig = px.scatter(
    df_temp,
    x='pl_eqt',
    y='pl_rade',
    color='in_goldilocks',
    color_discrete_map={True: COLORS['goldilocks'], False: COLORS['accent_purple']},
    hover_name='pl_name',
    hover_data={'pl_eqt': ':.0f', 'pl_rade': ':.2f', 'hostname': True},
    opacity=0.6,
    labels={'pl_eqt': 'Temperature (K)', 'pl_rade': 'Radius (Earth Radii)'},
)

# outline the goldilocks window (temperature range x radius range)
fig.add_shape(
    type="rect",
    x0=TEMP_MIN, x1=TEMP_MAX,
    y0=RADIUS_MIN, y1=RADIUS_MAX,
    line=dict(color=COLORS['goldilocks'], width=3, dash="dash"),
    fillcolor=COLORS['goldilocks'],
    opacity=0.1,
)

# Earth as a reference marker at 255 K, 1.0 Earth radii
earth_marker = go.Scatter(
    x=[255], y=[1.0],
    mode='markers+text',
    marker=dict(size=15, color=COLORS['earth_blue'], symbol='star'),
    text=['Earth'], textposition='top center',
    name='Earth',
)
fig.add_trace(earth_marker)

fig.update_layout(
    title='Temperature vs Size: The Habitability Map',
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']),
    yaxis_type="log", height=600,
)
fig.show()

# that green box is tiny compared to the full scatter. finding earth-like
# planets is literally looking for a needle in a haystack.

In [None]:
# when did we start finding all these planets?

# per-year totals, plus the subset that falls in the rocky size window
rocky_mask = df['pl_rade'].between(RADIUS_MIN, RADIUS_MAX)
yearly = df.groupby('disc_year').size().reset_index(name='count')
yearly_rocky = df[rocky_mask].groupby('disc_year').size().reset_index(name='rocky')
# left join keeps years with no rocky planets; those get 0 instead of NaN
yearly = yearly.merge(yearly_rocky, on='disc_year', how='left').fillna(0)
yearly['cumulative'] = yearly['count'].cumsum()

fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=('Discoveries Per Year', 'Cumulative Total'),
    vertical_spacing=0.12,
)

# top panel: all discoveries with the rocky-sized ones overlaid
all_bars = go.Bar(x=yearly['disc_year'], y=yearly['count'], name='All',
                  marker_color=COLORS['accent_purple'], opacity=0.7)
rocky_bars = go.Bar(x=yearly['disc_year'], y=yearly['rocky'], name='Rocky-sized',
                    marker_color=COLORS['goldilocks'], opacity=0.9)
fig.add_trace(all_bars, row=1, col=1)
fig.add_trace(rocky_bars, row=1, col=1)

# bottom panel: running total
running_total = go.Scatter(x=yearly['disc_year'], y=yearly['cumulative'],
                           mode='lines+markers', name='Total',
                           line=dict(color=COLORS['accent_purple'], width=3),
                           fill='tozeroy', fillcolor='rgba(168,85,247,0.2)')
fig.add_trace(running_total, row=2, col=1)

# marking the missions that changed everything
missions = [
    (2009, 'Kepler', COLORS['accent_yellow']),
    (2018, 'TESS', COLORS['accent_blue']),
    (2022, 'JWST', COLORS['accent_orange']),
]
for launch_year, mission, mission_color in missions:
    fig.add_vline(x=launch_year, line_dash="dash", line_color=mission_color,
                  row=1, col=1,
                  annotation_text=mission, annotation_position="top")

fig.update_layout(
    title=dict(text="Discovery Timeline", font=dict(size=22)),
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']), height=700,
    barmode='overlay',
)
fig.show()

# that spike around 2014-2016 is the Kepler mission going crazy.
# before Kepler launched in 2009, we barely had a few hundred confirmed planets.

In [None]:
# let's look at our 2 survivors more closely

candidates = earth_candidates[['pl_name', 'pl_rade', 'pl_eqt', 'st_spectype',
                                'sy_dist', 'disc_year', 'discoverymethod']].copy()

# headline count comes from the data (2 for the snapshot used here)
n_candidates = len(candidates)

print(f"Our {n_candidates} Earth 2.0 candidates:")
for _, row in candidates.iterrows():
    # disc_year is NaN when no archive row carried a year, and int(NaN)
    # raises ValueError, so fall back to a label instead of crashing
    found_year = int(row['disc_year']) if pd.notna(row['disc_year']) else 'unknown year'
    print(f"\n  {row['pl_name']}")
    print(f"    Radius: {row['pl_rade']:.2f}x Earth")
    print(f"    Temperature: {row['pl_eqt']:.0f}K")
    print(f"    Star type: {row['st_spectype']}")
    print(f"    Distance: {row['sy_dist']:.1f} parsecs")
    print(f"    Found: {found_year} via {row['discoverymethod']}")

# both found by Kepler, both orbiting sun-like stars, both slightly bigger than Earth.
# Kepler-452 b orbits a G2 star (same class as our Sun). that's as close as it gets.

fig = make_subplots(rows=1, cols=2,
                    subplot_titles=("Size (Earth = 1.0)", "Temperature (Earth ≈ 255K)"))

fig.add_trace(go.Bar(y=candidates['pl_name'], x=candidates['pl_rade'], orientation='h',
                     marker_color=COLORS['goldilocks'], name='Radius'), row=1, col=1)
fig.add_vline(x=1.0, line_dash="dash", line_color=COLORS['earth_blue'], row=1, col=1)

# a candidate can qualify on insolation alone, so only plot measured temperatures
temp_data = candidates[candidates['pl_eqt'].notna()]
fig.add_trace(go.Bar(y=temp_data['pl_name'], x=temp_data['pl_eqt'], orientation='h',
                     marker_color=COLORS['accent_blue'], name='Temp'), row=1, col=2)
fig.add_vline(x=255, line_dash="dash", line_color=COLORS['earth_blue'], row=1, col=2)

fig.update_layout(
    title=dict(text=f"The {n_candidates} Candidates vs Earth", font=dict(size=24)),
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']), height=400, showlegend=False,
)
fig.show()

## Going deeper

Ok so we found 2 candidates. But something's been bugging me: are we actually *good* at finding Earth-like planets, or are our detection methods biased toward finding big, close-in ones?

Let's do some real data science on this. Three things I want to check:
1. **Detection bias**: what each discovery method can and can't see
2. **Correlations**: which planet properties are linked to each other
3. **Earth Similarity Index**: scoring every planet on a 0-1 scale instead of pass/fail

In [None]:
# 1. DETECTION BIAS
# the way we find planets affects what we find. transit method watches for
# brightness dips (favors close-in planets), radial velocity measures star wobble
# (favors heavy planets). Earth sits in the blind spot of both.

df_bias = df[df['pl_rade'].notna() & df['pl_orbper'].notna() & df['discoverymethod'].notna()].copy()

# the five most common methods keep their own label; everything else is 'Other'
method_counts = df_bias['discoverymethod'].value_counts()
top_methods = method_counts.head(5).index.tolist()
df_bias['method_group'] = df_bias['discoverymethod'].apply(lambda x: x if x in top_methods else 'Other')

method_colors = {
    'Transit': COLORS['accent_yellow'],
    'Radial Velocity': COLORS['accent_blue'],
    'Imaging': COLORS['accent_purple'],
    'Microlensing': COLORS['accent_pink'],
    'Transit Timing Variations': COLORS['accent_green'],
    'Other': '#666666',
}

# top_methods comes from the data but method_colors is typed in by hand, so a
# top-5 method can be missing from the dict. such a method used to be dropped
# from the chart silently (it is neither in the dict nor relabelled 'Other').
# give every unmapped top method a fallback color so each planet gets plotted.
fallback_colors = [COLORS['accent_orange'], COLORS['accent_red'], COLORS['text_light']]
group_colors = {m: c for m, c in method_colors.items() if m != 'Other'}
unmapped_methods = [m for m in top_methods if m not in method_colors]
for i, m in enumerate(unmapped_methods):
    group_colors[m] = fallback_colors[i % len(fallback_colors)]
group_colors['Other'] = method_colors['Other']  # keep 'Other' last in the legend

fig = go.Figure()
for method, color in group_colors.items():
    subset = df_bias[df_bias['method_group'] == method]
    if len(subset) == 0:
        # a hand-listed method that didn't make the top five (or has no data)
        continue
    fig.add_trace(go.Scattergl(
        x=subset['pl_orbper'], y=subset['pl_rade'],
        mode='markers', name=f'{method} ({len(subset):,})',
        text=subset['pl_name'],
        # transit points are drawn smaller and fainter than the rest
        marker=dict(color=color, size=4 if method == 'Transit' else 6,
                    opacity=0.4 if method == 'Transit' else 0.7),
        hovertemplate='<b>%{text}</b><br>Period: %{x:.1f}d<br>Radius: %{y:.2f} R⊕<extra>' + method + '</extra>',
    ))

# earth for scale
fig.add_trace(go.Scatter(
    x=[365.25], y=[1.0], mode='markers+text',
    marker=dict(size=14, color=COLORS['earth_blue'], symbol='star'),
    text=['Earth'], textposition='top center', name='Earth',
))

# rocky zone band
fig.add_shape(type="rect", x0=0.1, x1=1e6, y0=RADIUS_MIN, y1=RADIUS_MAX,
              fillcolor=COLORS['goldilocks'], opacity=0.06,
              line=dict(color=COLORS['goldilocks'], width=1, dash='dot'))

fig.update_layout(
    title='Detection Bias: What Each Method Can See<br><sup>Earth sits in the blind spot</sup>',
    xaxis_title='Orbital Period (days)', yaxis_title='Planet Radius (Earth Radii)',
    xaxis_type='log', yaxis_type='log',
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']),
    legend=dict(orientation='h', y=1.12, x=0.5, xanchor='center'),
    height=600,
)
fig.show()

# look at where Earth sits on this chart. 365 days, 1.0 radius.
# it's in a nearly empty zone. transit (yellow) clusters at short periods,
# radial velocity (blue) clusters at large radii. we're blind right where
# we need to look. that's not a coincidence, it's physics.

In [None]:
# 2. CORRELATION HEATMAP
# checking which planet/star properties tend to move together.
# correlation goes from -1 (opposite) to +1 (move together), 0 = no link.

# (column, axis label) pairs, kept together so the two lists can't get out of step
feature_labels = [
    ('pl_rade', 'Radius'),
    ('pl_bmasse', 'Mass'),
    ('pl_eqt', 'Temp'),
    ('pl_insol', 'Insolation'),
    ('pl_orbper', 'Orbital Period'),
    ('st_teff', 'Star Temp'),
    ('st_rad', 'Star Radius'),
    ('st_mass', 'Star Mass'),
    ('sy_dist', 'Distance'),
]
corr_features = [col for col, _ in feature_labels]
corr_labels = [label for _, label in feature_labels]

# drop only rows where every feature is missing
corr_df = df[corr_features].dropna(how='all')
corr_matrix = corr_df.corr(method='pearson')

# blue (negative) → dark (zero) → gold (positive)
heat_scale = [
    [0, '#3a5a8c'],
    [0.25, '#2a3a5c'],
    [0.5, '#1a1a2e'],
    [0.75, '#6a5a3c'],
    [1, '#c4915c'],
]
r_values = corr_matrix.values

fig = go.Figure(go.Heatmap(
    z=r_values,
    x=corr_labels,
    y=corr_labels,
    colorscale=heat_scale,
    zmin=-1, zmax=1,
    text=np.round(r_values, 2),
    texttemplate='%{text}',
    textfont=dict(size=10),
    hovertemplate='%{y} vs %{x}<br>r = %{z:.3f}<extra></extra>',
    colorbar=dict(title='r'),
))

fig.update_layout(
    title='Correlation Matrix<br><sup>gold = rise together, blue = opposite, dark = nothing</sup>',
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']),
    height=600, width=700,
    xaxis=dict(tickangle=-45), yaxis=dict(autorange='reversed'),
)
fig.show()

# some obvious stuff: radius and mass correlate at 0.66 (bigger = heavier, duh).
# star temp and star radius at 0.53 (hotter stars tend to be bigger).
# the interesting one: distance (sy_dist) is basically 0 with everything else.
# that means our sample isn't distance-biased. we're not just finding weird
# planets nearby. the dataset is actually pretty representative.

In [None]:
# 3. EARTH SIMILARITY INDEX (ESI)
# problem with our filters: they're pass/fail. a planet at 311K (1 degree over
# the 310K limit) gets the same "no" as a 2000K lava world. that feels wrong.
#
# ESI gives every planet a score from 0 (nothing like Earth) to 1 (identical).
# it uses radius, density, temperature, and insolation with different weights.
# temperature gets the highest weight (5.58) because it determines liquid water.

# grab density from the raw data (median across all rows for each planet)
dens_data = df_raw.groupby('pl_name')['pl_dens'].median().reset_index()
# drop any pl_dens left over from a previous run of this cell first. without
# that, re-running the cell merges a second copy, pandas renames the pair to
# pl_dens_x / pl_dens_y, and the ESI silently stops seeing density.
df = df.drop(columns=['pl_dens'], errors='ignore').merge(dens_data, on='pl_name', how='left')

def esi(row):
    """Score one planet's similarity to Earth on a 0-1 scale.

    Each available property contributes a factor
    ``(1 - |x - x_earth| / (x + x_earth)) ** (weight / sum_of_weights)``
    and the factors are multiplied together, so the result is a weighted
    geometric mean that stays in [0, 1].

    Parameters
    ----------
    row : mapping with a ``.get`` method (dict or pd.Series)
        May provide 'pl_rade', 'pl_dens', 'pl_eqt' and 'pl_insol'. Missing,
        NaN or negative values are ignored.

    Returns
    -------
    float
        The score rounded to 4 decimals, or NaN when fewer than two usable
        properties are available.
    """
    # (column, Earth reference value, weight); weights from Schulze-Makuch et al. 2011
    # NOTE(review): the paper assigns 0.70 to escape velocity and 5.58 to
    # surface temperature; here they are applied to insolation and equilibrium
    # temperature. confirm the substitution is intended.
    earth_refs = [
        ('pl_rade', 1.0, 0.57),
        ('pl_dens', 5.51, 1.07),
        ('pl_eqt', 255.0, 5.58),
        ('pl_insol', 1.0, 0.70),
    ]

    params = []
    for col, earth_val, weight in earth_refs:
        val = row.get(col)
        # negative values are non-physical. they would also make the base of
        # the power negative, giving a complex number that round() rejects,
        # so they are treated as missing.
        if pd.notna(val) and val >= 0:
            params.append((val, earth_val, weight))

    if len(params) < 2:
        return np.nan

    # exponents are normalised by the total weight of the properties present,
    # so scores stay comparable between planets with different data coverage
    # NOTE(review): the published ESI divides by the number of properties
    # instead. verify which normalisation is wanted.
    n = sum(w for _, _, w in params)
    score = 1.0
    for val, earth_val, weight in params:
        score *= (1.0 - abs(val - earth_val) / (val + earth_val)) ** (weight / n)
    return round(score, 4)

# score every planet
df['esi'] = df.apply(esi, axis=1)

esi_valid = df['esi'].dropna()
print(f"Computed ESI for {len(esi_valid):,} planets")
print(f"Mean: {esi_valid.mean():.4f} | Median: {esi_valid.median():.4f}")
print(f"ESI > 0.8: {(esi_valid > 0.8).sum()} planets")
print(f"ESI > 0.9: {(esi_valid > 0.9).sum()} planets")

# top 30 most Earth-like
top30 = df.dropna(subset=['esi']).nlargest(30, 'esi')
# which of those also survived the pass/fail funnel
is_candidate = top30['pl_name'].isin(earth_candidates['pl_name'])

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('ESI Distribution (all planets)', 'Top 30 Most Earth-like'),
    column_widths=[0.4, 0.6],
)

# left panel: distribution of all scores
fig.add_trace(
    go.Histogram(x=esi_valid, nbinsx=50,
                 marker_color=COLORS['accent_purple'], opacity=0.7),
    row=1, col=1,
)
fig.add_vline(x=0.8, line_dash='dash', line_color=COLORS['goldilocks'], row=1, col=1,
              annotation_text='>0.8 = very Earth-like')

# right panel: lollipop chart. horizontal bars are drawn bottom-up, so every
# sequence is reversed once here to put the best score at the top.
colors = ['#c4915c' if c else '#7895a8' for c in is_candidate]
esi_rev = top30['esi'].values[::-1]
names_rev = top30['pl_name'].values[::-1]
colors_rev = colors[::-1]
flags_rev = list(is_candidate)[::-1]

# thin faded bar = the lollipop stick
fig.add_trace(
    go.Bar(x=esi_rev, y=names_rev, orientation='h',
           marker=dict(color=colors_rev, opacity=0.4),
           width=0.3, showlegend=False, hoverinfo='skip'),
    row=1, col=2,
)
# marker + score label = the lollipop head (stars for funnel survivors)
fig.add_trace(
    go.Scatter(
        x=esi_rev, y=names_rev,
        mode='markers+text',
        marker=dict(color=colors_rev,
                    size=[14 if c else 10 for c in flags_rev],
                    symbol=['star' if c else 'circle' for c in flags_rev]),
        text=[f'{v:.3f}' for v in esi_rev],
        textposition='middle right',
        textfont=dict(size=9, color=COLORS['text_light']),
        showlegend=False,
    ),
    row=1, col=2,
)

fig.add_vline(x=1.0, line_dash='dash', line_color=COLORS['earth_blue'], row=1, col=2,
              annotation_text='Earth')

fig.update_layout(
    title='Earth Similarity Index<br><sup>★ = also passed our filters | closer to 1.0 = more Earth-like</sup>',
    paper_bgcolor=COLORS['bg_dark'], plot_bgcolor=COLORS['bg_dark'],
    font=dict(color=COLORS['text_light']),
    height=650, xaxis2=dict(range=[0.5, 1.05]),
)
fig.show()

# interesting: lots of high-ESI planets didn't pass our strict funnel.
# ESI captures "degrees of similarity". you can score 0.85 and still miss
# one criterion by a hair. it's a scorecard, not a gate.

In [None]:
# final look at our candidates

banner = "=" * 50
print(banner)
print("THE TWO EARTH 2.0 CANDIDATES")
print(banner)

for _, planet in earth_candidates.iterrows():
    # distance line is left empty (a blank line is printed) when unknown
    if pd.notna(planet['sy_dist']):
        dist_info = f"  Distance: {planet['sy_dist']:.1f} parsecs"
    else:
        dist_info = ""
    print(f"\n{planet['pl_name']}:")
    print(f"  Radius: {planet['pl_rade']:.2f}x Earth")
    print(f"  Temperature: {planet['pl_eqt']:.0f}K")
    print(f"  Host star: {planet['st_spectype']}")
    print(f"{dist_info}")

print("\nboth discovered by Kepler. both slightly larger than Earth.")
print("both colder than Earth but within the liquid water range.")
print("Kepler-452 b orbits a G2 star, the same spectral class as our Sun.")

In [None]:
# wrap up

# one entry per printed line; a trailing "\n" adds the blank line after it
closing_lines = [
    "2 out of 6,100 confirmed exoplanets passed all our habitability filters.",
    "that's 0.0328%.\n",
    "but here's the thing: our detection methods are biased against finding",
    "Earth-like planets in the first place. the real number could be much higher.",
    "we just can't see them yet.\n",
    "JWST is already changing that. ask me again in 5 years.",
]
print("\n".join(closing_lines))