In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

Filter and output dataset based on our 5 criteria - planet insolation flux, planet radius, planet semimajor axis, planet mass (in Earth masses), and planet equilibrium temperature.

In [None]:
import pandas as pd
import zipfile
import requests
from io import StringIO

# Keep only planets that report all five criteria, then save the trimmed table.

zip_path = "raw.zip"

# Read the CSV member straight out of the archive without extracting it.
with zipfile.ZipFile(zip_path) as archive, archive.open("raw.csv") as raw_csv:
    df = pd.read_csv(raw_csv)

REQUIRED = ["pl_insol", "pl_rade", "pl_orbsmax", "pl_masse", "pl_eqt"]
ID_COLS = ["pl_name", "hostname"]

# Drop every row that is missing at least one of the required measurements.
df_filtered = df.dropna(subset=REQUIRED).copy()

before, after = len(df), len(df_filtered)
print(f"Rows before filter: {before}")
print(f"Rows after filter: {after}")

# Identifier columns first, then the criteria; names absent from the table
# are silently skipped.
wanted = ID_COLS + REQUIRED
keep_cols = [name for name in wanted if name in df_filtered.columns]
df_filtered = df_filtered[keep_cols]

display(df_filtered.head(10))

# Persist the filtered table; later cells reload it from this file.
output_path = "exoplanets_filtered_required.csv"
df_filtered.to_csv(output_path, index=False)
print(f"Saved -> {output_path}")

'import pandas as pd\nimport requests\nfrom io import StringIO\n\n# Filtering and outputing dataset based on our 5 criteria.\n\npath = "/content/drive/MyDrive/PS_2025.09.20_09.02.29.csv"\ndf = pd.read_csv(path, comment="#", low_memory=False)\n\nREQUIRED = ["pl_insol", "pl_rade", "pl_orbsmax", "pl_masse", "pl_eqt"]\n\nbefore = len(df)\ndf_filtered = df.dropna(subset=REQUIRED).copy()\nafter = len(df_filtered)\n\nprint(f"Rows before filter: {before}")\nprint(f"Rows after filter: {after}")\n\nID_COLS = ["pl_name", "hostname"]\nkeep_cols = [c for c in ID_COLS + REQUIRED if c in df_filtered.columns]\ndf_filtered = df_filtered[keep_cols]\n\ndisplay(df_filtered.head(10))\noutput_path = "/content/drive/MyDrive/exoplanets_filtered_required.csv"\ndf_filtered.to_csv(output_path, index=False)\nprint(f"Saved -> {output_path}")'

Normalize equilibrium temperature based on Earth's equilibrium temperature and save to a new dataset.

In [None]:
# Reload the filtered table written by the filtering step.
file_path = "exoplanets_filtered_required.csv"
df = pd.read_csv(file_path, comment="#", low_memory=False)

# Keep just the five criteria columns (any that are absent are skipped).
key_columns = ["pl_insol", "pl_rade", "pl_orbsmax", "pl_masse", "pl_eqt"]
present_columns = [name for name in key_columns if name in df.columns]
df_reduced = df[present_columns].copy()

# Express equilibrium temperature relative to Earth's (255 K), so Earth = 1.0.
# Values that cannot be parsed as numbers become NaN.
EARTH_EQT_K = 255.0
if "pl_eqt" in df_reduced.columns:
    eqt_kelvin = pd.to_numeric(df_reduced["pl_eqt"], errors="coerce")
    df_reduced["pl_eqt"] = eqt_kelvin / EARTH_EQT_K

# Write the Earth-relative subset to its own file.
output_path = "exoplanet_habitability_subset_eqt_norm.csv"
df_reduced.to_csv(output_path, index=False)

print(f"Updated dataset saved to: {output_path}")
print(df_reduced.head())

Updated dataset saved to: exoplanet_habitability_subset_eqt_norm.csv
   pl_insol  pl_rade  pl_orbsmax  pl_masse    pl_eqt
0    132.26  6.38913     0.07100   47.6745  3.678431
1   2852.00  1.56500     0.01770    7.6700  7.992157
2   4293.00  1.82000     0.01312    9.0000  8.090196
3   4656.00  1.67600     0.01314    7.8000  9.015686
4   4670.00  1.72500     0.01326    8.2100  9.019608


Clean the dataset by removing duplicates.

In [None]:
# Reload the filtered table.
file_path = "exoplanets_filtered_required.csv"
df = pd.read_csv(file_path, low_memory=False)

# A planet may appear on several rows; keep only the first row per name.
df_unique = df.drop_duplicates(subset=["pl_name"], keep="first")

# Write the de-duplicated table.
output_path = "lastfilter.csv"
df_unique.to_csv(output_path, index=False)

rows_before, rows_after = len(df), len(df_unique)
print(f"Cleaned dataset saved to: {output_path}")
print(f"Original rows: {rows_before}, After removing duplicates: {rows_after}")

Cleaned dataset saved to: /content/drive/MyDrive/lastfilter.csv
Original rows: 615, After removing duplicates: 539


Min-max normalize the five criteria columns in the dataset to the range [0, 1].

In [None]:
import pandas as pd

# Reload the de-duplicated table.
file_path = "lastfilter.csv"
df = pd.read_csv(file_path, low_memory=False)

# Keep the first row seen for each planet name.
df_unique = df.drop_duplicates(subset=["pl_name"], keep="first")

print(f"Original rows: {len(df)}, After removing duplicates: {len(df_unique)}")

# Min-max scale each criterion column: (value - min) / (max - min).
cols_to_normalize = ["pl_insol", "pl_rade", "pl_orbsmax", "pl_masse", "pl_eqt"]

df_norm = df_unique.copy()
for col in cols_to_normalize:
    if col not in df_norm.columns:
        continue
    lo = df_norm[col].min()
    hi = df_norm[col].max()
    # A constant column has zero range; set it to 0.0 rather than divide by zero.
    df_norm[col] = (df_norm[col] - lo) / (hi - lo) if hi != lo else 0.0

# Write the normalized table.
output_path = "FINALDATA.csv"
df_norm.to_csv(output_path, index=False)

print(f"Normalized dataset saved to: {output_path}")
print(df_norm.head())


Original rows: 539, After removing duplicates: 539
Normalized dataset saved to: /content/drive/MyDrive/FINALDATA.csv
            pl_name        hostname  pl_insol   pl_rade  pl_orbsmax  pl_masse  \
0        CoRoT-32 b        CoRoT-32  0.002942  0.261712    0.014225  0.007603   
1  EPIC 201595106 b  EPIC 201595106  0.063515  0.054036    0.002361  0.001214   
2  EPIC 220674823 b  EPIC 220674823  0.095609  0.065013    0.001342  0.001426   
3  EPIC 220674823 c  EPIC 220674823  0.001421  0.105910    0.022260  0.000899   
4  EPIC 229004835 b  EPIC 229004835  0.001548  0.087055    0.025954  0.001650   

     pl_eqt  
0  0.199383  
1  0.482377  
2  0.488809  
3  0.143813  
4  0.164909  


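Calculate the habitability score as a weighted sum of (1 − normalized value) over the five criteria, so lower normalized values score higher.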

In [None]:
import pandas as pd

# Reload the table written by the normalization step.
file_path = "FINALDATA.csv"
df = pd.read_csv(file_path)

# Relative importance of each criterion; the five weights add up to 1.0.
weights = {
    "pl_insol": 0.30,
    "pl_rade": 0.25,
    "pl_masse": 0.20,
    "pl_eqt": 0.15,
    "pl_orbsmax": 0.10
}

# Each available criterion contributes weight * (1 - value), so smaller column
# values raise the score. Criteria missing from the table contribute nothing,
# and the running total starts from 0.
df["habitability_score"] = sum(
    weight * (1 - df[name])
    for name, weight in weights.items()
    if name in df.columns
)

# Write the table, now including the score column.
output_path = "Scores.csv"
df.to_csv(output_path, index=False)

# Show the first 10 habitability scores.
df[["pl_name", "habitability_score"]].head(10)


Unnamed: 0,pl_name,habitability_score
0,CoRoT-32 b,0.900839
1,EPIC 201595106 b,0.894601
2,EPIC 220674823 b,0.881323
3,EPIC 220674823 c,0.949118
4,EPIC 229004835 b,0.95011
5,EPIC 246851721 b,0.792577
6,EPIC 248847494 b,0.636677
7,EPIC 249893012 b,0.918183
8,EPIC 249893012 c,0.927652
9,EPIC 249893012 d,0.932786


Print the 10 highest habitability scores.

In [18]:
# Rank planets from highest to lowest score and keep the first ten rows.
ranked = df.sort_values(by="habitability_score", ascending=False)
top10 = ranked.iloc[:10]
print(top10.loc[:, ["pl_name", "habitability_score"]])

          pl_name  habitability_score
501  TRAPPIST-1 d            0.989865
502  TRAPPIST-1 e            0.989557
503  TRAPPIST-1 f            0.989238
504  TRAPPIST-1 g            0.988944
23    Gliese 12 b            0.986122
500  TRAPPIST-1 c            0.984829
202  Kepler-138 b            0.983711
241    LHS 1140 b            0.983357
244   LP 791-18 d            0.982905
246    LP 890-9 c            0.982876


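Fit a linear regression model that predicts the habitability score from the five normalized criteria, then use it to score a hypothetical planet.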

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load new dataset that includes habitability score.
file_path = "Scores.csv"
df = pd.read_csv(file_path)

# Define x and y. The feature order is named once so the training data and the
# hypothetical planet below are guaranteed to use the same column layout.
feature_cols = ["pl_insol", "pl_rade", "pl_masse", "pl_eqt", "pl_orbsmax"]
X = df[feature_cols]
y = df["habitability_score"]

# Create train/test split (fixed seed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features and train model. The scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Evaluate model on the held-out split.
# NOTE(review): if Scores.csv is the file written by the scoring cell, the
# target is a fixed weighted sum of these same columns, so a near-perfect fit
# is expected here and says nothing about real-world habitability.
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"Test RMSE: {rmse:.6f}")
print(f"Test R^2: {r2:.6f}")

# Predict a habitability score using normalized variables for a hypothetical planet.
# The values are listed in feature_cols order and wrapped in a DataFrame with
# the same column names the scaler was fitted on, which avoids scikit-learn's
# "X does not have valid feature names" warning.
new_planet = pd.DataFrame([[0.5, 0.9, 1.2, 1.0, 0.4]], columns=feature_cols)
new_planet_scaled = scaler.transform(new_planet)
predicted_score = model.predict(new_planet_scaled)
print("Predicted habitability score:", predicted_score[0])

Predicted habitability score: 0.19499999999999906




Create a frontend representation by rendering HTML.

In [20]:
from IPython.display import HTML

# Self-contained HTML page (markup, CSS and JavaScript) held in a plain string.
# Nothing from the Python session is interpolated into it: the weights, the
# targets and the scoring logic are all defined inside the embedded <script>.
#
# The script builds one range slider per parameter (min 0, max 2, step 0.01,
# default 1.00). Each parameter contributes weight * max(0, 1 - |value - target|)
# with every target set to 1.0; the total is clamped to [0, 1] and shown both
# as text and as the width of the bar.
#
# Note that this differs from the score computed earlier in the notebook, which
# uses weight * (1 - normalized value) on the dataset columns.
html_code = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>Exoplanet Habitability Score</title>
  <style>
    body {
      font-family: sans-serif;
      margin: 2rem;
      background: #f9fafb;
      color: #333;
    }
    h1 { margin-bottom: 1rem; }
    .slider-group { margin-bottom: 1.5rem; }
    .label { display: flex; justify-content: space-between; margin-bottom: 0.25rem; }
    input[type="range"] { width: 70%; }
    .bar {
      height: 20px; background: #ddd; border-radius: 10px;
      overflow: hidden; margin: 1rem 0;
    }
    .bar-fill {
      height: 100%; background: #10b981; width: 0%;
      transition: width 0.3s ease;
    }
    .score { font-size: 1.5rem; font-weight: bold; text-align: right; }
    .hint { color:#6b7280; font-size: 0.9rem; margin: 0.25rem 0 1rem; }
  </style>
</head>
<body>
  <h1>Exoplanet Habitability Score</h1>
  <div class="hint">Inputs are relative to Earth, where Earth = 1.00. Score peaks when values are near Earth-like conditions (1.0).</div>

  <div id="sliders"></div>

  <div class="bar">
    <div id="bar-fill" class="bar-fill"></div>
  </div>
  <div class="score" id="score">Score: 1.000</div>

  <script>
    const weights = {
      pl_insol: 0.30,
      pl_rade: 0.25,
      pl_masse: 0.20,
      pl_eqt: 0.15,
      pl_orbsmax: 0.10
    };

    const friendlyLabels = {
      pl_insol: "Insolation Flux",
      pl_rade: "Radius (Earth Radii)",
      pl_masse: "Mass (Earth Masses)",
      pl_eqt: "Equilibrium Temperature (K, Earth ≈ 255 K ⇒ 1.0)",
      pl_orbsmax: "Semi-Major Axis (AU)"
    };

    const targets = {
      pl_insol: 1.0,
      pl_rade: 1.0,
      pl_masse: 1.0,
      pl_eqt: 1.0,
      pl_orbsmax: 1.0
    };

    const params = Object.keys(weights);
    const slidersDiv = document.getElementById("sliders");

    const values = {};
    params.forEach(p => values[p] = 1.0);

    function earthSimilarity(value, target = 1.0) {
      return Math.max(0, 1 - Math.abs(value - target));
    }

    function updateScore() {
      let score = 0;
      for (const p of params) {
        const sim = earthSimilarity(values[p], targets[p]);
        score += weights[p] * sim;
      }
      score = Math.max(0, Math.min(1, score));
      document.getElementById("bar-fill").style.width = (score * 100).toFixed(1) + "%";
      document.getElementById("score").textContent = "Score: " + score.toFixed(3);
    }

    params.forEach(p => {
      const group = document.createElement("div");
      group.className = "slider-group";

      const label = document.createElement("div");
      label.className = "label";
      label.innerHTML = `<span>${friendlyLabels[p]}</span><span id="${p}-val">1.00</span>`;
      group.appendChild(label);

      const slider = document.createElement("input");
      slider.type = "range";
      slider.min = "0";
      slider.max = "2";
      slider.step = "0.01";
      slider.value = "1.00"; // Earth-like default

      slider.oninput = () => {
        const v = parseFloat(slider.value);
        values[p] = v;
        document.getElementById(p + "-val").textContent = v.toFixed(2);
        updateScore();
      };

      group.appendChild(slider);
      slidersDiv.appendChild(group);
    });

    updateScore();
  </script>
</body>
</html>
"""

# Leaving the HTML object as the cell's last expression makes Jupyter render the
# page in the output area instead of printing the raw markup.
HTML(html_code)
