In [2]:
pip install opencv-python


Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (63.0 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.0/63.0 MB[0m [31m474.6 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:04[0m
Installing collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Step 1: Generate Synthetic Data
def generate_synthetic_data(num_samples=1000):
    np.random.seed(42)
    gases = ["O₂", "CO₂", "CH₄", "H₂O", "O₃"]
    data = {gas: np.random.uniform(0, 1, num_samples) for gas in gases}
    data['habitability_score'] = (
        0.4 * data["O₂"] + 0.3 * data["H₂O"] - 0.2 * data["CO₂"] - 0.1 * data["CH₄"] + 0.2 * data["O₃"]
    ) * 100  # Normalized score (0-100)
    df = pd.DataFrame(data)
    df.to_csv("synthetic_exoplanet_data.csv", index=False)
    return df

# Step 2: Train ML Model
def train_model(df):
    X = df.drop(columns=["habitability_score"])
    y = df["habitability_score"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("MSE:", mean_squared_error(y_test, y_pred))
    return model

# Step 3: Extract Data from Spectra Images
def extract_spectrum_data(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Error loading image. Check file path.")
    edges = cv2.Canny(img, 50, 150)
    y_indices, x_indices = np.nonzero(edges)
    if len(x_indices) == 0 or len(y_indices) == 0:
        raise ValueError("No spectral features detected.")
    x_values = np.interp(x_indices, (x_indices.min(), x_indices.max()), (0.5, 5.5))
    y_values = np.interp(y_indices, (y_indices.min(), y_indices.max()), (0, 1e-16))
    return x_values, y_values

# Step 4: Detect Absorption Features
def detect_absorption_features(wavelength, flux):
    if len(wavelength) == 0 or len(flux) == 0:
        raise ValueError("Invalid wavelength or flux data.")
    peaks, _ = find_peaks(-flux, distance=5)
    return wavelength[peaks]

# Step 5: Identify Atmospheric Gases
def identify_gases(absorption_wavelengths):
    known_gases = {"O₂": [0.76], "CO₂": [2.7, 4.3, 15], "CH₄": [1.65, 2.3, 7.7], "H₂O": [0.94, 1.13, 1.4, 1.9, 6.3], "O₃": [9.6]}
    detected_gases = {}
    tolerance = 0.1
    for gas, bands in known_gases.items():
        detected_gases[gas] = [w for w in absorption_wavelengths if any(abs(w - b) < tolerance for b in bands)]
    return detected_gases

# Step 6: Predict Habitability from Spectra
def predict_habitability(model, image_path):
    try:
        wavelength, flux = extract_spectrum_data(image_path)
        absorption_wavelengths = detect_absorption_features(wavelength, flux)
        gases = identify_gases(absorption_wavelengths)
        features = {gas: len(gases.get(gas, [])) for gas in ["O₂", "CO₂", "CH₄", "H₂O", "O₃"]}
        df = pd.DataFrame([features])
        habitability = model.predict(df)[0]
    except ValueError as e:
        print("Error in processing spectrum:", e)
        return None, []
    
    reasons = []
    expected_levels = {"O₂": 0.3, "H₂O": 0.2, "CO₂": 0.1, "CH₄": 0.05, "O₃": 0.02}
    for gas, level in expected_levels.items():
        if features.get(gas, 0) < level:
            reasons.append(f"{gas} is below expected level ({features[gas]:.2f} vs {level})")
    
    print(f"Predicted Habitability Score: {habitability:.2f}%")
    if reasons:
        print("Reasons for low habitability:")
        for reason in reasons:
            print("-", reason)
    
    return habitability, reasons

# Generate Data and Train Model
df = generate_synthetic_data()
model = train_model(df)

# Example Test on Spectrum Image
# predict_habitability(model, "spectra_images/sample_spectrum.png")


MSE: 13.460184604648084


In [5]:
predict_habitability(model, "2.png")

Predicted Habitability Score: 5.73%
Reasons for low habitability:
- O₂ is below expected level (0.00 vs 0.3)
- H₂O is below expected level (0.00 vs 0.2)
- CO₂ is below expected level (0.00 vs 0.1)
- CH₄ is below expected level (0.00 vs 0.05)
- O₃ is below expected level (0.00 vs 0.02)


(np.float64(5.7310444803677685),
 ['O₂ is below expected level (0.00 vs 0.3)',
  'H₂O is below expected level (0.00 vs 0.2)',
  'CO₂ is below expected level (0.00 vs 0.1)',
  'CH₄ is below expected level (0.00 vs 0.05)',
  'O₃ is below expected level (0.00 vs 0.02)'])

In [8]:
import numpy as np
import pandas as pd

np.random.seed(42)

def generate_synthetic_data(n_samples=1000):
    co2 = np.random.uniform(0.02, 0.1, n_samples)
    o2 = np.random.uniform(20, 21, n_samples)
    n2 = np.random.uniform(77, 78, n_samples)
    h2o = np.random.uniform(0, 4, n_samples)
    
    pressure = np.random.uniform(0.8, 1.2, n_samples)
    albedo = np.random.uniform(0.2, 0.4, n_samples)
    temp = np.random.uniform(273, 373, n_samples)
    
    greenhouse = co2 * 0.5 + h2o * 0.3  # Simplified greenhouse effect
    
    # Now, habitability as a score:
    habitability_score = (
        np.maximum(0, (1 - np.abs(pressure - 1) / 0.2)) * 0.2 +
        np.maximum(0, (1 - np.abs(albedo - 0.3) / 0.1)) * 0.2 +
        np.maximum(0, (1 - np.abs(temp - 300) / 50)) * 0.2 +
        np.maximum(0, (1 - np.abs(o2 - 20.95) / 0.95)) * 0.1 +
        np.maximum(0, (1 - np.abs(n2 - 78.09) / 0.91)) * 0.1 +
        np.maximum(0, (1 - np.abs(h2o - 2) / 2)) * 0.1 +
        np.maximum(0, (1 - np.abs(greenhouse - 0.3) / 0.3)) * 0.1
    )
    
    # Normalize the score to be between 0 and 100%
    habitability_score *= 100
    
    return pd.DataFrame({
        'CO2': co2, 'O2': o2, 'N2': n2, 'H2O': h2o,
        'Pressure': pressure, 'Albedo': albedo, 
        'Temperature': temp, 'Greenhouse': greenhouse,
        'Habitability': habitability_score
    })

data = generate_synthetic_data(1000)

In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

np.random.seed(42)

def generate_synthetic_data(n_samples=1000):
    co2 = np.random.uniform(0.02, 0.1, n_samples)
    o2 = np.random.uniform(20, 21, n_samples)
    n2 = np.random.uniform(77, 78, n_samples)
    h2o = np.random.uniform(0, 4, n_samples)
    
    pressure = np.random.uniform(0.8, 1.2, n_samples)
    albedo = np.random.uniform(0.2, 0.4, n_samples)
    temp = np.random.uniform(273, 373, n_samples)
    
    greenhouse = co2 * 0.5 + h2o * 0.3  # Simplified greenhouse effect
    
    # Now, habitability as a score:
    habitability_score = (
        np.maximum(0, (1 - np.abs(pressure - 1) / 0.2)) * 0.2 +
        np.maximum(0, (1 - np.abs(albedo - 0.3) / 0.1)) * 0.2 +
        np.maximum(0, (1 - np.abs(temp - 300) / 50)) * 0.2 +
        np.maximum(0, (1 - np.abs(o2 - 20.95) / 0.95)) * 0.1 +
        np.maximum(0, (1 - np.abs(n2 - 78.09) / 0.91)) * 0.1 +
        np.maximum(0, (1 - np.abs(h2o - 2) / 2)) * 0.1 +
        np.maximum(0, (1 - np.abs(greenhouse - 0.3) / 0.3)) * 0.1
    )
    
    # Normalize the score to be between 0 and 100%
    habitability_score *= 100
    
    return pd.DataFrame({
        'CO2': co2, 'O2': o2, 'N2': n2, 'H2O': h2o,
        'Pressure': pressure, 'Albedo': albedo, 
        'Temperature': temp, 'Greenhouse': greenhouse,
        'Habitability': habitability_score
    })

# Generate data
data = generate_synthetic_data(4500)

# Prepare data for model training
X = data.drop('Habitability', axis=1)
y = data['Habitability']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Use RandomForestRegressor for regression tasks
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))

MSE: 15.965760929053165
R2 Score: 0.8850274291794012


In [12]:
X_test

Unnamed: 0,CO2,O2,N2,H2O,Pressure,Albedo,Temperature,Greenhouse
2323,0.044750,20.965770,77.046698,2.298568,1.159469,0.260366,372.056487,0.711945
3686,0.094332,20.240996,77.639617,1.636757,0.893476,0.314374,366.587886,0.538193
2974,0.050748,20.398250,77.158133,3.262487,0.917612,0.285023,308.526181,1.004120
2992,0.064217,20.336848,77.960013,1.580500,1.105083,0.376034,288.825956,0.506258
468,0.037888,20.957135,77.512170,3.048930,0.949111,0.238196,340.637684,0.933623
...,...,...,...,...,...,...,...,...
911,0.041445,20.273475,77.908190,3.199318,1.025481,0.268385,311.043580,0.980518
809,0.029059,20.893934,77.533590,0.954803,1.126217,0.329412,274.966259,0.300970
2940,0.041226,20.639263,77.090838,1.579415,1.126135,0.294305,356.550993,0.494437
3710,0.020823,20.174010,77.158239,3.003159,1.111432,0.357485,371.428133,0.911359


In [26]:
import pickle

In [27]:
with open('habitat.pkl', 'wb') as f:
    pickle.dump(model, f)

In [28]:
x = [[0.04,21,78,2,1,0.3,288,33]]
model.predict(x)




array([72.52757028])