In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Rank catalogued planets by how close they are to Earth in a min-max-scaled
# feature space, then average the ten closest ones.

df = pd.read_csv("habitable_planets.csv")

# Features used for the comparison. This order is the column order the scaler
# is fitted on, so every vector handed to the scaler must follow it.
columns_to_compare = [
    'pl_orbper',
    'pl_rade',
    'pl_orbsmax',
    'pl_masse',
    'pl_orbeccen',
    'st_teff',
    'st_lum',
    'pl_dens',
    'pl_eqt',
]
# Work on an independent copy so the fill and the column assignment below do
# not act on a slice of the raw frame (avoids chained-assignment warnings).
df = df[columns_to_compare].copy()

# Replace missing values with the column mean so every row is a full vector.
df = df.fillna(df.mean())

# Reference values for Earth / the Sun, keyed by column name. The key order
# here is irrelevant: values are looked up by name when the Earth row is built.
earth_reference = {
    'pl_rade': 1,  # Earth's radius in Earth radii
    'pl_masse': 1,  # Earth's mass in Earth masses
    'pl_orbper': 365.25,  # Earth's orbital period in days
    # NOTE(review): 288 K is Earth's mean surface temperature; its equilibrium
    # temperature is roughly 255 K. Confirm which one `pl_eqt` should use.
    'pl_eqt': 288,
    'pl_orbeccen': 0.0167,  # Earth's orbital eccentricity
    'st_teff': 5772,  # Sun's effective temperature in Kelvin
    # NOTE(review): 1 is correct for linear solar luminosities. If the CSV
    # stores log10(L/Lsun), the Sun's value would be 0 - confirm.
    'st_lum': 1,
    'pl_orbsmax': 1,  # Earth's orbital semi-major axis in AU
    'pl_dens': 5.51,  # Earth's density in g/cm^3
}

scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

# Build the Earth row by column name, in the fitted column order. Relying on
# the dict's own insertion order would feed e.g. Earth's radius into the
# orbital-period slot and corrupt every distance. A key missing from
# `earth_reference` raises KeyError here instead of silently producing NaN.
earth_row = pd.DataFrame(
    [[earth_reference[column] for column in columns_to_compare]],
    columns=columns_to_compare,
)
earth_scaled = scaler.transform(earth_row)[0]

# Euclidean distance of every planet from Earth in the scaled feature space.
distances = np.linalg.norm(df_scaled - earth_scaled, axis=1)

# Despite the column name this is a distance: smaller means more Earth-like.
df['Earth Similarity'] = distances

# Ascending sort puts the most Earth-like planets first.
df_sorted = df.sort_values(by='Earth Similarity')
#print(df_sorted.head(10))

top_10_planets = df_sorted.head(10)
# Column-wise mean of the ten closest planets (includes the distance column).
average_planet = top_10_planets.mean()

# # Display the average planet characteristics
# print("Average Planet (based on the 10 most Earth-like planets):")
# print(average_planet)


Average Planet (based on the 10 most Earth-like planets):
pl_orbper           2335.120763
pl_rade                3.832544
pl_orbsmax             3.795533
pl_masse            1940.493474
pl_orbeccen            0.353782
st_teff             5378.664267
st_lum                 0.922600
pl_dens                5.133756
pl_eqt               243.533651
Earth Similarity      48.719079
dtype: float64


