In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Features used to measure how Earth-like a planet is. The Earth reference
# row is always built in exactly this column order (see align_reference).
columns_to_compare = ['pl_orbper', 'pl_rade', 'pl_orbsmax', 'pl_masse', 'pl_orbeccen', 'st_teff', 'st_lum', 'sy_dist', 'pl_dens']

# Earth / Sun values keyed by archive column name. Values are looked up by
# name, so the key order here does not matter and extra keys are ignored.
earth_reference = {
    'pl_rade': 1,  # Earth's radius in Earth radii
    'pl_masse': 1,  # Earth's mass in Earth masses
    'pl_orbper': 365.25,  # Earth's orbital period in days
    'pl_orbsmax': 1,  # Earth's orbital semi-major axis in AU
    'pl_eqt': 288,  # Earth's surface temperature in Kelvin (not in columns_to_compare, so currently unused)
    'pl_orbeccen': 0.0167,  # Earth's orbital eccentricity
    'st_teff': 5772,  # Sun's effective temperature in Kelvin
    # NOTE(review): the archive documents st_lum as log10(L / L_sun), so the
    # Sun is 0, not 1 -- confirm against the column definitions of this CSV.
    'st_lum': 0,
    # NOTE(review): sy_dist is the distance to the planetary system in parsecs;
    # for the Solar System itself that is ~0 (1 AU is about 4.8e-6 pc).
    'sy_dist': 0,
    'pl_dens': 5.51,  # Earth's density in g/cm^3
}


def align_reference(reference, columns):
    """Return a one-row DataFrame of *reference* values ordered like *columns*.

    Looking the values up by column name guarantees that every reference value
    is scaled and compared against the feature it actually describes, instead
    of relying on the insertion order of the dict.

    Args:
        reference: Mapping of column name -> reference value. Keys that are
            not listed in *columns* are ignored.
        columns: Feature names, in the order used to fit the scaler.

    Returns:
        A float DataFrame with a single row (index 0) and exactly *columns*
        as its columns.

    Raises:
        KeyError: If *reference* has no value for one of *columns*.
    """
    missing = [name for name in columns if name not in reference]
    if missing:
        raise KeyError(f"reference is missing values for columns: {missing}")
    return pd.DataFrame(
        [[reference[name] for name in columns]],
        columns=list(columns),
        dtype=float,
    )


# True both in a notebook cell and when run as a script, so every name below
# is still defined at module level; the guard only keeps an import of this
# code from touching the CSV file.
if __name__ == "__main__":
    df = pd.read_csv("NASA Exoplanet Data/NASA Exoplanet Archive - Planetary Systems Data.csv", low_memory=False)

    # Coerce unparseable entries to NaN so they are removed by dropna below.
    df['pl_orbeccen'] = pd.to_numeric(df['pl_orbeccen'], errors='coerce')
    df['pl_masse'] = pd.to_numeric(df['pl_masse'], errors='coerce')

    # Keep only rows that have a value for every compared feature.
    df = df[columns_to_compare].dropna()

    # Scale each feature to [0, 1] so no single unit dominates the distance.
    scaler = MinMaxScaler()
    df_scaled = scaler.fit_transform(df)
    earth_scaled = scaler.transform(align_reference(earth_reference, columns_to_compare))[0]

    # Euclidean distance to Earth in scaled feature space.
    distances = np.linalg.norm(df_scaled - earth_scaled, axis=1)

    # Despite the column name this is a distance: smaller means more Earth-like.
    df['Earth Similarity'] = distances

    df_sorted = df.sort_values(by='Earth Similarity')

    top_10_planets = df_sorted.head(10)
    average_planet = top_10_planets.mean()

    # Display the average planet characteristics
    print("Average Planet (based on the 10 most Earth-like planets):")
    print(average_planet)


         pl_orbper  pl_rade  pl_orbsmax    pl_masse  pl_orbeccen  st_teff  \
14813  1047.835600    9.180     2.02600   158.91500       0.2600   5597.0   
15371   988.881120   11.949     2.02700  1322.16617       0.9200   5746.0   
14998  1071.232050   10.160     1.88300   321.00669       0.2900   4884.0   
31055   394.625080    8.082     1.13700   298.75870       0.0780   5950.0   
23168   288.822000    8.564     1.08960    69.92000       0.1820   5913.0   
27088   328.240170   11.579     0.89800  2129.45033       0.3460   5191.0   
19139   289.862300    2.380     0.84900    36.00000       0.0000   5518.0   
33558   282.525420    9.337     0.83300    87.08498       0.2150   5734.0   
34217   260.790000   11.321     0.82800   875.61726       0.3683   5695.0   
13271   217.831840   12.420     0.75300  1678.14240       0.4010   6145.0   
8084    224.778330    7.084     0.75100   222.47988       0.1760   5914.0   
23426   131.458000    8.160     0.60347    40.36300       0.0420   5606.0   

