## Habitable Planet Data Cleaning

In [64]:
import pandas as pd
import numpy as np

In [65]:
k_data = pd.read_csv("planet.csv")
k_data.columns = ['planet_name', 'num_stars', 'controv_flag', 'orbital_period', 'planet_semi-major_axis', 'planet_radius', 'planet_mass', 'eccentricity', 'insolation_flux', 'equi_temp', 'spectral_type', 'stellar_temp', 'stellar_radius', 'stellar_mass', 'distance']

planet_name = Name of Planet

num_stars = Number of stars

controv_flag = 0 if no controversy, else 1

orbital_period = orbital period [days]

planet_semi-major_axis = distance from star [au]

planet_radius = planet radius [Earth Radius]

planet_mass = planet mass [Earth Mass]

eccentricity = amount by which orbit of planet deviates from a perfect circle (instability in temp)

insolation_flux = Another form of equi_temp [Earth]

equi_temp = planetary equilibrium (The equilibrium temperature of the planet as modeled by a black body heated only by its host star, or for directly imaged planets, the effective temperature of the planet required to match the measured luminosity if the planet were a black body)

spectral_type = Classification based upon Morgan-Keenan system.

stellar_mass = mass of star [Sol Mass]

distance = distance from Earth [parsec]

In [94]:
k_data.head()

Unnamed: 0,planet_name,num_stars,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance
0,11 Com b,2.0,324.62,1.226,,5505.066163,0.2345,,,4808.0,16.38,2.463333,93.1846
1,11 UMi b,1.0,516.219985,1.526667,,3818.094733,0.08,,,4276.5,26.935,2.093333,125.321
2,14 And b,1.0,186.3,0.761667,,1224.550433,0.0,,,4850.5,11.275,1.726667,75.4392
3,14 Her b,1.0,1766.378417,2.81475,,1642.383591,0.362925,,,5296.985,0.976667,0.927143,17.9323
4,16 Cyg B b,3.0,799.375,1.662833,,533.514528,0.676033,,,5728.594,1.14,1.016,21.1397


In [67]:
#Total rows
k_data['planet_name'].count()

35896

In [68]:
#Total planets
k_data['planet_name'].nunique()

5602

In [69]:
#Take duplicates and average the data
avg_data = k_data.groupby('planet_name').mean(numeric_only=True).reset_index()
avg_data.reset_index(drop=True, inplace=True)
avg_data.tail()

Unnamed: 0,planet_name,num_stars,controv_flag,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance
5597,ups And b,2.0,0.0,4.616229,0.058895,,219.960419,0.0302,,,6137.82,1.615,1.25,13.4054
5598,ups And c,2.0,0.0,240.728533,0.827712,,1275.328758,0.238933,,,6117.093333,1.615,1.2625,13.4054
5599,ups And d,2.0,0.0,1285.346167,2.528382,,1616.24259,0.281117,,,6117.093333,1.615,1.2625,13.4054
5600,ups Leo b,1.0,0.0,385.2,1.18,,162.09249,0.32,,,4836.0,11.22,1.48,52.5973
5601,xi Aql b,1.0,0.0,136.86,0.628333,,716.301647,0.0295,,,4810.5,11.43,1.78,56.1858


In [70]:
# Checking NAN counts for each row.
for column in avg_data.columns:
    nan_count = avg_data[column].isna().sum()
    print(f"Number of NaN values in '{column}': {nan_count}")

Number of NaN values in 'planet_name': 0
Number of NaN values in 'num_stars': 0
Number of NaN values in 'controv_flag': 0
Number of NaN values in 'orbital_period': 249
Number of NaN values in 'planet_semi-major_axis': 296
Number of NaN values in 'planet_radius': 1361
Number of NaN values in 'planet_mass': 2907
Number of NaN values in 'eccentricity': 777
Number of NaN values in 'insolation_flux': 1710
Number of NaN values in 'equi_temp': 1462
Number of NaN values in 'stellar_temp': 392
Number of NaN values in 'stellar_radius': 535
Number of NaN values in 'stellar_mass': 28
Number of NaN values in 'distance': 118


In [71]:
#Remove rows that have controversy
avg_data_filtered = avg_data.loc[avg_data['controv_flag'] != 1]
avg_data_filtered.reset_index(drop=True, inplace=True)
avg_data_filtered['controv_flag'].nunique()

1

In [72]:
proj_data = avg_data_filtered
proj_data.head()

Unnamed: 0,planet_name,num_stars,controv_flag,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance
0,11 Com b,2.0,0.0,324.62,1.226,,5505.066163,0.2345,,,4808.0,16.38,2.463333,93.1846
1,11 UMi b,1.0,0.0,516.219985,1.526667,,3818.094733,0.08,,,4276.5,26.935,2.093333,125.321
2,14 And b,1.0,0.0,186.3,0.761667,,1224.550433,0.0,,,4850.5,11.275,1.726667,75.4392
3,14 Her b,1.0,0.0,1766.378417,2.81475,,1642.383591,0.362925,,,5296.985,0.976667,0.927143,17.9323
4,16 Cyg B b,3.0,0.0,799.375,1.662833,,533.514528,0.676033,,,5728.594,1.14,1.016,21.1397


In [78]:
proj_data2 = proj_data.drop(columns=['controv_flag'])
proj_data2.head()

Unnamed: 0,planet_name,num_stars,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance
0,11 Com b,2.0,324.62,1.226,,5505.066163,0.2345,,,4808.0,16.38,2.463333,93.1846
1,11 UMi b,1.0,516.219985,1.526667,,3818.094733,0.08,,,4276.5,26.935,2.093333,125.321
2,14 And b,1.0,186.3,0.761667,,1224.550433,0.0,,,4850.5,11.275,1.726667,75.4392
3,14 Her b,1.0,1766.378417,2.81475,,1642.383591,0.362925,,,5296.985,0.976667,0.927143,17.9323
4,16 Cyg B b,3.0,799.375,1.662833,,533.514528,0.676033,,,5728.594,1.14,1.016,21.1397


# Factoring in Earth

Mass = 5.9722×10^24 kg

Eccentricity =0.0034 and up to 0.058.

Radius = 3,958.8 miles

Orbital Period = 365

Semi-major Axis of Orbit = 149,598,023 km (or 92,955,902 miles)

Insolation Flux = 1400 W/m

Equilibrium Temp = 255 K

Spectral Type = G-type (G-V)

Solar Mass = 1.989 × 10^30 kg

In [93]:
columns = ['planet_name', 'num_stars', 'orbital_period', 'planet_semi-major_axis', 'planet_radius', 'planet_mass', 'eccentricity', 'insolation_flux', 'equi_temp', 'stellar_temp', 'stellar_radius', 'stellar_mass', 'distance']
earth = pd.DataFrame(columns=columns)
earth_data = {'planet_name' : 'Earth',
              'num_stars' : 1,
              'orbital_period' : 365,
              'planet_semi-major_axis' : 1,
              'planet_radius' : ,
              'planet_mass' : ,
              'eccentricity' : ,
              'insolation_flux' : ,
              'equi_temp' : ,
              'stellar_temp' : ,
              'stellar_radius' : ,
              'stellar_mass' : ,
              'distance' : 0
}

Unnamed: 0,planet_name,num_stars,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance


# Steps

1) Calculate planets in habitable zone.

    a) Planetary Equilibrium Temp is easiest. 175 K - 270 K
    
    b) Semi-major axis (distance from spectral body) and spectral type / stellar temp to estimate equi_temp
    


2) Clean data with planets that do not reside in the habitable zone

In [88]:
# How many planets are habitable based upon our current count?
k_data = proj_data2
habitable = k_data[(k_data['equi_temp'] >= 175) & (k_data['equi_temp'] <= 270)]
count = habitable['planet_name'].count()
missing = k_data['equi_temp'].isna().sum()
print(count, "planets with equilibrium temp within acceptable habitable zone parameters.", missing, "NAN values")

78 planets with equilibrium temp within acceptable habitable zone parameters. 1442 NAN values


# Solving Missing Equilibrium Temperature

In [99]:
# Does insolation_flux help?
miss_temp = k_data[k_data['equi_temp'].isna()]
#miss_temp.count()
# 93 counts with insolation flux
insol_flux = k_data[~k_data['insolation_flux'].isna()]
insol_flux.head()

Unnamed: 0,planet_name,num_stars,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance
30,55 Cnc e,2.0,0.994802,0.019644,1.968556,8.667005,0.081625,2657.83,1894.5,5239.990833,0.955,0.922667,12.5855
47,AU Mic b,1.0,8.463095,0.06486,4.0894,22.221567,0.064539,48.92,649.333333,3675.857143,0.75125,0.515714,9.7221
48,AU Mic c,1.0,18.859087,0.11025,2.850667,14.9,0.043595,7.3,456.5,3685.75,0.762,0.522,9.7221
74,BD-210397 b,1.0,1891.0,2.63,,214.0,0.1,0.02,,4051.0,0.75,0.68,23.7324
75,BD-210397 c,1.0,6300.0,5.9,,1142.288175,0.265,0.0,,4051.0,0.75,0.685,23.7324


In [110]:
insol_habit = insol_flux[(insol_flux['insolation_flux'] >= 175) & (insol_flux['insolation_flux'] <= 300)]
both_habit = insol_habit[(insol_habit['equi_temp'] < 800)]
both_habit

Unnamed: 0,planet_name,num_stars,orbital_period,planet_semi-major_axis,planet_radius,planet_mass,eccentricity,insolation_flux,equi_temp,stellar_temp,stellar_radius,stellar_mass,distance
2343,Kepler-1283 b,1.0,12.946239,0.10696,1.706571,,0.0,210.312,771.0,6089.444444,1.498889,1.04125,2921.99


# Conclusion on Insolation Flux

Useless parameter, the planet's who's insolation flux have an equilibrium temperature that is too high to sustain life