In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Conversion factors used to express Jupiter-relative values in Earth units
JUPITER_RAD = 11.209   # one Jupiter radius, in Earth radii
JUPITER_MASS = 317.83  # one Jupiter mass, in Earth masses

# Read the planet table from disk, report its dimensions, and preview
# the first ten rows
df = pd.read_csv(filepath_or_buffer='cleaned_5250.csv')
print(df.shape)
df.head(n=10)

(5250, 13)


Unnamed: 0,name,distance,stellar_magnitude,planet_type,discovery_year,mass_multiplier,mass_wrt,radius_multiplier,radius_wrt,orbital_radius,orbital_period,eccentricity,detection_method
0,11 Comae Berenices b,304.0,4.72307,Gas Giant,2007,19.4,Jupiter,1.08,Jupiter,1.29,0.892539,0.23,Radial Velocity
1,11 Ursae Minoris b,409.0,5.013,Gas Giant,2009,14.74,Jupiter,1.09,Jupiter,1.53,1.4,0.08,Radial Velocity
2,14 Andromedae b,246.0,5.23133,Gas Giant,2008,4.8,Jupiter,1.15,Jupiter,0.83,0.508693,0.0,Radial Velocity
3,14 Herculis b,58.0,6.61935,Gas Giant,2002,8.13881,Jupiter,1.12,Jupiter,2.773069,4.8,0.37,Radial Velocity
4,16 Cygni B b,69.0,6.215,Gas Giant,1996,1.78,Jupiter,1.2,Jupiter,1.66,2.2,0.68,Radial Velocity
5,17 Scorpii b,408.0,5.22606,Gas Giant,2020,4.32,Jupiter,1.15,Jupiter,1.45,1.6,0.06,Radial Velocity
6,18 Delphini b,249.0,5.51048,Gas Giant,2008,10.3,Jupiter,1.11,Jupiter,2.6,2.7,0.08,Radial Velocity
7,1RXS J160929.1-210524 b,454.0,12.618,Gas Giant,2008,8.0,Jupiter,1.664,Jupiter,330.0,6505.9,0.0,Direct Imaging
8,24 Bootis b,313.0,5.59,Gas Giant,2018,0.91,Jupiter,1.24,Jupiter,0.19,0.083231,0.04,Radial Velocity
9,24 Sextantis b,235.0,6.4535,Gas Giant,2010,1.99,Jupiter,1.19,Jupiter,1.333,1.239699,0.09,Radial Velocity


In [2]:
# Inspect the categorical columns: the distinct planet types and how often
# each occurs, the reference bodies used for radius and mass, and finally
# the dimensions of the frame
print(df['planet_type'].unique())
print(df['planet_type'].value_counts())
print(df['radius_wrt'].unique())
print(df['mass_wrt'].unique())
print(df.shape)

['Gas Giant' 'Super Earth' 'Neptune-like' 'Terrestrial' 'Unknown']
planet_type
Neptune-like    1825
Gas Giant       1630
Super Earth     1595
Terrestrial      195
Unknown            5
Name: count, dtype: int64
['Jupiter' 'Earth' nan]
['Jupiter' 'Earth' nan]
(5250, 13)


In [3]:
# Check which reference bodies appear in the radius and mass unit columns.
# Rows whose radius or mass unit is missing are NOT removed here: the
# dropna below is intentionally left disabled, so the frame keeps its size.
#df = df.dropna(subset=['radius_wrt', 'mass_wrt'])
for unit_column in ('radius_wrt', 'mass_wrt'):
    print(df[unit_column].unique())
df.shape

['Jupiter' 'Earth' nan]
['Jupiter' 'Earth' nan]


(5250, 13)

In [4]:
# Express every radius and mass in Earth units.
#
# Each *_multiplier is relative to the body named in the matching *_wrt
# column, so it is scaled by that body's size in Earth units. The lookup is
# explicit on purpose: a unit that is missing (NaN) or not listed maps to NaN
# and therefore yields a NaN result, instead of being silently treated as
# Jupiter the way an "== 'Earth' else Jupiter" test would.
df['radius'] = df['radius_multiplier'] * df['radius_wrt'].map({'Earth': 1.0, 'Jupiter': JUPITER_RAD})
df['mass'] = df['mass_multiplier'] * df['mass_wrt'].map({'Earth': 1.0, 'Jupiter': JUPITER_MASS})
df.head()

Unnamed: 0,name,distance,stellar_magnitude,planet_type,discovery_year,mass_multiplier,mass_wrt,radius_multiplier,radius_wrt,orbital_radius,orbital_period,eccentricity,detection_method,radius,mass
0,11 Comae Berenices b,304.0,4.72307,Gas Giant,2007,19.4,Jupiter,1.08,Jupiter,1.29,0.892539,0.23,Radial Velocity,12.10572,6165.902
1,11 Ursae Minoris b,409.0,5.013,Gas Giant,2009,14.74,Jupiter,1.09,Jupiter,1.53,1.4,0.08,Radial Velocity,12.21781,4684.8142
2,14 Andromedae b,246.0,5.23133,Gas Giant,2008,4.8,Jupiter,1.15,Jupiter,0.83,0.508693,0.0,Radial Velocity,12.89035,1525.584
3,14 Herculis b,58.0,6.61935,Gas Giant,2002,8.13881,Jupiter,1.12,Jupiter,2.773069,4.8,0.37,Radial Velocity,12.55408,2586.757982
4,16 Cygni B b,69.0,6.215,Gas Giant,1996,1.78,Jupiter,1.2,Jupiter,1.66,2.2,0.68,Radial Velocity,13.4508,565.7374


In [5]:
# Discard the raw multiplier/unit columns (already folded into 'radius' and
# 'mass') together with the discovery year
df = df.drop(
    columns=[
        'radius_wrt',
        'mass_wrt',
        'radius_multiplier',
        'mass_multiplier',
        'discovery_year',
    ]
)
df.head()

Unnamed: 0,name,distance,stellar_magnitude,planet_type,orbital_radius,orbital_period,eccentricity,detection_method,radius,mass
0,11 Comae Berenices b,304.0,4.72307,Gas Giant,1.29,0.892539,0.23,Radial Velocity,12.10572,6165.902
1,11 Ursae Minoris b,409.0,5.013,Gas Giant,1.53,1.4,0.08,Radial Velocity,12.21781,4684.8142
2,14 Andromedae b,246.0,5.23133,Gas Giant,0.83,0.508693,0.0,Radial Velocity,12.89035,1525.584
3,14 Herculis b,58.0,6.61935,Gas Giant,2.773069,4.8,0.37,Radial Velocity,12.55408,2586.757982
4,16 Cygni B b,69.0,6.215,Gas Giant,1.66,2.2,0.68,Radial Velocity,13.4508,565.7374


In [6]:
# Number of missing entries in each remaining column
print(df.isna().sum())

name                   0
distance              17
stellar_magnitude    161
planet_type            0
orbital_radius       289
orbital_period         0
eccentricity           0
detection_method       0
radius                17
mass                  23
dtype: int64
