# Solar System Data Analysis

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [16]:
pd.set_option('display.float_format', '{:.2e}'.format) # This sets that floating-point numbers will be shown in scientific notation with 2 decimal places
pd.set_option('display.max_columns', None) # For showing all columns when displaying a DataFrame
print("=== SOLAR SYSTEM PLANETARY ANALYSIS ===\n")

# Creating the dictionary
planet_data = {
    'Planet': ['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune'],
    'Distance_from_Sun_km': [57.9e6, 108.2e6, 149.6e6, 227.9e6, 778.3e6, 1427e6, 2871e6, 4497e6],
    'Diameter_km': [4879, 12104, 12756, 6792, 142984, 120536, 51118, 49528],
    'Mass_kg': [3.30e23, 4.87e24, 5.97e24, 6.42e23, 1.90e27, 5.68e26, 8.68e25, 1.02e26],
    'Orbital_Period_days': [88, 225, 365, 687, 4333, 10759, 30687, 60190],
    'Moons': [0, 0, 1, 2, 79, 82, 27, 14],
    'Type': ['Terrestrial', 'Terrestrial', 'Terrestrial', 'Terrestrial', 'Gas Giant', 'Gas Giant', 'Ice Giant', 'Ice Giant']
}

planets_df = pd.DataFrame(planet_data) # Creating the dataframe

planets_df

=== SOLAR SYSTEM PLANETARY ANALYSIS ===



Unnamed: 0,Planet,Distance_from_Sun_km,Diameter_km,Mass_kg,Orbital_Period_days,Moons,Type
0,Mercury,57900000.0,4879,3.3e+23,88,0,Terrestrial
1,Venus,108000000.0,12104,4.87e+24,225,0,Terrestrial
2,Earth,150000000.0,12756,5.97e+24,365,1,Terrestrial
3,Mars,228000000.0,6792,6.42e+23,687,2,Terrestrial
4,Jupiter,778000000.0,142984,1.9e+27,4333,79,Gas Giant
5,Saturn,1430000000.0,120536,5.68e+26,10759,82,Gas Giant
6,Uranus,2870000000.0,51118,8.68e+25,30687,27,Ice Giant
7,Neptune,4500000000.0,49528,1.0199999999999999e+26,60190,14,Ice Giant


# Basic Data Exploration

In [17]:
print("Basic Data Exploration:")

print(f"Shape: {planets_df.shape}") # Shows the shape of the dataframe
print(f"Columns: {list(planets_df.columns)}") # planets_df.columns shows all column names and we arapped it in list() to display it nicely
print(f"First 3 rows: {planets_df.head(3)}") # gives the first 3 rows of our dataframe
print(f"DataFrame Info: {planets_df.info}") # it shows no. of rows & columns, column names with their data types, no. of non-null values, memory usage
print(f"\n Statistical Summary: {planets_df.describe()}") # it shows mean, count, std, min, 25%, 50%, 75% quartiles, max
print(f"\n Data Types: {planets_df.dtypes}") # shows the datatype of each column
print("\n" + "="*50 + "\n")

Basic Data Exploration:
Shape: (8, 7)
Columns: ['Planet', 'Distance_from_Sun_km', 'Diameter_km', 'Mass_kg', 'Orbital_Period_days', 'Moons', 'Type']
First 3 rows:     Planet  Distance_from_Sun_km  Diameter_km  Mass_kg  Orbital_Period_days  \
0  Mercury              5.79e+07         4879 3.30e+23                   88   
1    Venus              1.08e+08        12104 4.87e+24                  225   
2    Earth              1.50e+08        12756 5.97e+24                  365   

   Moons         Type  
0      0  Terrestrial  
1      0  Terrestrial  
2      1  Terrestrial  
DataFrame Info: <bound method DataFrame.info of     Planet  Distance_from_Sun_km  Diameter_km  Mass_kg  Orbital_Period_days  \
0  Mercury              5.79e+07         4879 3.30e+23                   88   
1    Venus              1.08e+08        12104 4.87e+24                  225   
2    Earth              1.50e+08        12756 5.97e+24                  365   
3     Mars              2.28e+08         6792 6.42e+23       

In [18]:
print("Data Selection and Filtering:")

#Selecting SpecifiC Rows
print(" \n Selecting Specific Rows:")
print(planets_df[['Planet', 'Diameter_km', 'Mass_kg']].head())

# Filter rows based on conditions
print("\n Terrestrial Planets")
terrestrial = planets_df[planets_df['Type'] == 'Terrestrial']
print(terrestrial)

print("\n Planets with more than 10 moons")
many_moons = planets_df[planets_df['Moons'] > 10]
print(many_moons)

# Using .loc and .iloc
print("\n Using .loc to select specific rows and columns")
print(planets_df.loc[2:4, ['Planet', 'Distance_from_Sun_km']])

print("\n Using .iloc to select by position")
print(planets_df.iloc[[0, 4, 7], [0, 3, 4]]) # here [[0, 4, 7]] selects rows at positions 0, 4, and 7 and [0, 3, 4] selects columns at positions 0, 3, and 4
print("\n" + "="*50 + "\n")

Data Selection and Filtering:
 
 Selecting Specific Rows:
    Planet  Diameter_km  Mass_kg
0  Mercury         4879 3.30e+23
1    Venus        12104 4.87e+24
2    Earth        12756 5.97e+24
3     Mars         6792 6.42e+23
4  Jupiter       142984 1.90e+27

 Terrestrial Planets
    Planet  Distance_from_Sun_km  Diameter_km  Mass_kg  Orbital_Period_days  \
0  Mercury              5.79e+07         4879 3.30e+23                   88   
1    Venus              1.08e+08        12104 4.87e+24                  225   
2    Earth              1.50e+08        12756 5.97e+24                  365   
3     Mars              2.28e+08         6792 6.42e+23                  687   

   Moons         Type  
0      0  Terrestrial  
1      0  Terrestrial  
2      1  Terrestrial  
3      2  Terrestrial  

 Planets with more than 10 moons
    Planet  Distance_from_Sun_km  Diameter_km  Mass_kg  Orbital_Period_days  \
4  Jupiter              7.78e+08       142984 1.90e+27                 4333   
5   Saturn    