# Planets

In [2]:
# Importing relevant packages
import pandas as pd
import numpy as np

### Instantiating a dictionary of planetary data

In [None]:
# Planetary attributes, one list per column. Every list follows the same
# planet order as the 'planet' list, so position i describes the same planet
# in each column.
data = {
    'planet': [
        'Mercury', 'Venus', 'Earth', 'Mars',
        'Jupiter', 'Saturn', 'Uranus', 'Neptune',
    ],
    'radius_km': [2440, 6052, 6371, 3390, 69911, 58232, 25362, 24622],
    'moons': [0, 0, 1, 2, 80, 83, 27, 14],
    'type': [
        'terrestrial', 'terrestrial', 'terrestrial', 'terrestrial',
        'gas giant', 'gas giant', 'ice giant', 'ice giant',
    ],
    'rings': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'yes'],
    'mean_temp_c': [167, 464, 15, -65, -110, -140, -195, -200],
    'magnetic_field': ['yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes'],
}

# Building the dataframe: each dictionary key becomes a column and each
# list supplies that column's values.
planets = pd.DataFrame(data=data)
planets

### Starting data analysis

In [21]:
# Checking the data type of each column. In the output below, the numeric
# columns are int64 and the text columns (planet, type, rings,
# magnetic_field) are reported as `object`.
planets.dtypes

planet            object
radius_km          int64
moons              int64
type              object
rings             object
mean_temp_c        int64
magnetic_field    object
dtype: object

### Filtering the data

In [9]:
# Building a Boolean mask that is True for planets with fewer than 20 moons.
# Series.lt(20) is the method form of the `< 20` comparison.
mask = planets['moons'].lt(20)
mask

0     True
1     True
2     True
3     True
4    False
5    False
6    False
7     True
Name: moons, dtype: bool

In [10]:
# Filtering the dataframe with the Boolean mask: only rows where the mask
# is True (planets with fewer than 20 moons) are kept.
planets.loc[mask]

Unnamed: 0,planet,radius_km,moons,type,rings,mean_temp_c,magnetic_field
0,Mercury,2440,0,terrestrial,no,167,yes
1,Venus,6052,0,terrestrial,no,464,no
2,Earth,6371,1,terrestrial,no,15,yes
3,Mars,3390,2,terrestrial,no,-65,no
7,Neptune,24622,14,ice giant,yes,-200,yes


In [12]:
# Building the Boolean mask and filtering with it in one expression,
# without storing the mask in a variable first.
planets.loc[planets['moons'] < 20]

Unnamed: 0,planet,radius_km,moons,type,rings,mean_temp_c,magnetic_field
0,Mercury,2440,0,terrestrial,no,167,yes
1,Venus,6052,0,terrestrial,no,464,no
2,Earth,6371,1,terrestrial,no,15,yes
3,Mars,3390,2,terrestrial,no,-65,no
7,Neptune,24622,14,ice giant,yes,-200,yes


In [13]:
# Assigning the filtered result to a named variable. Filtering with a
# Boolean mask produces a new dataframe, not a view of `planets`; the
# explicit .copy() makes that independence unambiguous, so later changes to
# `moons_under_20` cannot affect (or raise warnings about) the original.
moons_under_20 = planets[mask].copy()
moons_under_20

Unnamed: 0,planet,radius_km,moons,type,rings,mean_temp_c,magnetic_field
0,Mercury,2440,0,terrestrial,no,167,yes
1,Venus,6052,0,terrestrial,no,464,no
2,Earth,6371,1,terrestrial,no,15,yes
3,Mars,3390,2,terrestrial,no,-65,no
7,Neptune,24622,14,ice giant,yes,-200,yes


In [14]:
# Building a Boolean mask that is True for planets with fewer than 10 moons
# OR more than 50 moons. `|` is the element-wise OR; .lt() and .gt() are the
# method forms of `<` and `>`.
mask = planets['moons'].lt(10) | planets['moons'].gt(50)
mask

0     True
1     True
2     True
3     True
4     True
5     True
6    False
7    False
Name: moons, dtype: bool

In [15]:
# Keeping only the rows for which the mask is True.
planets.loc[mask]

Unnamed: 0,planet,radius_km,moons,type,rings,mean_temp_c,magnetic_field
0,Mercury,2440,0,terrestrial,no,167,yes
1,Venus,6052,0,terrestrial,no,464,no
2,Earth,6371,1,terrestrial,no,15,yes
3,Mars,3390,2,terrestrial,no,-65,no
4,Jupiter,69911,80,gas giant,yes,-110,yes
5,Saturn,58232,83,gas giant,yes,-140,yes


In [17]:
# Building a Boolean mask: start from planets with more than 20 moons, then
# exclude any planet that has exactly 80 moons OR a radius under 50,000 km.
# `&` is element-wise AND, `|` is element-wise OR, `~` is element-wise NOT.
mask = (planets['moons'] > 20) & ~(
    (planets['moons'] == 80) | (planets['radius_km'] < 50000)
)

# Filtering the dataframe with the mask
planets[mask]

Unnamed: 0,planet,radius_km,moons,type,rings,mean_temp_c,magnetic_field
5,Saturn,58232,83,gas giant,yes,-140,yes


In [18]:
# Using the groupby() function to group the rows by planet type. On its own
# this returns a DataFrameGroupBy object (see the output below), not a table;
# an aggregation such as sum() or mean() must be applied to get results.
planets.groupby(['type'])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x1229d7a50>

In [19]:
# Applying the sum() function to the groupby object. With no columns
# selected, every non-grouping column is aggregated: the numeric columns are
# added up, while the string columns (planet, rings, magnetic_field) are
# concatenated, as the output below shows.
planets.groupby(['type']).sum()

Unnamed: 0_level_0,planet,radius_km,moons,rings,mean_temp_c,magnetic_field
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
gas giant,JupiterSaturn,128143,163,yesyes,-250,yesyes
ice giant,UranusNeptune,49984,41,yesyes,-395,yesyes
terrestrial,MercuryVenusEarthMars,18253,3,nononono,581,yesnoyesno


In [20]:
# Summing only the numeric columns for each group. The columns are selected
# on the groupby object *before* calling sum(), so the string columns are
# never aggregated (concatenated) only to be thrown away afterwards.
planets.groupby(['type'])[["radius_km","moons","mean_temp_c"]].sum()

Unnamed: 0_level_0,radius_km,moons,mean_temp_c
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gas giant,128143,163,-250
ice giant,49984,41,-395
terrestrial,18253,3,581


In [23]:
# Selecting only the 'moons' column on the groupby object and then applying
# sum(), so no other column is aggregated. The double brackets keep the
# result a one-column dataframe rather than a Series.
planets.groupby(['type'])[['moons']].sum()

Unnamed: 0_level_0,moons
type,Unnamed: 1_level_1
gas giant,163
ice giant,41
terrestrial,3


In [25]:
# Averaging the numeric columns within each (type, magnetic_field)
# combination that occurs in the data.
(
    planets
    .groupby(['type', 'magnetic_field'])[['radius_km', 'moons', 'mean_temp_c']]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,radius_km,moons,mean_temp_c
type,magnetic_field,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gas giant,yes,64071.5,81.5,-125.0
ice giant,yes,24992.0,20.5,-197.5
terrestrial,no,4721.0,1.0,199.5
terrestrial,yes,4405.5,0.5,91.0


In [26]:
# Grouping by type and passing a list of aggregation names to agg(), which
# yields both the mean and the median of every selected numeric column.
(
    planets
    .groupby(['type'])[['radius_km', 'moons', 'mean_temp_c']]
    .agg(['mean', 'median'])
)

Unnamed: 0_level_0,radius_km,radius_km,moons,moons,mean_temp_c,mean_temp_c
Unnamed: 0_level_1,mean,median,mean,median,mean,median
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
gas giant,64071.5,64071.5,81.5,81.5,-125.0,-125.0
ice giant,24992.0,24992.0,20.5,20.5,-197.5,-197.5
terrestrial,4563.25,4721.0,0.75,0.5,145.25,91.0


In [27]:
# Helper for use with agg(): the 90th percentile of a set of values.
def percentile_90(x):
    """Return the 0.9 quantile of `x`.

    `x` must expose a `.quantile()` method (for example a pandas Series);
    the result is whatever `x.quantile(0.9)` returns.
    """
    return x.quantile(q=0.9)

In [28]:
# Grouping by type and magnetic_field, then using agg() with a mix of a
# built-in aggregation name ('mean') and the custom `percentile_90()`
# function; each is applied to every selected numeric column per group.
(
    planets
    .groupby(['type', 'magnetic_field'])[['radius_km', 'moons', 'mean_temp_c']]
    .agg(['mean', percentile_90])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,radius_km,radius_km,moons,moons,mean_temp_c,mean_temp_c
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,percentile_90,mean,percentile_90,mean,percentile_90
type,magnetic_field,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
gas giant,yes,64071.5,68743.1,81.5,82.7,-125.0,-113.0
ice giant,yes,24992.0,25288.0,20.5,25.7,-197.5,-195.5
terrestrial,no,4721.0,5785.8,1.0,1.8,199.5,411.1
terrestrial,yes,4405.5,5977.9,0.5,0.9,91.0,151.8
