In [2]:
import pandas as pd
# Column-oriented planetary facts: each key is a column name and each list
# holds that column's values in planet order (Mercury through Neptune).
data = dict(
    planet=['Mercury', 'Venus', 'Earth', 'Mars',
            'Jupiter', 'Saturn', 'Uranus', 'Neptune'],
    radius_km=[2440, 6052, 6371, 3390, 69911, 58232, 25362, 24622],
    moons=[0, 0, 1, 2, 80, 83, 27, 14],
)
# Build the dataframe from the dictionary; the keys become the column labels.
planets = pd.DataFrame(data)
planets

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
4,Jupiter,69911,80
5,Saturn,58232,83
6,Uranus,25362,27
7,Neptune,24622,14


In [None]:
# Combine a Boolean mask with .loc[] to filter rows and pick a column in one
# step: keep the rows where the mask is True and return only the 'planet'
# column. Selecting a single column label this way yields a Series.
mask = planets['moons'] < 20
planets.loc[mask, 'planet']

0    Mercury
1      Venus
2      Earth
3       Mars
7    Neptune
Name: planet, dtype: object

In [3]:
# Create a Boolean mask of planets with fewer than 20 moons.
# The comparison is evaluated element-wise, producing a Boolean Series that
# shares the dataframe's index (True where the row satisfies the condition).
mask = planets['moons'] < 20
mask

0     True
1     True
2     True
3     True
4    False
5    False
6    False
7     True
Name: moons, dtype: bool

In [4]:
# Apply the Boolean mask to the dataframe to filter it so it contains
# only the planets with fewer than 20 moons. Rows whose mask value is True
# are kept, and they retain their original index labels.
planets[mask]

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
7,Neptune,24622,14


In [5]:
# Define the Boolean mask and apply it in a single line.
# The inner expression builds the mask; the outer brackets apply it.
planets[planets['moons'] < 20]

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
7,Neptune,24622,14


In [6]:
# Filtering with a Boolean mask doesn't change the original data. It returns
# a new, filtered dataframe (a copy, not a view) and leaves `planets` untouched.
planets

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
4,Jupiter,69911,80
5,Saturn,58232,83
6,Uranus,25362,27
7,Neptune,24622,14


In [7]:
# You can assign the filtered dataframe to a named variable.
# `mask` here is still the "fewer than 20 moons" mask created earlier.
moons_under_20 = planets[mask]
moons_under_20

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
7,Neptune,24622,14


In [8]:
# Create a Boolean mask of planets with fewer than 10 moons OR more than 50 moons.
# Each condition is wrapped in its own parentheses because | binds more
# tightly than the comparison operators < and >.
mask = (planets['moons'] < 10) | (planets['moons'] > 50)
mask

0     True
1     True
2     True
3     True
4     True
5     True
6    False
7    False
Name: moons, dtype: bool

In [9]:
# Apply the Boolean mask to filter the data.
# Uranus (27 moons) and Neptune (14 moons) satisfy neither condition, so
# they are dropped from the result.
planets[mask]

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
4,Jupiter,69911,80
5,Saturn,58232,83


In [10]:
# Create a Boolean mask of planets with more than 20 moons, excluding them if they
# have 80 moons or if their radius is less than 50,000 km.
# ~ inverts a Boolean Series element-wise, so ~(condition) means "not condition";
# the three parenthesized parts are combined with & (and).
mask = (planets['moons'] > 20) & ~(planets['moons'] == 80) & ~(planets['radius_km'] < 50000)

# Apply the mask
planets[mask]

Unnamed: 0,planet,radius_km,moons
5,Saturn,58232,83


1. &  -------> and 🚗

2. | --------> or 🚓

3. ~ ---------> not 🚕

Important: Each component of a multi-conditional logical statement must be in parentheses. Otherwise, the statement will throw an error or, worse, return something that isn’t what you intended.

For example, here is how to create a Boolean mask that selects all planets that have fewer than 10 moons or greater than 50 moons:

In [17]:
# Fewer than 10 moons OR more than 50 moons; each comparison sits in its own
# parentheses and the two are joined with |.
mask = (planets['moons'] < 10) | (planets['moons'] > 50)
mask

0     True
1     True
2     True
3     True
4     True
5     True
6    False
7    False
Name: moons, dtype: bool

Notice that each condition is self-contained in a set of parentheses, and the two conditions are separated by the logical operator `|` (or). To apply the mask, call the dataframe and put the statement or the variable it’s assigned to in selector brackets:

In [None]:

# Build the mask, then pass it to the dataframe's selector brackets to keep
# only the rows where it is True.
mask = (planets['moons'] < 10) | (planets['moons'] > 50)
planets[mask]

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
4,Jupiter,69911,80
5,Saturn,58232,83


Here’s an example of how to select all planets that have more than 20 moons, but not planets with 80 moons and not planets with a radius less than 50,000 km:

In [21]:
# More than 20 moons AND NOT exactly 80 moons AND NOT a radius below 50,000 km.
# ~ negates the parenthesized condition that follows it.
mask = (planets['moons'] > 20) & ~(planets['moons'] == 80) & ~(planets['radius_km'] < 50000)
planets[mask]

Unnamed: 0,planet,radius_km,moons
5,Saturn,58232,83


Note that this returns the same result as the following:

In [20]:
# Same filter with the negations folded into the comparisons:
# ~(moons == 80) becomes moons != 80, and ~(radius_km < 50000) becomes
# radius_km >= 50000.
# NOTE: the two forms agree here because these columns have no missing values.
# With NaN present, ~(radius_km < 50000) would be True for that row while
# radius_km >= 50000 would be False.
mask = (planets['moons'] > 20) & (planets['moons'] != 80) & (planets['radius_km'] >= 50000)
planets[mask]

Unnamed: 0,planet,radius_km,moons
5,Saturn,58232,83


A Boolean mask is a method of applying a filter to a dataframe. The mask overlays a Boolean grid over your dataframe in order to select only the values in the dataframe that align with the True values of the grid. To create Boolean comparisons, pandas has its own logical operators. These operators are:

* & (and) 

* | (or) 

* ~ (not)

Each criterion of a multi-conditional selection statement must be enclosed in its own set of parentheses. With practice, making complex selection statements in pandas is possible and efficient.