# Pandas Tutorial - Part 3

# Filtering - Using Conditionals to Filter Rows and Columns

### Implementation: Ali Moghanni

*Resources:*

This Jupyter notebook can be obtained at [https://github.com/alimoghanni/Pandas](https://github.com/alimoghanni/Pandas).

updated: **2020-04-01**

In [1]:
# Preamble: useful toolboxes, libraries, functions, etc.

import pandas as pd
import numpy as np

In [2]:
# Sample data as a plain Python dictionary: each key is a column name and
# each value is a list holding that column's nine entries (one per person).
# Missing entries appear as "" in the string columns and as np.nan in "age".

people = {
    "first": [
        "Agatha", "Hercule", "Jane",
        "David", "Nicholle", "Bruce",
        "Jacob", "Maggie", "Cristiano",
    ],
    "last": [
        "Christie", "Poirot", "Marple",
        "Tom", "Tom", "Lee",
        "Gyllenhaal", "Gyllenhaal", "Ronaldo",
    ],
    "email": [
        "AgathaChristie@mail.com",
        "HerculePoirot@mail.com",
        "",
        "DavidTom@gmail.com",
        "NicholleTom@gmail.com",
        "BruceLee@yahoo.com",
        "JacobGyllenhaal@mail.com",
        "MaggieGyllenhaal@mail.com",
        "CristianoRonaldo@mail.com",
    ],
    "age": [85, 54, np.nan, np.nan, 42, 32, 39, 42, 35],
    "occupation": [
        "Novelist", "Private investigator", "Amateur detective",
        "Actor", "Actress", "Martial artist",
        "Actor", "producer", "footballer",
    ],
    "nationality": [
        "English", "Belgian", "British",
        "American", "American", "Chinese",
        "American", "American", "Portuguese",
    ],
    "born": [
        "15 September 1890",
        "",
        "December 1927",
        "March 23, 1978",
        "March 23, 1978",
        "November 27, 1940",
        "December 19, 1980",
        "November 16, 1977",
        "February 5, 1985",
    ],
    "male": ["No", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes"],
}

In [3]:
# Build the DataFrame from the dictionary: keys become the column labels
# and each list becomes that column's values (default orient='columns').

df = pd.DataFrame.from_dict(people)

# A bare expression on the last line makes the notebook render the frame.
df

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
0,Agatha,Christie,AgathaChristie@mail.com,85.0,Novelist,English,15 September 1890,No
1,Hercule,Poirot,HerculePoirot@mail.com,54.0,Private investigator,Belgian,,Yes
2,Jane,Marple,,,Amateur detective,British,December 1927,No
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No
5,Bruce,Lee,BruceLee@yahoo.com,32.0,Martial artist,Chinese,"November 27, 1940",Yes
6,Jacob,Gyllenhaal,JacobGyllenhaal@mail.com,39.0,Actor,American,"December 19, 1980",Yes
7,Maggie,Gyllenhaal,MaggieGyllenhaal@mail.com,42.0,producer,American,"November 16, 1977",No
8,Cristiano,Ronaldo,CristianoRonaldo@mail.com,35.0,footballer,Portuguese,"February 5, 1985",Yes


In [4]:
# Boolean mask with one True/False per row: True where the last name is
# exactly 'Tom'. Series.eq is the method form of df['last'] == 'Tom'.
filt = df['last'].eq('Tom')

# Display the mask itself (a boolean Series aligned with df's index).
filt

0    False
1    False
2    False
3     True
4     True
5    False
6    False
7    False
8    False
Name: last, dtype: bool

In [5]:
# Boolean-mask indexing: keep only the rows for which `filt` is True.
df[filt]  # or df[df['last'] == 'Tom']

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No


In [6]:
# Same filter written inline: the comparison is evaluated in place instead
# of being stored in an intermediate variable first.
df[df['last'] == 'Tom']

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No


In [7]:
# .loc also accepts the boolean mask as its row selector and returns the
# same rows as df[filt].
df.loc[filt]

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No


In [8]:
# With .loc the second argument selects columns: here only 'email' is kept
# for the rows where the mask is True, so the result is a Series.
df.loc[filt, 'email']

3       DavidTom@gmail.com
4    NicholleTom@gmail.com
Name: email, dtype: object

In [9]:
# Two independent conditions, each a boolean Series over the rows of df.
last_is_tom = df['last'] == 'Tom'
first_is_nicholle = df['first'] == 'Nicholle'

# Element-wise AND (&): a row is kept only if BOTH conditions hold.
filt = last_is_tom & first_is_nicholle

# Email column of the matching rows.
df.loc[filt, 'email']

4    NicholleTom@gmail.com
Name: email, dtype: object

In [10]:
# Two independent conditions, each a boolean Series over the rows of df.
last_is_tom = df['last'] == 'Tom'
is_actor = df['occupation'] == 'Actor'

# Element-wise OR (|): a row is kept if AT LEAST ONE condition holds.
filt = last_is_tom | is_actor

# Email column of the matching rows.
df.loc[filt, 'email']

3          DavidTom@gmail.com
4       NicholleTom@gmail.com
6    JacobGyllenhaal@mail.com
Name: email, dtype: object

In [11]:
# Rebuild the OR filter: last name is 'Tom' or occupation is 'Actor'.
last_is_tom = df['last'] == 'Tom'
is_actor = df['occupation'] == 'Actor'
filt = last_is_tom | is_actor

# Passing a list of labels as the column selector returns several columns
# (a DataFrame) instead of a single Series.
df.loc[filt, ['nationality', 'email']]

Unnamed: 0,nationality,email
3,American,DavidTom@gmail.com
4,American,NicholleTom@gmail.com
6,American,JacobGyllenhaal@mail.com


In [12]:
# Rebuild the OR filter: last name is 'Tom' or occupation is 'Actor'.
last_is_tom = df['last'] == 'Tom'
is_actor = df['occupation'] == 'Actor'
filt = last_is_tom | is_actor

# ~ inverts the mask element-wise, selecting every row the filter
# does NOT match.
df.loc[~filt, ['nationality', 'email']]

Unnamed: 0,nationality,email
0,English,AgathaChristie@mail.com
1,Belgian,HerculePoirot@mail.com
2,British,
5,Chinese,BruceLee@yahoo.com
7,American,MaggieGyllenhaal@mail.com
8,Portuguese,CristianoRonaldo@mail.com


In [13]:
# Nationalities to keep.
countries = ['American', 'Belgian']

# isin() tests each row's nationality for membership in `countries`,
# replacing a chain of == comparisons joined with |.
filt = df['nationality'].isin(countries)

In [14]:
# Rows whose nationality is one of `countries`.
# NOTE: `filt` is reassigned throughout this notebook; this cell expects
# the isin() mask, so run the cell that builds it immediately before.
df.loc[filt]

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
1,Hercule,Poirot,HerculePoirot@mail.com,54.0,Private investigator,Belgian,,Yes
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No
6,Jacob,Gyllenhaal,JacobGyllenhaal@mail.com,39.0,Actor,American,"December 19, 1980",Yes
7,Maggie,Gyllenhaal,MaggieGyllenhaal@mail.com,42.0,producer,American,"November 16, 1977",No


In [15]:
# Vectorised string test via the .str accessor: True where the email
# contains the literal substring 'gmail' (regex=False: plain text, not a
# pattern). na=False makes missing values count as "no match" instead of
# producing NaN in the mask.
filt = df['email'].str.contains('gmail', na=False, regex=False)

In [16]:
# Label slices in .loc include BOTH endpoints: 'first':'email' selects the
# columns first, last and email for the rows matched by the mask.
df.loc[filt, 'first':'email']

Unnamed: 0,first,last,email
3,David,Tom,DavidTom@gmail.com
4,Nicholle,Tom,NicholleTom@gmail.com
