# Filtering DataFrames with many Conditions 

##  AND

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('titanic.csv',sep='\t')

In [3]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
filter_sex = df.Sex == 'male'

In [5]:
filter_age = df.Age > 20

In [6]:
(filter_sex & filter_age).head()

0     True
1    False
2    False
3    False
4     True
dtype: bool

In [7]:
#getting the first 10 rows (passengers: men AND over 20 years old)
male_survivor = df.loc[filter_sex & filter_age,['Name','Age','Pclas','Survived','Sex']]
male_survivor.head(10)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,Name,Age,Pclas,Survived,Sex
0,"Braund, Mr. Owen Harris",22.0,,0,male
4,"Allen, Mr. William Henry",35.0,,0,male
6,"McCarthy, Mr. Timothy J",54.0,,0,male
13,"Andersson, Mr. Anders Johan",39.0,,0,male
20,"Fynney, Mr. Joseph J",35.0,,0,male
21,"Beesley, Mr. Lawrence",34.0,,1,male
23,"Sloper, Mr. William Thompson",28.0,,1,male
30,"Uruchurtu, Don. Manuel E",40.0,,0,male
33,"Wheadon, Mr. Edward H",66.0,,0,male
34,"Meyer, Mr. Edgar Joseph",28.0,,0,male


In [8]:
male_survivor.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62 entries, 0 to 155
Data columns (total 5 columns):
Name        62 non-null object
Age         62 non-null float64
Pclas       0 non-null float64
Survived    62 non-null int64
Sex         62 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 2.9+ KB


In [9]:
#note the only 11% of males over 20 yo, survived the disaster
male_survivor.describe()

Unnamed: 0,Age,Pclas,Survived
count,62.0,0.0,62.0
mean,35.016129,,0.112903
std,13.313671,,0.319058
min,21.0,,0.0
25%,24.25,,0.0
50%,32.0,,0.0
75%,41.625,,0.0
max,71.0,,1.0


In [10]:
#in relation to total passengers, 34% survived the disaster
total_passengers = df[['Age','Pclass','Survived']]
total_passengers.describe()

Unnamed: 0,Age,Pclass,Survived
count,126.0,156.0,156.0
mean,28.141508,2.423077,0.346154
std,14.61388,0.795459,0.477275
min,0.83,1.0,0.0
25%,19.0,2.0,0.0
50%,26.0,3.0,0.0
75%,35.0,3.0,1.0
max,71.0,3.0,1.0


## OR

In [11]:
filter_female = df.Sex == 'female'
filter_female.head(10)

0    False
1     True
2     True
3     True
4    False
5    False
6    False
7    False
8     True
9     True
Name: Sex, dtype: bool

In [12]:
filter_child = df.Age < 14
filter_child.head(10)

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7     True
8    False
9    False
Name: Age, dtype: bool

In [13]:
(filter_female | filter_child).head(10)

0    False
1     True
2     True
3     True
4    False
5    False
6    False
7     True
8     True
9     True
dtype: bool

In [14]:
#getting the first 10 rows (passengers: women OR children under 14 yo)
woman_or_child = df.loc[filter_female | filter_child,['Name','Pclas','Sex','Age','Survived']]
woman_or_child.head(10)

Unnamed: 0,Name,Pclas,Sex,Age,Survived
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",,female,38.0,1
2,"Heikkinen, Miss. Laina",,female,26.0,1
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",,female,35.0,1
7,"Palsson, Master. Gosta Leonard",,male,2.0,0
8,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",,female,27.0,1
9,"Nasser, Mrs. Nicholas (Adele Achem)",,female,14.0,1
10,"Sandstrom, Miss. Marguerite Rut",,female,4.0,1
11,"Bonnell, Miss. Elizabeth",,female,58.0,1
14,"Vestrom, Miss. Hulda Amanda Adolfina",,female,14.0,0
15,"Hewlett, Mrs. (Mary D Kingcome)",,female,55.0,1


In [15]:
woman_or_child.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 63 entries, 1 to 151
Data columns (total 5 columns):
Name        63 non-null object
Pclas       0 non-null float64
Sex         63 non-null object
Age         54 non-null float64
Survived    63 non-null int64
dtypes: float64(2), int64(1), object(2)
memory usage: 3.0+ KB


In [16]:
#note that 66% of women and children survived the disaster
woman_or_child.describe()

Unnamed: 0,Pclas,Age,Survived
count,0.0,54.0,63.0
mean,,22.01537,0.666667
std,,13.733073,0.475191
min,,0.83,0.0
25%,,14.0,0.0
50%,,20.5,1.0
75%,,29.75,1.0
max,,58.0,1.0


In [17]:
# 34% of total passengers survived the disaster
total_passengers.describe()

Unnamed: 0,Age,Pclass,Survived
count,126.0,156.0,156.0
mean,28.141508,2.423077,0.346154
std,14.61388,0.795459,0.477275
min,0.83,1.0,0.0
25%,19.0,2.0,0.0
50%,26.0,3.0,0.0
75%,35.0,3.0,1.0
max,71.0,3.0,1.0
