Choosing things based on conditions

In [1]:
import pandas as pd

In [2]:
# Columns to read from the spreadsheet; 'name' is used as the row index when loading.
cols = [
    'name',
    'survived',
    'pclass',
    'fare',
    'age',
    'sex',
]

In [4]:
# Read only the columns listed in `cols` and label each row by passenger name.
df = pd.read_excel(
    'titanic3.xls',
    usecols=cols,
    index_col='name',
)
df

Unnamed: 0_level_0,pclass,survived,sex,age,fare
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Allen, Miss. Elisabeth Walton",1,1,female,29.0000,211.3375
"Allison, Master. Hudson Trevor",1,1,male,0.9167,151.5500
"Allison, Miss. Helen Loraine",1,0,female,2.0000,151.5500
"Allison, Mr. Hudson Joshua Creighton",1,0,male,30.0000,151.5500
"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",1,0,female,25.0000,151.5500
...,...,...,...,...,...
"Zabour, Miss. Hileni",3,0,female,14.5000,14.4542
"Zabour, Miss. Thamine",3,0,female,,14.4542
"Zakarian, Mr. Mapriededer",3,0,male,26.5000,7.2250
"Zakarian, Mr. Ortin",3,0,male,27.0000,7.2250


In [5]:
# Compare every value in the 'sex' column against 'male'.
# The result is a boolean Series (True where the passenger is male),
# not yet a filtered DataFrame.
df['sex'].eq('male')

name
Allen, Miss. Elisabeth Walton                      False
Allison, Master. Hudson Trevor                      True
Allison, Miss. Helen Loraine                       False
Allison, Mr. Hudson Joshua Creighton                True
Allison, Mrs. Hudson J C (Bessie Waldo Daniels)    False
                                                   ...  
Zabour, Miss. Hileni                               False
Zabour, Miss. Thamine                              False
Zakarian, Mr. Mapriededer                           True
Zakarian, Mr. Ortin                                 True
Zimmerman, Mr. Leo                                  True
Name: sex, Length: 1309, dtype: bool

In [6]:
# Passing the boolean Series to the indexer keeps only the rows where it is True.
df.loc[df['sex'] == 'male']

Unnamed: 0_level_0,pclass,survived,sex,age,fare
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Allison, Master. Hudson Trevor",1,1,male,0.9167,151.5500
"Allison, Mr. Hudson Joshua Creighton",1,0,male,30.0000,151.5500
"Anderson, Mr. Harry",1,1,male,48.0000,26.5500
"Andrews, Mr. Thomas Jr",1,0,male,39.0000,0.0000
"Artagaveytia, Mr. Ramon",1,0,male,71.0000,49.5042
...,...,...,...,...,...
"Yousif, Mr. Wazli",3,0,male,,7.2250
"Yousseff, Mr. Gerious",3,0,male,,14.4583
"Zakarian, Mr. Mapriededer",3,0,male,26.5000,7.2250
"Zakarian, Mr. Ortin",3,0,male,27.0000,7.2250


In [8]:
# Store the condition in a variable first, then use it to filter the rows.
# Rows with a missing age compare as False and are therefore dropped.
some_new_condition = df['age'].gt(65)
df.loc[some_new_condition]

Unnamed: 0_level_0,pclass,survived,sex,age,fare
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Artagaveytia, Mr. Ramon",1,0,male,71.0,49.5042
"Barkworth, Mr. Algernon Henry Wilson",1,1,male,80.0,30.0
"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",1,1,female,76.0,78.85
"Crosby, Capt. Edward Gifford",1,0,male,70.0,71.0
"Goldschmidt, Mr. George B",1,0,male,71.0,34.6542
"Straus, Mr. Isidor",1,0,male,67.0,221.7792
"Mitchell, Mr. Henry Michael",2,0,male,70.0,10.5
"Wheadon, Mr. Edward H",2,0,male,66.0,10.5
"Connors, Mr. Patrick",3,0,male,70.5,7.75
"Svensson, Mr. Johan",3,0,male,74.0,7.775


### Multiple conditions

* For multiple conditions we use bitwise operators when working with pandas: `&` (and), `|` (or), `~` (not)
* Enclose each conditional expression in parentheses when using comparison operators, because `&` and `|` bind more tightly than comparisons such as `==`, `<` and `>`

Selecting all females that were in 1st class (to keep only those who also survived, add a third condition: `& (df['survived'] == 1)`)

In [9]:
# Build each boolean mask separately so the combined filter below reads clearly.
my_condition = df['sex'].eq('female')
second_cond = df['pclass'].eq(1)

# `&` combines the masks element-wise: a row is kept only when both are True.
df.loc[my_condition & second_cond]

Unnamed: 0_level_0,pclass,survived,sex,age,fare
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Allen, Miss. Elisabeth Walton",1,1,female,29.0,211.3375
"Allison, Miss. Helen Loraine",1,0,female,2.0,151.5500
"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",1,0,female,25.0,151.5500
"Andrews, Miss. Kornelia Theodosia",1,1,female,63.0,77.9583
"Appleton, Mrs. Edward Dale (Charlotte Lamson)",1,1,female,53.0,51.4792
...,...,...,...,...,...
"Wick, Mrs. George Dennick (Mary Hitchcock)",1,1,female,45.0,164.8667
"Widener, Mrs. George Dunton (Eleanor Elkins)",1,1,female,50.0,211.5000
"Willard, Miss. Constance",1,1,female,21.0,26.5500
"Wilson, Miss. Helen Alice",1,1,female,31.0,134.5000


Let's list the richest kids on the Titanic (passengers under 21 who paid a fare above 200)

In [11]:
# Each comparison is wrapped in its own parentheses before being joined with `&`.
rich_kid_cond = (
    (df['age'] < 21)
    & (df['fare'] > 200)
)
rich = df.loc[rich_kid_cond]

# NOTE: with columns=['age', 'fare'] the rows are ranked by age first;
# fare is only used to break ties between passengers of the same age.
rich.nlargest(5, columns=['age', 'fare'])

Unnamed: 0_level_0,pclass,survived,sex,age,fare
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Fortune, Mr. Charles Alexander",1,0,male,19.0,263.0
"Ryerson, Miss. Emily Borie",1,1,female,18.0,262.375
"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",1,1,female,18.0,227.525
"Madill, Miss. Georgette Alexandra",1,1,female,15.0,211.3375
"Ryerson, Master. John Borie",1,1,male,13.0,262.375
