## Conditional filtering

In [12]:
import pandas as pd

In [13]:
# Read the tab-delimited campaign file into a DataFrame and preview the first rows.
df = pd.read_csv(
    "./data/marketing_campaign.csv",
    delimiter="\t",
)
df.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,Graduation,Single,58138.0,0,0,04-09-2012,58,635,...,7,0,0,0,0,0,0,3,11,1
1,2174,1954,Graduation,Single,46344.0,1,1,08-03-2014,38,11,...,5,0,0,0,0,0,0,3,11,0
2,4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,...,4,0,0,0,0,0,0,3,11,0
3,6182,1984,Graduation,Together,26646.0,1,0,10-02-2014,26,11,...,6,0,0,0,0,0,0,3,11,0
4,5324,1981,PhD,Married,58293.0,1,0,19-01-2014,94,173,...,5,0,0,0,0,0,0,3,11,0


### Campaign for targeting highly educated folks above a certain income with no children at home

In [14]:
# Task
# Extract customers with advanced degrees: Education is 'Graduation', 'PhD', or 'Master'
# Income is greater than 65,000
# No children living at home

# Use Education, Kidhome, and Income columns

In [15]:
# Count how many rows carry each Education label (used to pick the mask values).
df.Education.value_counts()

Education
Graduation    1127
PhD            486
Master         370
2n Cycle       203
Basic           54
Name: count, dtype: int64

In [16]:
# Mask 1: True where Education is one of the three targeted degree labels.
m1 = df["Education"].isin(
    [
        "Graduation",
        "PhD",
        "Master",
    ]
)
m1

0       True
1       True
2       True
3       True
4       True
        ... 
2235    True
2236    True
2237    True
2238    True
2239    True
Name: Education, Length: 2240, dtype: bool

In [17]:
# Mask 2: True where Income is strictly above 65,000.
# Rows with a missing Income compare as False and are therefore excluded.
m2 = df["Income"].gt(65_000)
m2

0       False
1       False
2        True
3       False
4       False
        ...  
2235    False
2236    False
2237    False
2238     True
2239    False
Name: Income, Length: 2240, dtype: bool

In [18]:
# Check the storage type of Kidhome before comparing it against an integer.
df.dtypes["Kidhome"]

dtype('int64')

In [19]:
# Show the distinct Kidhome values and how often each occurs.
df.Kidhome.value_counts()

Kidhome
0    1293
1     899
2      48
Name: count, dtype: int64

In [20]:
# Mask 3: True where Kidhome equals zero.
m3 = df["Kidhome"].eq(0)
m3

0        True
1       False
2        True
3       False
4       False
        ...  
2235     True
2236    False
2237     True
2238     True
2239    False
Name: Kidhome, Length: 2240, dtype: bool

In [21]:
# Keep only the rows that satisfy all three masks at once (all columns retained).
df[m1 & m2 & m3]

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
2,4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,...,4,0,0,0,0,0,0,3,11,0
15,2114,1946,PhD,Single,82800.0,0,0,24-11-2012,23,1006,...,3,0,0,1,1,0,0,3,11,1
18,6565,1949,Master,Married,76995.0,0,1,28-03-2013,91,1012,...,5,0,0,0,1,0,0,3,11,0
23,4047,1954,PhD,Married,65324.0,0,1,11-01-2014,0,384,...,4,0,0,0,0,0,0,3,11,0
29,1966,1965,PhD,Married,84618.0,0,0,22-11-2013,96,684,...,2,0,0,1,0,0,0,3,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2203,2736,1978,Graduation,Married,73807.0,0,1,02-01-2013,88,366,...,2,0,0,0,0,0,0,3,11,0
2211,10469,1981,Graduation,Together,88325.0,0,0,05-02-2014,42,519,...,2,0,0,0,0,0,0,3,11,0
2217,9589,1948,PhD,Widow,82032.0,0,0,05-04-2014,54,332,...,1,0,0,0,0,0,0,3,11,0
2221,7366,1982,Master,Single,75777.0,0,0,04-07-2013,12,712,...,1,0,1,1,0,0,0,3,11,1


In [None]:
# done