In [1]:
import pandas as pd

In [2]:
# Toy frame for the masking examples: three integer columns, five rows,
# and the default 0..4 row index.
example_columns = {
    'col0': [1, 3, 3, 5, 1],
    'col1': [7, 2, 94, 37, 49],
    'col2': [2, 6, 6, -1, 10],
}
df_example = pd.DataFrame(example_columns)
df_example  # bare last expression -> rendered as the cell output

Unnamed: 0,col0,col1,col2
0,1,7,2
1,3,2,6
2,3,94,6
3,5,37,-1
4,1,49,10


In [3]:
# .loc[rows, columns]: `:` keeps every row and 'col1' selects a single column by
# label, so the result is a Series rather than a DataFrame.
df_example.loc[:, 'col1']

0     7
1     2
2    94
3    37
4    49
Name: col1, dtype: int64

In [4]:
# Boolean mask with one entry per row of df_example (both use the default 0..4 index):
# True keeps the row, False drops it.
my_mask = pd.Series([True, False, True, True, False])
# Rows where the mask is True, all columns (`:`).
df_example.loc[my_mask, :]

Unnamed: 0,col0,col1,col2
0,1,7,2
2,3,94,6
3,5,37,-1


In [5]:
# Same selection with the column selector omitted; .loc then returns all columns.
df_example.loc[my_mask]

Unnamed: 0,col0,col1,col2
0,1,7,2
2,3,94,6
3,5,37,-1


In [6]:
# Shorthand: indexing the frame directly with a boolean Series also filters rows.
df_example[my_mask]

Unnamed: 0,col0,col1,col2
0,1,7,2
2,3,94,6
3,5,37,-1


In [8]:
# Invert the mask to keep the rows where it is False.
# `~` is the documented element-wise NOT for boolean masks. Unary `-` only happens
# to invert bool-dtype pandas objects and raises TypeError on a plain NumPy
# boolean array, so it should not be used for negation.
df_example[~my_mask]

Unnamed: 0,col0,col1,col2
1,3,2,6
4,1,49,10


In [9]:
# Same inverted selection through .loc. `~` (not unary `-`) is the element-wise
# boolean NOT operator for masks.
df_example.loc[~my_mask]

Unnamed: 0,col0,col1,col2
1,3,2,6
4,1,49,10


## Masking on Real Data

In [27]:
# Load the customer table. The path is relative, so it is resolved against the
# directory the notebook kernel is running in.
df = pd.read_csv('csv/small_customers.csv')
df

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges
0,5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91...,56,Male,1,No,Yes,63,No,Yes,Fiber optic,No,No,Electronic check,118.64
1,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,69,Female,0,No,No,62,Yes,No,No,No,Yes,Mailed check,110.47
2,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,46,Male,0,No,Yes,68,Yes,Yes,DSL,No,Yes,Bank transfer (automatic),103.28
3,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,32,Female,1,No,No,21,Yes,Yes,Fiber optic,Yes,Yes,Bank transfer (automatic),102.50
4,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,60,Female,1,Yes,Yes,66,Yes,Yes,No,No,Yes,Credit card (automatic),117.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ad48ff99415b2f007dc35b7eb553fd1eb35ebfa2f2f308...,46,Female,0,No,Yes,11,No,No,DSL,No,No,Mailed check,104.65
96,7b1a278f5abe8e9da907fc9c29dfd432d60dc76e17b0fa...,35,Female,0,Yes,Yes,24,Yes,Yes,Fiber optic,Yes,No,Credit card (automatic),103.01
97,d6d824abba4afde81129c71dea75b8100e96338da5f416...,43,Male,1,No,Yes,51,No,Yes,DSL,No,No,Credit card (automatic),102.22
98,29db0c6782dbd5000559ef4d9e953e300e2b479eed26d8...,61,Male,0,No,No,52,No,Yes,Fiber optic,No,No,Bank transfer (automatic),110.12


In [28]:
# Force TotalCharges to a numeric dtype so it can be compared against numbers.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
# NOTE(review): this overwrites the column in place; the raw values can only be
# recovered by re-reading the CSV.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

In [29]:
# Element-wise comparison -> boolean Series sharing df's index.
# NaN values in TotalCharges compare False here.
my_mask_paid_at_least_100 = df['TotalCharges'] >= 100
my_mask_paid_at_least_100

0     True
1     True
2     True
3     True
4     True
      ... 
95    True
96    True
97    True
98    True
99    True
Name: TotalCharges, Length: 100, dtype: bool

In [30]:
# Two-step version: filter the rows with the mask, then pull out the 'Age' column.
# Note that .get('Age') returns None instead of raising if the column is missing.
ages_of_paid_100 = df.loc[my_mask_paid_at_least_100].get('Age')
ages_of_paid_100

0     56
1     69
2     46
3     32
4     60
      ..
95    46
96    35
97    43
98    61
99    51
Name: Age, Length: 96, dtype: int64

In [31]:
# One-step version: row mask and column label in a single .loc call.
df.loc[my_mask_paid_at_least_100, 'Age']

0     56
1     69
2     46
3     32
4     60
      ..
95    46
96    35
97    43
98    61
99    51
Name: Age, Length: 96, dtype: int64

In [32]:
# Same selection with the mask expression written inline instead of stored in a variable.
df.loc[df['TotalCharges'] >= 100, 'Age']

0     56
1     69
2     46
3     32
4     60
      ..
95    46
96    35
97    43
98    61
99    51
Name: Age, Length: 96, dtype: int64

In [34]:
# True for rows where InternetService is missing (NaN/None), False otherwise.
mask_null = df['InternetService'].isna()
mask_null

0     False
1     False
2     False
3     False
4     False
      ...  
95    False
96    False
97    False
98    False
99    False
Name: InternetService, Length: 100, dtype: bool

In [35]:
# Rows with a missing InternetService value; an empty frame means there are none.
df[mask_null]

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges


In [36]:
# Two independent row conditions, kept as separate masks so they can be combined.
# (my_mask_paid_at_least_100 is re-created with the same expression used earlier.)
my_mask_paid_at_least_100 = df['TotalCharges'] >= 100
my_mask_adult = df['Age'] >= 18

In [37]:
# `&` is element-wise AND: keep rows that satisfy both conditions.
df[my_mask_paid_at_least_100 & my_mask_adult]

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges
0,5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91...,56,Male,1,No,Yes,63,No,Yes,Fiber optic,No,No,Electronic check,118.64
1,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,69,Female,0,No,No,62,Yes,No,No,No,Yes,Mailed check,110.47
2,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,46,Male,0,No,Yes,68,Yes,Yes,DSL,No,Yes,Bank transfer (automatic),103.28
3,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,32,Female,1,No,No,21,Yes,Yes,Fiber optic,Yes,Yes,Bank transfer (automatic),102.50
4,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,60,Female,1,Yes,Yes,66,Yes,Yes,No,No,Yes,Credit card (automatic),117.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ad48ff99415b2f007dc35b7eb553fd1eb35ebfa2f2f308...,46,Female,0,No,Yes,11,No,No,DSL,No,No,Mailed check,104.65
96,7b1a278f5abe8e9da907fc9c29dfd432d60dc76e17b0fa...,35,Female,0,Yes,Yes,24,Yes,Yes,Fiber optic,Yes,No,Credit card (automatic),103.01
97,d6d824abba4afde81129c71dea75b8100e96338da5f416...,43,Male,1,No,Yes,51,No,Yes,DSL,No,No,Credit card (automatic),102.22
98,29db0c6782dbd5000559ef4d9e953e300e2b479eed26d8...,61,Male,0,No,No,52,No,Yes,Fiber optic,No,No,Bank transfer (automatic),110.12


In [38]:
# Adults who did NOT pay at least 100.
# `~` is the element-wise boolean NOT; unary `-` is not a supported negation for
# boolean arrays in general (NumPy raises TypeError for it).
# Rows whose TotalCharges is NaN compare False against `>= 100`, so the negated
# mask selects them as well.
df[(~my_mask_paid_at_least_100) & my_mask_adult]

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges
7,7902699be42c8a8e46fbbb4501726517e86b22c56a189f...,38,Male,1,No,No,50,Yes,No,DSL,No,Yes,Bank transfer (automatic),90.29
10,4a44dc15364204a80fe80e9039455cc1608281820fe2b2...,36,Female,1,No,No,68,No,No,DSL,Yes,Yes,Credit card (automatic),80.02
15,e629fa6598d732768f7c726b4b621285f9c3b85303900a...,70,Male,0,Yes,No,47,Yes,No,No,Yes,Yes,Bank transfer (automatic),78.0
17,4523540f1504cd17100c4835e85b7eefd49911580f8eff...,57,Male,1,Yes,No,32,No,No,DSL,Yes,No,Credit card (automatic),65.85


In [39]:
# `|` is element-wise OR: keep rows that satisfy at least one of the conditions.
df[my_mask_paid_at_least_100 | my_mask_adult]

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges
0,5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91...,56,Male,1,No,Yes,63,No,Yes,Fiber optic,No,No,Electronic check,118.64
1,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,69,Female,0,No,No,62,Yes,No,No,No,Yes,Mailed check,110.47
2,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,46,Male,0,No,Yes,68,Yes,Yes,DSL,No,Yes,Bank transfer (automatic),103.28
3,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,32,Female,1,No,No,21,Yes,Yes,Fiber optic,Yes,Yes,Bank transfer (automatic),102.50
4,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,60,Female,1,Yes,Yes,66,Yes,Yes,No,No,Yes,Credit card (automatic),117.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ad48ff99415b2f007dc35b7eb553fd1eb35ebfa2f2f308...,46,Female,0,No,Yes,11,No,No,DSL,No,No,Mailed check,104.65
96,7b1a278f5abe8e9da907fc9c29dfd432d60dc76e17b0fa...,35,Female,0,Yes,Yes,24,Yes,Yes,Fiber optic,Yes,No,Credit card (automatic),103.01
97,d6d824abba4afde81129c71dea75b8100e96338da5f416...,43,Male,1,No,Yes,51,No,Yes,DSL,No,No,Credit card (automatic),102.22
98,29db0c6782dbd5000559ef4d9e953e300e2b479eed26d8...,61,Male,0,No,No,52,No,Yes,Fiber optic,No,No,Bank transfer (automatic),110.12


In [40]:
# "Paid less than 100 OR is an adult", with both conditions written inline.
# Each comparison is parenthesised because `|` binds tighter than `>=`.
# `~` (element-wise boolean NOT) applies only to the first parenthesised condition;
# it replaces unary `-`, which is not a supported negation for boolean arrays in general.
df[~(df['TotalCharges'] >= 100) | (df['Age'] >= 18)]

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges
0,5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91...,56,Male,1,No,Yes,63,No,Yes,Fiber optic,No,No,Electronic check,118.64
1,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,69,Female,0,No,No,62,Yes,No,No,No,Yes,Mailed check,110.47
2,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,46,Male,0,No,Yes,68,Yes,Yes,DSL,No,Yes,Bank transfer (automatic),103.28
3,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,32,Female,1,No,No,21,Yes,Yes,Fiber optic,Yes,Yes,Bank transfer (automatic),102.50
4,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,60,Female,1,Yes,Yes,66,Yes,Yes,No,No,Yes,Credit card (automatic),117.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ad48ff99415b2f007dc35b7eb553fd1eb35ebfa2f2f308...,46,Female,0,No,Yes,11,No,No,DSL,No,No,Mailed check,104.65
96,7b1a278f5abe8e9da907fc9c29dfd432d60dc76e17b0fa...,35,Female,0,Yes,Yes,24,Yes,Yes,Fiber optic,Yes,No,Credit card (automatic),103.01
97,d6d824abba4afde81129c71dea75b8100e96338da5f416...,43,Male,1,No,Yes,51,No,Yes,DSL,No,No,Credit card (automatic),102.22
98,29db0c6782dbd5000559ef4d9e953e300e2b479eed26d8...,61,Male,0,No,No,52,No,Yes,Fiber optic,No,No,Bank transfer (automatic),110.12


In [41]:
# Name the combined condition before using it: NOT(paid at least 100) OR adult.
# `~` is the element-wise boolean NOT; it replaces unary `-`, which only works on
# bool-dtype pandas objects by special-casing and fails on plain NumPy boolean arrays.
# NOTE(review): "1t" in the variable name looks like a typo for "lt" (less than);
# the name is left unchanged in case other cells reference it.
mask_paid_1t_100_or_adult = (~my_mask_paid_at_least_100) | my_mask_adult
df[mask_paid_1t_100_or_adult]

Unnamed: 0,CustomerID,Age,gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,DeviceProtection,PaperlessBilling,PaymentMethod,TotalCharges
0,5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91...,56,Male,1,No,Yes,63,No,Yes,Fiber optic,No,No,Electronic check,118.64
1,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,69,Female,0,No,No,62,Yes,No,No,No,Yes,Mailed check,110.47
2,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,46,Male,0,No,Yes,68,Yes,Yes,DSL,No,Yes,Bank transfer (automatic),103.28
3,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,32,Female,1,No,No,21,Yes,Yes,Fiber optic,Yes,Yes,Bank transfer (automatic),102.50
4,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,60,Female,1,Yes,Yes,66,Yes,Yes,No,No,Yes,Credit card (automatic),117.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ad48ff99415b2f007dc35b7eb553fd1eb35ebfa2f2f308...,46,Female,0,No,Yes,11,No,No,DSL,No,No,Mailed check,104.65
96,7b1a278f5abe8e9da907fc9c29dfd432d60dc76e17b0fa...,35,Female,0,Yes,Yes,24,Yes,Yes,Fiber optic,Yes,No,Credit card (automatic),103.01
97,d6d824abba4afde81129c71dea75b8100e96338da5f416...,43,Male,1,No,Yes,51,No,Yes,DSL,No,No,Credit card (automatic),102.22
98,29db0c6782dbd5000559ef4d9e953e300e2b479eed26d8...,61,Male,0,No,No,52,No,Yes,Fiber optic,No,No,Bank transfer (automatic),110.12
