In [12]:
import pandas as pd

import warnings

# Suppress all FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [13]:
# Read the tips data from a CSV file (relative path) into `df`,
# then preview its first ten rows.
df = pd.read_csv("data_tips.csv")
print(df.iloc[:10])

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4
5       25.29  4.71    Male     No  Sun  Dinner     4
6        8.77  2.00    Male     No  Sun  Dinner     2
7       26.88  3.12    Male     No  Sun  Dinner     4
8       15.04  1.96    Male     No  Sun  Dinner     2
9       14.78  3.23    Male     No  Sun  Dinner     2


In [22]:
# Step 1: before filtering, let's list the distinct values in some of the columns
columns = ['sex', 'smoker', 'day', 'time', 'size']
for col in columns:
    print(f"Column: {col}")
    # Emit the unique values and the separator line with a single print call
    print(df[col].unique(), "-----", sep="\n")


Column: sex
['Female' 'Male']
-----
Column: smoker
['No' 'Yes']
-----
Column: day
['Sun' 'Sat' 'Thur' 'Fri']
-----
Column: time
['Dinner' 'Lunch']
-----
Column: size
[2 3 4 1 6 5]
-----


In [17]:
# Filter: keep only the rows where the customer is female
mask = df['sex'].eq('Female')  # boolean Series, True for matching rows
df_filter = df.loc[mask]
print(df_filter.head())  # preview of the first few matching records
print("#########################")
print(df_filter.shape[0])  # number of matching records

    total_bill   tip     sex smoker  day    time  size
0        16.99  1.01  Female     No  Sun  Dinner     2
4        24.59  3.61  Female     No  Sun  Dinner     4
11       35.26  5.00  Female     No  Sun  Dinner     4
14       14.83  3.02  Female     No  Sun  Dinner     2
16       10.33  1.67  Female     No  Sun  Dinner     3
#########################
87


In [18]:
# Filter: female customers who are also smokers (both conditions must hold)
mask = df['sex'].eq('Female') & df['smoker'].eq('Yes')
df_filter = df.loc[mask]
# Preview, separator, and match count, one per line
print(df_filter.head(), "#########################", len(df_filter), sep="\n")

    total_bill   tip     sex smoker  day    time  size
67        3.07  1.00  Female    Yes  Sat  Dinner     1
72       26.86  3.14  Female    Yes  Sat  Dinner     2
73       25.28  5.00  Female    Yes  Sat  Dinner     2
92        5.75  1.00  Female    Yes  Fri  Dinner     2
93       16.32  4.30  Female    Yes  Fri  Dinner     2
#########################
33


In [19]:
# Filter: customers whose total bill is strictly below 30 or strictly above 50
mask = df['total_bill'].lt(30) | df['total_bill'].gt(50)
df_filter = df.loc[mask]
print(df_filter.head())
print("#########################")
print(df_filter.shape[0])  # number of matching records

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4
#########################
213


In [20]:
# Filter: customers whose tip exceeded 20% of their total bill.
# Note: this adds a `tip_percent` column to `df` itself, so it is
# present in every later use of `df`.
df['tip_percent'] = df['tip'].div(df['total_bill']).mul(100)
mask = df['tip_percent'].gt(20)

df_filter = df.loc[mask]
# Preview, separator, and match count, one per line
print(df_filter.head(), "#########################", len(df_filter), sep="\n")

    total_bill   tip     sex smoker  day    time  size  tip_percent
6         8.77  2.00    Male     No  Sun  Dinner     2    22.805017
9        14.78  3.23    Male     No  Sun  Dinner     2    21.853857
14       14.83  3.02  Female     No  Sun  Dinner     2    20.364127
17       16.29  3.71    Male     No  Sun  Dinner     3    22.774708
18       16.97  3.50  Female     No  Sun  Dinner     3    20.624632
#########################
39


In [21]:
# Filter: male customers at lunch whose total bill lies between $15 and $30
# (both bounds inclusive)
mask = (
    df['time'].eq('Lunch')
    & df['sex'].eq('Male')
    & df['total_bill'].ge(15)
    & df['total_bill'].le(30)
)

df_filter = df.loc[mask]
print(df_filter.head())
print("#########################")
print(df_filter.shape[0])  # number of matching records

    total_bill   tip   sex smoker   day   time  size  tip_percent
77       27.20  4.00  Male     No  Thur  Lunch     4    14.705882
78       22.76  3.00  Male     No  Thur  Lunch     2    13.181019
79       17.29  2.71  Male     No  Thur  Lunch     2    15.673800
80       19.44  3.00  Male    Yes  Thur  Lunch     2    15.432099
81       16.66  3.40  Male     No  Thur  Lunch     2    20.408163
#########################
18


In [23]:
# Filter: female smokers who dined on a weekend (Sat/Sun) and tipped more than $4.
# Each condition is named separately, then all four are combined.
is_female = df['sex'].eq('Female')
is_smoker = df['smoker'].eq('Yes')
on_weekend = df['day'].isin(['Sat', 'Sun'])
tip_over_4 = df['tip'].gt(4)
mask = is_female & is_smoker & on_weekend & tip_over_4

df_filter = df.loc[mask]
# Preview, separator, and match count, one per line
print(df_filter.head(), "#########################", len(df_filter), sep="\n")

     total_bill  tip     sex smoker  day    time  size  tip_percent
73        25.28  5.0  Female    Yes  Sat  Dinner     2    19.778481
214       28.17  6.5  Female    Yes  Sat  Dinner     3    23.074192
#########################
2


In [24]:
# Filter: tables of four or more people where the tip per person is under $1.5.
# Note: this adds a `tip_per_person` column to `df` itself.
df['tip_per_person'] = df['tip'].div(df['size'])
mask = df['size'].ge(4) & df['tip_per_person'].lt(1.5)

df_filter = df.loc[mask]
print(df_filter.head())
print("#########################")
print(df_filter.shape[0])  # number of matching records

    total_bill   tip     sex smoker  day    time  size  tip_percent  \
4        24.59  3.61  Female     No  Sun  Dinner     4    14.680765   
5        25.29  4.71    Male     No  Sun  Dinner     4    18.623962   
7        26.88  3.12    Male     No  Sun  Dinner     4    11.607143   
11       35.26  5.00  Female     No  Sun  Dinner     4    14.180374   
13       18.43  3.00    Male     No  Sun  Dinner     4    16.277808   

    tip_per_person  
4           0.9025  
5           1.1775  
7           0.7800  
11          1.2500  
13          0.7500  
#########################
41
