In [1]:
# Importing necessary libraries
import pandas as pd
import seaborn as sns

# Fetch the 'tips' example dataset through seaborn's dataset loader
df = sns.load_dataset('tips')

# Preview the untouched data: a header line followed by the first rows
print("Original Data:", df.head(), sep="\n")

# Step 1: Min-max normalize the 'total_bill' column
# This scales the 'total_bill' values between 0 and 1.
# The column bounds are computed once and the arithmetic is applied to the
# whole column at once; evaluating min()/max() inside a per-row lambda would
# rescan the entire column for every single row.
# NOTE: if every bill is identical the range is 0 and the result is NaN.
bill = df['total_bill']
bill_min = bill.min()
bill_range = bill.max() - bill_min
df['scaled_total_bill'] = (bill - bill_min) / bill_range

# Step 2: Upper-case the 'sex' labels
# Each value is passed through str.upper so the text is standardized
df['sex'] = df['sex'].map(str.upper)

# Preview the dataset after the transformations: header line, then the rows
print("\nTransformed Data:", df.head(), sep="\n")

# Step 3: Average 'total_bill' per 'sex' group
# observed=False is passed explicitly: it is the long-standing default for
# categorical group keys, but newer pandas releases warn when it is left
# implicit and plan to change it. Spelling it out keeps the result stable.
# (It has no effect when the key is not categorical.)
mean_bill_by_sex = df.groupby('sex', observed=False)['total_bill'].mean()

# Move the group key out of the index into a regular 'sex' column
grouped_df = mean_bill_by_sex.reset_index()

# Display the grouped data
print("\nGrouped Data (Average total_bill per sex):")
print(grouped_df.head())


Original Data:
   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4

Transformed Data:
   total_bill   tip     sex smoker  day    time  size  scaled_total_bill
0       16.99  1.01  FEMALE     No  Sun  Dinner     2           0.291579
1       10.34  1.66    MALE     No  Sun  Dinner     3           0.152283
2       21.01  3.50    MALE     No  Sun  Dinner     3           0.375786
3       23.68  3.31    MALE     No  Sun  Dinner     2           0.431713
4       24.59  3.61  FEMALE     No  Sun  Dinner     4           0.450775

Grouped Data (Average total_bill per sex):
      sex  total_bill
0    MALE   20.744076
1  FEMALE   18.056897
