In [1]:
import pandas as pd

# Sample dataset: 30 people, each with a name, an age and an income.
data = {
    'Name': [
        'Zane', 'Cathy', 'Leo', 'Mona', 'Alice', 'David', 'Grace', 'Nate', 'Ben', 'Olivia',
        'Bob', 'Victor', 'Quinn', 'Xander', 'Ella', 'Paul', 'Frank', 'Kate', 'Wendy', 'Henry',
        'Jack', 'Tina', 'Sam', 'Ivy', 'Cara', 'Rita', 'Yara', 'Dan', 'Uma', 'Abby'
    ],
    'Age': [
        42, 21, 30, 28, 22, 25, 20, 26, 44, 33,
        24, 40, 29, 41, 23, 31, 19, 30, 38, 22,
        21, 30, 34, 24, 43, 32, 39, 45, 36, 19
    ],
    'Income': [
        62500, 33000, 47000, 50000, 32000, 35000, 34000, 46000, 61500, 49000,
        31000, 63000, 52000, 60000, 30000, 51000, 29000, 47000, 61000, 36000,
        30000, 48500, 47000, 31000, 63500, 49500, 64000, 64500, 62000, 29000
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Age bin edges. With right=True every interval is (lower, upper], so the
# first bin (18, 24] holds ages 19-24 and the last bin (39, 45] holds 40-45.
bins = [18, 24, 29, 34, 39, 45]

# Build the labels from the edges so they can never drift out of sync with
# `bins`; for integer ages the interval (lower, upper] is "lower+1-upper".
labels = [f'{lower + 1}-{upper}' for lower, upper in zip(bins[:-1], bins[1:])]

# Create age group categorical variable
df['AgeGroup'] = pd.cut(df['Age'], bins=bins, labels=labels, right=True)

# pd.cut assigns NaN to ages outside the outer edges, and a groupby on
# AgeGroup would then skip those rows without warning -- fail loudly instead.
assert df['AgeGroup'].notna().all(), "some ages fall outside the defined bins"

In [3]:
# Per-age-group descriptive statistics for Age and Income, rounded to 2 decimals.
# The same five statistics are requested for both columns, so list them once.
stat_names = ['mean', 'median', 'min', 'max', 'std']
summary_stats = (
    df.groupby('AgeGroup', observed=True)
    .agg({column: stat_names for column in ('Age', 'Income')})
    .round(2)
)

In [4]:
# Plain-text view of the grouped statistics; the columns are still two-level
# here (source column on top, statistic name underneath).
print(summary_stats)

            Age                         Income                                
           mean median min max   std      mean   median    min    max      std
AgeGroup                                                                      
19-24     21.50   21.5  19  24  1.84  31500.00  31000.0  29000  36000  2273.03
25-29     27.00   27.0  25  29  1.83  45750.00  48000.0  35000  52000  7588.37
30-34     31.43   31.0  30  34  1.62  48428.57  48500.0  47000  51000  1539.17
35-39     37.67   38.0  36  39  1.53  62333.33  62000.0  61000  64000  1527.53
40-45     42.50   42.5  40  45  1.87  62500.00  62750.0  60000  64500  1581.14


In [5]:
# Print the whole 30-row frame, including the derived AgeGroup column.
print(df)

      Name  Age  Income AgeGroup
0     Zane   42   62500    40-45
1    Cathy   21   33000    19-24
2      Leo   30   47000    30-34
3     Mona   28   50000    25-29
4    Alice   22   32000    19-24
5    David   25   35000    25-29
6    Grace   20   34000    19-24
7     Nate   26   46000    25-29
8      Ben   44   61500    40-45
9   Olivia   33   49000    30-34
10     Bob   24   31000    19-24
11  Victor   40   63000    40-45
12   Quinn   29   52000    25-29
13  Xander   41   60000    40-45
14    Ella   23   30000    19-24
15    Paul   31   51000    30-34
16   Frank   19   29000    19-24
17    Kate   30   47000    30-34
18   Wendy   38   61000    35-39
19   Henry   22   36000    19-24
20    Jack   21   30000    19-24
21    Tina   30   48500    30-34
22     Sam   34   47000    30-34
23     Ivy   24   31000    19-24
24    Cara   43   63500    40-45
25    Rita   32   49500    30-34
26    Yara   39   64000    35-39
27     Dan   45   64500    40-45
28     Uma   36   62000    35-39
29    Abby   19   29000    19-24

In [6]:
# Flatten the two-level (column, statistic) header into single names such as
# 'Age_mean'. The guard keeps this cell safe to re-run: once the columns are
# plain strings, '_'.join(col) would iterate over the characters of each name
# and turn 'Age_mean' into 'A_g_e___m_e_a_n'.
if summary_stats.columns.nlevels > 1:
    summary_stats.columns = ['_'.join(col) for col in summary_stats.columns]

In [7]:
# Print df once more; the column flattening only reassigned
# summary_stats.columns, so this output matches the earlier print of df.
print(df)

      Name  Age  Income AgeGroup
0     Zane   42   62500    40-45
1    Cathy   21   33000    19-24
2      Leo   30   47000    30-34
3     Mona   28   50000    25-29
4    Alice   22   32000    19-24
5    David   25   35000    25-29
6    Grace   20   34000    19-24
7     Nate   26   46000    25-29
8      Ben   44   61500    40-45
9   Olivia   33   49000    30-34
10     Bob   24   31000    19-24
11  Victor   40   63000    40-45
12   Quinn   29   52000    25-29
13  Xander   41   60000    40-45
14    Ella   23   30000    19-24
15    Paul   31   51000    30-34
16   Frank   19   29000    19-24
17    Kate   30   47000    30-34
18   Wendy   38   61000    35-39
19   Henry   22   36000    19-24
20    Jack   21   30000    19-24
21    Tina   30   48500    30-34
22     Sam   34   47000    30-34
23     Ivy   24   31000    19-24
24    Cara   43   63500    40-45
25    Rita   32   49500    30-34
26    Yara   39   64000    35-39
27     Dan   45   64500    40-45
28     Uma   36   62000    35-39
29    Abby   19   29000    19-24

In [8]:
# Show the summary again, now with single-level column names (Age_mean, ...).
print(summary_stats)

          Age_mean  Age_median  Age_min  Age_max  Age_std  Income_mean  \
AgeGroup                                                                 
19-24        21.50        21.5       19       24     1.84     31500.00   
25-29        27.00        27.0       25       29     1.83     45750.00   
30-34        31.43        31.0       30       34     1.62     48428.57   
35-39        37.67        38.0       36       39     1.53     62333.33   
40-45        42.50        42.5       40       45     1.87     62500.00   

          Income_median  Income_min  Income_max  Income_std  
AgeGroup                                                     
19-24           31000.0       29000       36000     2273.03  
25-29           48000.0       35000       52000     7588.37  
30-34           48500.0       47000       51000     1539.17  
35-39           62000.0       61000       64000     1527.53  
40-45           62750.0       60000       64500     1581.14  


In [9]:
# Report the index type, per-column dtypes and non-null counts of the summary.
summary_stats.info()

<class 'pandas.core.frame.DataFrame'>
CategoricalIndex: 5 entries, 19-24 to 40-45
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Age_mean       5 non-null      float64
 1   Age_median     5 non-null      float64
 2   Age_min        5 non-null      int64  
 3   Age_max        5 non-null      int64  
 4   Age_std        5 non-null      float64
 5   Income_mean    5 non-null      float64
 6   Income_median  5 non-null      float64
 7   Income_min     5 non-null      int64  
 8   Income_max     5 non-null      int64  
 9   Income_std     5 non-null      float64
dtypes: float64(6), int64(4)
memory usage: 617.0 bytes


In [10]:
# Rich display of the first rows; head() defaults to five rows, which covers
# every age group in this summary.
summary_stats.head()

Unnamed: 0_level_0,Age_mean,Age_median,Age_min,Age_max,Age_std,Income_mean,Income_median,Income_min,Income_max,Income_std
AgeGroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
19-24,21.5,21.5,19,24,1.84,31500.0,31000.0,29000,36000,2273.03
25-29,27.0,27.0,25,29,1.83,45750.0,48000.0,35000,52000,7588.37
30-34,31.43,31.0,30,34,1.62,48428.57,48500.0,47000,51000,1539.17
35-39,37.67,38.0,36,39,1.53,62333.33,62000.0,61000,64000,1527.53
40-45,42.5,42.5,40,45,1.87,62500.0,62750.0,60000,64500,1581.14


In [11]:
# Print a caption followed by the per-group summary table beneath it
caption = "Summary statistics grouped by AgeGroup:\n"
print(caption, summary_stats, sep="\n")

Summary statistics grouped by AgeGroup:

          Age_mean  Age_median  Age_min  Age_max  Age_std  Income_mean  \
AgeGroup                                                                 
19-24        21.50        21.5       19       24     1.84     31500.00   
25-29        27.00        27.0       25       29     1.83     45750.00   
30-34        31.43        31.0       30       34     1.62     48428.57   
35-39        37.67        38.0       36       39     1.53     62333.33   
40-45        42.50        42.5       40       45     1.87     62500.00   

          Income_median  Income_min  Income_max  Income_std  
AgeGroup                                                     
19-24           31000.0       29000       36000     2273.03  
25-29           48000.0       35000       52000     7588.37  
30-34           48500.0       47000       51000     1539.17  
35-39           62000.0       61000       64000     1527.53  
40-45           62750.0       60000       64500     1581.14  


In [12]:
# Pull the Income_mean column out of the summary as a plain Python list,
# one entry per age group in index order
mean_income_list = summary_stats.loc[:, 'Income_mean'].to_list()
print("\nMean income per age group:", mean_income_list)


Mean income per age group: [31500.0, 45750.0, 48428.57, 62333.33, 62500.0]
