In [4]:
import pandas as pd

In [5]:
# Sample data: one row per employee -> (department, employee, salary)
df = pd.DataFrame(
    [
        ('Sales', 'John', 50000),
        ('IT', 'Jane', 60000),
        ('Sales', 'Bob', 55000),
        ('IT', 'Mary', 65000),
        ('Sales', 'Alice', 52000),
    ],
    columns=['department', 'employee', 'salary'],
)

#### The GroupBy object
- Calling `df.groupby(...)` does not return new DataFrames directly
- Instead, it returns a GroupBy object; we then call methods on it (e.g. `.mean()`, `.agg()`) to compute per-group results

In [6]:
# Group the rows of `df` by their 'department' value.
# The result is a GroupBy object, not a DataFrame.
grouped = df.groupby(by='department')

# Basics
# NOTE: the printed labels say "group", but the object inspected is `grouped`.
print('type(group): ', type(grouped))                      # grouping over the whole frame
print('type(group["salary"]): ', type(grouped['salary']))  # grouping narrowed to one column
print('# groups: ', len(grouped))                          # number of distinct departments
print('sales: ', grouped.get_group('Sales'))               # rows whose department is 'Sales'

type(group):  <class 'pandas.core.groupby.generic.DataFrameGroupBy'>
type(group["salary"]):  <class 'pandas.core.groupby.generic.SeriesGroupBy'>
# groups:  2
sales:    department employee  salary
0      Sales     John   50000
2      Sales      Bob   55000
4      Sales    Alice   52000


In [7]:
# 1. Mean salary per department
# The result is a Series named 'salary' whose index holds the group keys
# (the distinct 'department' values), not the original row labels.
avg_salary = grouped['salary'].mean()
print(
    type(avg_salary),
    "Average salary by department:",
    avg_salary,
    sep="\n",
)

<class 'pandas.core.series.Series'>
Average salary by department:
department
IT       62500.000000
Sales    52333.333333
Name: salary, dtype: float64


In [8]:
# 2. Several statistics in a single call
# Passing a list of function names produces a DataFrame:
# one row per department, one column per requested statistic.
summary = grouped['salary'].aggregate(['mean', 'min', 'max', 'count'])

print(type(summary))
print("\nSummary statistics by department:")
print(summary)

<class 'pandas.core.frame.DataFrame'>

Summary statistics by department:
                    mean    min    max  count
department                                   
IT          62500.000000  60000  65000      2
Sales       52333.333333  50000  55000      3


In [9]:
# 3. Per-column aggregations
# A dict maps each column to the function(s) applied to it. Because 'salary'
# gets a list of functions, the resulting columns form a two-level MultiIndex
# of (column, function) pairs.
custom_agg = grouped.aggregate({'salary': ['mean', 'max'], 'employee': 'count'})
print(custom_agg.columns, "\nCustom aggregations:", custom_agg, sep="\n")

MultiIndex([(  'salary',  'mean'),
            (  'salary',   'max'),
            ('employee', 'count')],
           )

Custom aggregations:
                  salary        employee
                    mean    max    count
department                              
IT          62500.000000  65000        2
Sales       52333.333333  55000        3
