In [1]:
import pandas as pd

# Toy dataset: four rows, one label column, one category column and one
# numeric column. Column order follows the keyword order below.
data = dict(
    column_name=["A", "B", "C", "D"],
    category_column=["X", "Y", "X", "Z"],
    numeric_column=[10, 20, 30, 40],
)
df = pd.DataFrame(data)

In [2]:
# Split the frame into one sub-frame per distinct "column_name" value.
grouped = df.groupby("column_name")

# Iterating the GroupBy yields (group key, sub-DataFrame) pairs; show the
# key on one line followed by the rows that belong to it.
for name, group in grouped:
    print(name, group, sep="\n")
    

A
  column_name category_column  numeric_column
0           A               X              10
B
  column_name category_column  numeric_column
1           B               Y              20
C
  column_name category_column  numeric_column
2           C               X              30
D
  column_name category_column  numeric_column
3           D               Z              40


In [4]:
# Total of numeric_column within each column_name group; left as the cell's
# final expression so the notebook displays the resulting Series.
(
    df
    .groupby("column_name")["numeric_column"]
    .sum()
)

column_name
A    10
B    20
C    30
D    40
Name: numeric_column, dtype: int64

In [6]:
# Per-group mean, max and min of numeric_column. Passing a list of
# statistics gives the result two-level column labels (column, statistic).
df.groupby("column_name").agg(
    {
        "numeric_column": ["mean", "max", "min"],
    }
)

Unnamed: 0_level_0,numeric_column,numeric_column,numeric_column
Unnamed: 0_level_1,mean,max,min
column_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
A,10.0,10,10
B,20.0,20,20
C,30.0,30,30
D,40.0,40,40


In [8]:
# Average numeric_column for every distinct category_column value.
pivot = pd.pivot_table(
    df,
    index="category_column",
    values="numeric_column",
    aggfunc="mean",
)
print(pivot)

                 numeric_column
category_column                
X                          20.0
Y                          20.0
Z                          40.0


In [9]:
def range_func(x):
    """Return the spread of ``x``: its largest value minus its smallest.

    ``x`` must expose ``max()`` and ``min()`` methods (for example a pandas
    Series). A single-valued input therefore yields 0.
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest

# Same pivot as before, but aggregating with the custom range_func so each
# category shows max - min of its numeric_column values.
pivot = pd.pivot_table(
    df,
    index="category_column",
    values="numeric_column",
    aggfunc=range_func,
)
print(pivot)

                 numeric_column
category_column                
X                            20
Y                             0
Z                             0


In [12]:
# Exercise 1: Group data by a categorical column

import pandas as pd

# Six students: their class label, test score and age.
data = {
    "class": ["A", "B", "A", "B", "C", "C"],
    "score": [85, 90, 88, 72, 95, 80],
    "age": [15, 16, 15, 17, 16, 15],
}

# Each dict key becomes a column, in insertion order.
df = pd.DataFrame.from_dict(data)

print(f"Original Dataset:\n{df}\n")

# Average every remaining column within each class.
grouped = (
    df
    .groupby("class")
    .mean()
)
print(f"Grouped by class:\n{grouped}")

Original Dataset:
  class  score  age
0     A     85   15
1     B     90   16
2     A     88   15
3     B     72   17
4     C     95   16
5     C     80   15

Grouped by class:
       score   age
class             
A       86.5  15.0
B       81.0  16.5
C       87.5  15.5


In [14]:
# Exercise 2: Calculate summary statistics for grouped data

# Request the same three statistics for both measured columns; the result
# carries two-level column labels (column, statistic).
stats = df.groupby("class").agg(
    {column: ["mean", "max", "min"] for column in ("score", "age")}
)

print(f"Summary statistics:\n{stats}")

Summary statistics:
      score           age        
       mean max min  mean max min
class                            
A      86.5  88  85  15.0  15  15
B      81.0  90  72  16.5  17  16
C      87.5  95  80  15.5  16  15
