## Grouping
Grouping in pandas follows the split-apply-combine pattern: `groupby` splits the data into groups based on some criteria, applies a function to each group, and then combines the results.

In [1]:
import pandas as pd

In [2]:
# Small employee roster used by every grouping example below.
# Rows are written record-by-record; the explicit index runs 1..7 instead of
# the default 0-based labels.
df = pd.DataFrame(
    data=[
        ('Alice', 'IT', 20000, 'M'),
        ('Bob', 'Finance', 36000, 'M'),
        ('Elizabeth', 'HR', 100000, 'F'),
        ('Smith', 'IT', 40000, 'M'),
        ('Rahila', 'IT', 40000, 'F'),
        ('John', 'Finance', 25000, 'M'),
        ('Mehak', 'HR', 50000, 'F'),
    ],
    columns=['Name', 'Department', 'Salary', 'Gender'],
    index=list(range(1, 8)),
)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Name,Department,Salary,Gender
1,Alice,IT,20000,M
2,Bob,Finance,36000,M
3,Elizabeth,HR,100000,F
4,Smith,IT,40000,M
5,Rahila,IT,40000,F
6,John,Finance,25000,M
7,Mehak,HR,50000,F


In [None]:
# Split the rows by department, keep only the salary column,
# then reduce each group to its average.
(
    df
    .groupby(by='Department')['Salary']
    .mean()
)

Department
Finance    30500.000000
HR         75000.000000
IT         33333.333333
Name: Salary, dtype: float64


In [8]:
# Group on the (Department, Gender) pair and compute several salary
# statistics at once; each name passed to agg becomes one result column.
(
    df
    .groupby(by=['Department', 'Gender'])['Salary']
    .agg(['mean', 'max', 'min', 'sum'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,max,min,sum
Department,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Finance,M,30500.0,36000,25000,61000
HR,F,75000.0,100000,50000,150000
IT,F,40000.0,40000,40000,40000
IT,M,30000.0,40000,20000,60000


In [9]:
# Number of rows that fall into each department.
(
    df
    .groupby(by='Department')
    .size()
)

Department
Finance    2
HR         2
IT         3
dtype: int64

In [11]:
# Keep the GroupBy object so it can be iterated: each step yields the
# group key together with the sub-frame of rows sharing that key.
grouped = df.groupby(by='Department')

for d_name, group in grouped:
    # A single call writes the header, the sub-frame and a trailing blank line.
    print(f'Department: {d_name}', group, '', sep='\n')

Department: Finance
   Name Department  Salary Gender
2   Bob    Finance   36000      M
6  John    Finance   25000      M

Department: HR
        Name Department  Salary Gender
3  Elizabeth         HR  100000      F
7      Mehak         HR   50000      F

Department: IT
     Name Department  Salary Gender
1   Alice         IT   20000      M
4   Smith         IT   40000      M
5  Rahila         IT   40000      F

