# ✅ Data Wrangling with pandas – Answer Key

This notebook provides worked answers to the pandas data-wrangling exercises, covering:
- Transforming
- Joining
- Merging
- Appending
- Grouping and aggregating (bonus challenge)

## 📦 Step 1: Setup – Create sample data

In [4]:
import pandas as pd

# Employee roster: one row per employee, keyed by EmployeeID (1, 2, 3).
df_employees = pd.DataFrame(
    [
        (1, 'Alice', 'HR'),
        (2, 'Bob', 'IT'),
        (3, 'Charlie', 'Finance'),
    ],
    columns=['EmployeeID', 'Name', 'Department'],
)

# Salary records. The IDs overlap only partially with the roster:
# EmployeeID 3 has no salary row, and salary EmployeeID 4 has no roster row.
df_salaries = pd.DataFrame(
    [
        (1, 90000),
        (2, 60000),
        (4, 55000),
    ],
    columns=['EmployeeID', 'Salary'],
)

# Last expression of the cell: show both frames as a tuple.
(df_employees, df_salaries)

StatementMeta(, afa9a062-de78-4f7f-8ee2-20686eb6162a, 6, Finished, Available, Finished)

(   EmployeeID     Name Department
 0           1    Alice         HR
 1           2      Bob         IT
 2           3  Charlie    Finance,
    EmployeeID  Salary
 0           1   90000
 1           2   60000
 2           4   55000)

## ✂️ Step 2: Transform Columns

In [5]:
# Bucket each salary into a labelled band. pd.cut uses right-closed intervals by
# default, so the bands are (0, 55000] -> Low, (55000, 60000] -> Medium and
# (60000, 100000] -> High. A salary outside (0, 100000] would come back as NaN.
salary_bands = pd.cut(
    x=df_salaries['Salary'],
    bins=[0, 55000, 60000, 100000],
    labels=['Low', 'Medium', 'High'],
)

# The column is stored on df_salaries itself: the join and merge cells that
# follow read SalaryBand from this frame.
df_salaries['SalaryBand'] = salary_bands
df_salaries

StatementMeta(, afa9a062-de78-4f7f-8ee2-20686eb6162a, 7, Finished, Available, Finished)

Unnamed: 0,EmployeeID,Salary,SalaryBand
0,1,90000,High
1,2,60000,Medium
2,4,55000,Low


## 🔗 Step 3: Join DataFrames (by index)

In [6]:
# DataFrame.join aligns on the index, so both frames are re-indexed on EmployeeID first.
employees_by_id = df_employees.set_index('EmployeeID')
salaries_by_id = df_salaries.set_index('EmployeeID')

# Left join: every employee is kept. An employee with no salary row gets
# missing values in the salary columns, and salary rows whose EmployeeID is
# not in the roster are dropped.
joined_df = employees_by_id.join(salaries_by_id, how='left')
joined_df

StatementMeta(, afa9a062-de78-4f7f-8ee2-20686eb6162a, 8, Finished, Available, Finished)

EmployeeID,Name,Department,Salary,SalaryBand
1,Alice,HR,90000.0,High
2,Bob,IT,60000.0,Medium
3,Charlie,Finance,,


## 🔀 Step 4: Merge DataFrames (by column)

In [7]:
# Both merges match rows on the EmployeeID column; only the join type differs.
# inner: keep only EmployeeIDs present in both frames.
merged_inner = df_employees.merge(df_salaries, how='inner', on='EmployeeID')
# left: keep every employee, with missing salary columns where there is no match.
merged_left = df_employees.merge(df_salaries, how='left', on='EmployeeID')

# Show the two results side by side as the cell output.
(merged_inner, merged_left)

StatementMeta(, afa9a062-de78-4f7f-8ee2-20686eb6162a, 9, Finished, Available, Finished)

(   EmployeeID   Name Department  Salary SalaryBand
 0           1  Alice         HR   90000       High
 1           2    Bob         IT   60000     Medium,
    EmployeeID     Name Department   Salary SalaryBand
 0           1    Alice         HR  90000.0       High
 1           2      Bob         IT  60000.0     Medium
 2           3  Charlie    Finance      NaN        NaN)

## ➕ Step 5: Append a New Row

In [8]:
# One-row frame holding the employee to add, with the same columns as df_employees.
new_row = pd.DataFrame(
    {'EmployeeID': [5], 'Name': ['Diana'], 'Department': ['Marketing']}
)

# pd.concat returns a new frame and leaves df_employees unchanged.
# ignore_index=True renumbers the rows 0..n-1 so the new row gets index 3
# rather than repeating index 0.
df_employees_appended = pd.concat(objs=[df_employees, new_row], ignore_index=True)
df_employees_appended

StatementMeta(, afa9a062-de78-4f7f-8ee2-20686eb6162a, 10, Finished, Available, Finished)

Unnamed: 0,EmployeeID,Name,Department
0,1,Alice,HR
1,2,Bob,IT
2,3,Charlie,Finance
3,5,Diana,Marketing


## 🎯 Bonus Challenge – Group by and Aggregate

In [9]:
# Inner merge: only employees that have a salary row take part in the
# aggregation, so a department whose employees all lack salary data does not
# appear in the result.
merged = df_employees.merge(df_salaries, how='inner', on='EmployeeID')

# Average salary per department, returned as a Series indexed by Department.
salary_by_department = merged.groupby('Department')['Salary']
grouped = salary_by_department.mean()
grouped

StatementMeta(, afa9a062-de78-4f7f-8ee2-20686eb6162a, 11, Finished, Available, Finished)

Department
HR    90000.0
IT    60000.0
Name: Salary, dtype: float64