In [1]:
import pandas as pd

# One tuple per employee, laid out in the same order as `_columns`.
_columns = ("Name", "Department", "Gender", "Salary")
_employees = [
    ("Bilal", "IT", "Male", 50000),
    ("Afan", "IT", "Male", 52000),
    ("Saqib", "HR", "Male", 45000),
    ("Sara", "IT", "Female", 60000),
    ("Zara", "HR", "Female", 48000),
    ("Ali", "Finance", "Male", 70000),
]

# Transpose the records into the column-oriented dict that pd.DataFrame consumes
# (column name -> list of values, one entry per employee).
data = {
    column: list(values)
    for column, values in zip(_columns, zip(*_employees))
}

# The sample frame used by every later cell in this notebook.
df = pd.DataFrame(data)

print("--- Original Data ---", df, sep="\n")

--- Original Data ---
    Name Department  Gender  Salary
0  Bilal         IT    Male   50000
1   Afan         IT    Male   52000
2  Saqib         HR    Male   45000
3   Sara         IT  Female   60000
4   Zara         HR  Female   48000
5    Ali    Finance    Male   70000


In [3]:
# Part 1:       Value Counts (.value_counts())
# This is the fastest way to answer: "How many of each X do we have?"
# Series.value_counts() summarizes one column at a time (DataFrame.value_counts() counts unique row combinations across several columns).

# 1. Basic Count
# Question: "How many employees does each Department have?"
# value_counts() tallies every distinct value and lists the largest group first.
dept_counts = df.loc[:, "Department"].value_counts()

# Header and result are emitted by one print call, separated by a newline.
print("\n--- Employee Count by Dept ---", dept_counts, sep="\n")

# Output logic: It will show IT: 3, HR: 2, Finance: 1.


--- Employee Count by Dept ---
Department
IT         3
HR         2
Finance    1
Name: count, dtype: int64


In [5]:
# 2. Get Percentages (normalize=True)
# "What percentage of our staff is Male vs Female?"
# normalize=True returns proportions (fractions of the total that sum to 1) instead of raw counts; multiply by 100 for percentages
# Share of each gender among all rows of df (values are fractions, not counts).
gender_ratio = df.loc[:, "Gender"].value_counts(normalize=True)

# Header and result are emitted by one print call, separated by a newline.
print("\n--- Gender Ratio ---", gender_ratio, sep="\n")
# Output logic: Male: 0.67 (about 67%), Female: 0.33 (about 33%).


--- Gender Ratio ---
Gender
Male      0.666667
Female    0.333333
Name: proportion, dtype: float64


In [6]:
#      Part 2: Pivot Tables (.pivot_table())
# This is the Super Power of Pandas reporting. It transforms a long list of data into a neat Grid/Matrix.
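# It is usually driven by 4 key arguments (only a grouping key - index or columns - is strictly needed; aggfunc defaults to 'mean'):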
# values: The number you want to calculate (e.g., Salary).
# index: What do you want in the Rows? (e.g., Department).
# columns: What do you want across the Top? (e.g., Gender).
# aggfunc: What math to do? ('mean', 'sum', 'count').

In [8]:
# Scenario 1: The Basic Pivot
# Goal: "Show me the Average Salary for each Department, broken down by Gender."
# Build the Department x Gender grid of average salaries.
# Department/Gender pairs with no employees come out as NaN.
pivot_df = pd.pivot_table(
    df,
    index="Department",   # one row per department
    columns="Gender",     # one column per gender
    values="Salary",      # the figure being aggregated
    aggfunc="mean",       # average within each cell
)

# Header and result are emitted by one print call, separated by a newline.
print("\n--- Pivot Table: Avg Salary ---", pivot_df, sep="\n")

# The Result:
# You will see "Department" on the left.
# You will see "Female" and "Male" across the top.
# The numbers in the middle are the average salaries.
# Note: If there is no data (e.g., No Female in Finance), it shows NaN.


--- Pivot Table: Avg Salary ---
Gender       Female     Male
Department                  
Finance         NaN  70000.0
HR          48000.0  45000.0
IT          60000.0  51000.0


In [9]:
# Scenario 2: Adding Totals (margins=True)
# Just like in Excel, you often want a "Grand Total" row and column.
# Total salary cost per Department x Gender, plus a grand-total row and column.
pivot_totals = pd.pivot_table(
    df,
    index="Department",
    columns="Gender",
    values="Salary",
    aggfunc="sum",         # total cost rather than the average
    margins=True,          # append the totals row and column
    margins_name="Total",  # label them "Total" instead of the default "All"
)

# Header and result are emitted by one print call, separated by a newline.
print("\n--- Pivot Table with Totals ---", pivot_totals, sep="\n")


--- Pivot Table with Totals ---
Gender        Female      Male   Total
Department                            
Finance          NaN   70000.0   70000
HR           48000.0   45000.0   93000
IT           60000.0  102000.0  162000
Total       108000.0  217000.0  325000
