# Part 2a: Add Logging

Load and process patient data with BMI calculations.

**Your task:** Add logging statements to track the data processing steps.

---

## Load data

In [1]:
import logging
import os

import pandas as pd

# Configure the root logger for the notebook: show INFO and above,
# formatted as "LEVEL:message".
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")

# Log before and after the read so the load step is traceable in the output.
logging.info("Loading patient intake data from CSV file.")
df = pd.read_csv("data/patient_intake.csv")
logging.info("Loaded %d rows of patient intake data.", len(df))

# Keep the preview as the cell's last expression: Jupyter only renders the
# value of the final expression, so anything placed after it hides the table.
df.head()

INFO:Loading patient intake data from CSV file.
INFO:Loaded 50 rows of patient intake data.


---

## Calculate BMI

In [3]:
logging.info("Calculating BMI for each patient.")

# BMI = weight (kg) / height (m)^2. The source column is `height_cm`, so it is
# divided by 100 to get metres before squaring.
df["height_m"] = df["height_cm"] / 100
df["bmi"] = (df["weight_kg"] / (df["height_m"] ** 2)).round(1)

# Report the observed BMI range as a quick sanity check on the calculation.
logging.info(
    "BMI calculation complete. BMI ranges from %.1f to %.1f",
    df["bmi"].min(),
    df["bmi"].max(),
)

# Keep the preview as the cell's last expression: Jupyter only renders the
# value of the final expression, so the log call must come before it.
df[["patient_id", "weight_kg", "height_cm", "bmi"]].head()

INFO:Calculating BMI for each patient.
INFO:BMI calculation complete. BMI ranges from 19.9 to 36.1


---

## Categorize BMI

In [4]:
# Bin edges and their labels. With right=False each bin is half-open,
# [lower, upper), so a BMI of exactly 25 is "Overweight" and exactly 30 is "Obese".
bmi_edges = [0, 18.5, 25, 30, float("inf")]
bmi_labels = ["Underweight", "Normal", "Overweight", "Obese"]

df["bmi_category"] = pd.cut(df["bmi"], bins=bmi_edges, labels=bmi_labels, right=False)

# Preview the new column next to the value it was derived from.
preview_columns = ["patient_id", "bmi", "bmi_category"]
df.loc[:, preview_columns].head()

Unnamed: 0,patient_id,bmi,bmi_category
0,P001,29.2,Overweight
1,P002,29.9,Overweight
2,P003,28.0,Overweight
3,P004,22.0,Normal
4,P005,23.0,Normal


---

## Summary statistics

In [5]:
# Count patients per BMI category. `bmi_category` is categorical, so
# observed=False is passed explicitly to keep categories with no patients
# (shown with a count of 0) instead of relying on the pandas default, which
# is deprecated and differs between pandas versions.
summary = df.groupby("bmi_category", observed=False)["patient_id"].count()
print("\nBMI category distribution:")
print(summary)

# NOTE(review): this filter is strict (> 30), whereas the "Obese" bin built with
# right=False includes a BMI of exactly 30.0 — confirm which cut-off is intended.
high_risk = df[df["bmi"] > 30]
print(f"\nHigh-risk patients (BMI > 30): {len(high_risk)}")

logging.info("Patient intake data processing complete.")

INFO:Patient intake data processing complete.



BMI category distribution:
bmi_category
Underweight     0
Normal         15
Overweight     21
Obese          14
Name: patient_id, dtype: int64

High-risk patients (BMI > 30): 14
