# Part 2c: Config-Driven Development

Load and process patient data with BMI calculations.

**Your task:** Load configuration from `config.yaml` instead of hardcoding values.

---

## Load configuration

In [7]:
import pandas as pd
import yaml
from pathlib import Path

# Load all tunable values from config.yaml so later cells do not hardcode them.
CONFIG_PATH = Path("config.yaml")

# Explicit encoding: the default text encoding is platform-dependent.
with CONFIG_PATH.open(encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Fail fast with a readable message. yaml.safe_load returns None for an empty
# file, which would otherwise surface later as a confusing subscripting error.
if not isinstance(config, dict):
    raise ValueError(
        f"{CONFIG_PATH} must contain a YAML mapping, got {type(config).__name__}"
    )
missing_sections = [
    section for section in ("data", "bmi_thresholds") if section not in config
]
if missing_sections:
    raise KeyError(f"{CONFIG_PATH} is missing required section(s): {missing_sections}")

# Example structure you'll get:
# config = {
#     "data": {"input_file": "data/patient_intake.csv"},
#     "bounds": {
#         "weight_kg": {"min": 30, "max": 250},
#         "height_cm": {"min": 120, "max": 230},
#         "age": {"min": 0, "max": 110}
#     },
#     "bmi_thresholds": {
#         "underweight": 18.5,
#         "normal": 25,
#         "overweight": 30
#     }
# }

---

## Load data

In [8]:
# Read the patient intake CSV once, from the path given in
# config["data"]["input_file"] rather than a hardcoded location.
df = pd.read_csv(config["data"]["input_file"])

# The preview must be the cell's last expression for the notebook to render it.
df.head()

---

## Calculate BMI

In [9]:
# Convert height to metres, then BMI = weight (kg) / height (m) squared,
# rounded to one decimal place.
df["height_m"] = df["height_cm"].div(100)
df["bmi"] = df["weight_kg"].div(df["height_m"].pow(2)).round(1)

# Show the inputs next to the derived BMI for a quick sanity check.
df.loc[:, ["patient_id", "weight_kg", "height_cm", "bmi"]].head()

Unnamed: 0,patient_id,weight_kg,height_cm,bmi
0,P001,91.5,177,29.2
1,P002,80.5,164,29.9
2,P003,74.3,163,28.0
3,P004,64.4,171,22.0
4,P005,72.8,178,23.0


---

## Categorize BMI

In [10]:
# Category cut-points come from config["bmi_thresholds"], so changing the
# config changes the bins; nothing here is hardcoded.
bmi_thresholds = config["bmi_thresholds"]
underweight = bmi_thresholds["underweight"]
normal = bmi_thresholds["normal"]
overweight = bmi_thresholds["overweight"]

df["bmi_category"] = pd.cut(
    df["bmi"],
    bins=[0, underweight, normal, overweight, float("inf")],
    labels=["Underweight", "Normal", "Overweight", "Obese"],
    # Left-closed intervals [low, high): a BMI exactly equal to a threshold
    # lands in the higher category.
    right=False,
)

df[["patient_id", "bmi", "bmi_category"]].head()

Unnamed: 0,patient_id,bmi,bmi_category
0,P001,29.2,Overweight
1,P002,29.9,Overweight
2,P003,28.0,Overweight
3,P004,22.0,Normal
4,P005,23.0,Normal


---

## Summary statistics

In [11]:
# Count patients per BMI category. observed=False is passed explicitly so
# categories with zero patients stay in the table and pandas does not warn
# about the changing default for categorical groupers.
summary = df.groupby("bmi_category", observed=False)["patient_id"].count()
print("\nBMI category distribution:")
print(summary)

# Use the configured threshold for both the filter and the printed label so
# the message can never disagree with the cut-off actually applied.
# NOTE(review): the comparison is strict (>), so a BMI exactly equal to the
# threshold is not counted here - confirm that is the intended definition.
overweight_threshold = config["bmi_thresholds"]["overweight"]
high_risk = df[df["bmi"] > overweight_threshold]
print(f"\nHigh-risk patients (BMI > {overweight_threshold}): {len(high_risk)}")


BMI category distribution:
bmi_category
Underweight     0
Normal         15
Overweight     21
Obese          14
Name: patient_id, dtype: int64

High-risk patients (BMI > 30): 14


  summary = df.groupby("bmi_category")["patient_id"].count()
