# Part 2c: Config-Driven Development

Load and process patient data with BMI calculations.

**Your task:** Load configuration from `config.yaml` instead of hardcoding values.

---

## Load configuration

In [1]:

import pandas as pd
import yaml
from pathlib import Path

# Read config.yaml into the 'config' dict that the later cells pull their settings from.
Config_PATH = Path("config.yaml")
# Pin the encoding so the read does not depend on the platform's default locale encoding.
with Config_PATH.open(encoding="utf-8") as file:
    config = yaml.safe_load(file)

# safe_load returns None for an empty file (and a list/scalar for non-mapping YAML);
# fail here with a clear message instead of an AttributeError on .items() below.
if not isinstance(config, dict):
    raise ValueError(
        f"{Config_PATH} must contain a top-level mapping, got {type(config).__name__}"
    )

# Echo each top-level section so the loaded settings are visible in the cell output.
for key, value in config.items():
    print(f"{key}: {value}")

data: {'input_file': 'data/patient_intake.csv'}
bounds: {'weight_kg': {'min': 30, 'max': 250}, 'height_cm': {'min': 120, 'max': 230}, 'age': {'min': 0, 'max': 110}}
bmi_thresholds: {'underweight': 18.5, 'normal': 25, 'overweight': 30}


---

## Load data

In [2]:
# The CSV location is read from the "data" section of the loaded config
# instead of being written into the notebook as a literal path.
input_path = config["data"]["input_file"]
df = pd.read_csv(filepath_or_buffer=input_path)
df.head(n=5)

Unnamed: 0,patient_id,first_name,last_name,weight_kg,height_cm,age,sex
0,P001,Mark,Johnson,91.5,177,46,M
1,P002,Donald,Walker,80.5,164,29,M
2,P003,Nancy,Rhodes,74.3,163,47,F
3,P004,Steven,Miller,64.4,171,71,M
4,P005,Javier,Johnson,72.8,178,18,M


---

## Calculate BMI

In [3]:
# BMI = weight_kg / height_m ** 2, so derive the height in metres from height_cm first.
df["height_m"] = df["height_cm"].div(100)
# Compute the ratio and keep one decimal place in a single assignment.
df["bmi"] = df["weight_kg"].div(df["height_m"].pow(2)).round(1)

# Preview the inputs next to the derived BMI column.
preview_columns = ["patient_id", "weight_kg", "height_cm", "bmi"]
df[preview_columns].head()

Unnamed: 0,patient_id,weight_kg,height_cm,bmi
0,P001,91.5,177,29.2
1,P002,80.5,164,29.9
2,P003,74.3,163,28.0
3,P004,64.4,171,22.0
4,P005,72.8,178,23.0


---

## Categorize BMI

In [4]:
# Category boundaries come from config["bmi_thresholds"] rather than literals.
thresholds = config["bmi_thresholds"]

# Bin edges in ascending order: 0, underweight, normal, overweight, +inf.
bmi_bins = [
    0,
    *(thresholds[name] for name in ("underweight", "normal", "overweight")),
    float("inf"),
]
bmi_labels = ["Underweight", "Normal", "Overweight", "Obese"]

# right=False makes each bin closed on the left: a BMI equal to an edge
# falls into the bin that starts at that edge.
df["bmi_category"] = pd.cut(df["bmi"], bins=bmi_bins, labels=bmi_labels, right=False)

df[["patient_id", "bmi", "bmi_category"]].head()

Unnamed: 0,patient_id,bmi,bmi_category
0,P001,29.2,Overweight
1,P002,29.9,Overweight
2,P003,28.0,Overweight
3,P004,22.0,Normal
4,P005,23.0,Normal


---

## Summary statistics

In [5]:
# Count patients per BMI category. observed=False is passed explicitly: it keeps
# categories that have no patients in the table (as a 0 row) and avoids the pandas
# FutureWarning about the default of `observed` changing for categorical groupers.
summary = df.groupby("bmi_category", observed=False)["patient_id"].count()
print("\nBMI category distribution:")
print(summary)

# The high-risk cutoff is read from config["bmi_thresholds"]["overweight"] rather than a literal.
overweight_threshold = config["bmi_thresholds"]["overweight"]
# NOTE(review): this comparison is strict (>), while the categorization cell bins with
# right=False, so a BMI exactly equal to the threshold is labelled "Obese" but is not
# counted here — confirm which boundary is intended.
high_risk = df[df["bmi"] > overweight_threshold]
print(f"\nHigh-risk patients (BMI > {overweight_threshold}): {len(high_risk)}")


BMI category distribution:
bmi_category
Underweight     0
Normal         15
Overweight     21
Obese          14
Name: patient_id, dtype: int64

High-risk patients (BMI > 30): 14


  summary = df.groupby("bmi_category")["patient_id"].count()
