# Part 2a: Add Logging

Load and process patient data with BMI calculations.

**Your task:** Add logging statements to track the data processing steps.

---

## Load data

In [1]:
# Import libraries
import pandas as pd
import logging # Import logging module

# Run configurations & basic setup
DATA_PATH = "data/patient_intake.csv"  # single source of truth for the input file

# force=True removes any handlers already attached to the root logger before
# configuring it. Without it, basicConfig does nothing when a handler already
# exists (e.g. when this cell is re-run with a different level or format).
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s", force=True)

logging.info(" Reading '%s'", DATA_PATH)  # Log before reading CSV
df = pd.read_csv(DATA_PATH)
logging.info(" Loaded '%d' rows", len(df))  # Log after reading CSV
df.head()

INFO: Reading 'data/patient_intake.csv'
INFO: Loaded '50' rows


Unnamed: 0,patient_id,first_name,last_name,weight_kg,height_cm,age,sex
0,P001,Mark,Johnson,91.5,177,46,M
1,P002,Donald,Walker,80.5,164,29,M
2,P003,Nancy,Rhodes,74.3,163,47,F
3,P004,Steven,Miller,64.4,171,71,M
4,P005,Javier,Johnson,72.8,178,18,M


---

## Calculate BMI

In [2]:
# BMI = weight (kg) / height (m)^2, rounded to one decimal place.
logging.info(" Beginning BMI calculations. Number of patients: '%d'", len(df))  # Log before BMI calculation

# Heights are stored in centimetres; convert to metres first.
df["height_m"] = df["height_cm"].div(100)
df["bmi"] = df["weight_kg"].div(df["height_m"].pow(2)).round(1)

# Compute the extremes once and reuse them for the range and the difference.
bmi_lowest = df["bmi"].min()
bmi_highest = df["bmi"].max()
logging.info(
    " Completed BMI calculations. Range of BMI values: %.2f - %.2f [Difference: %.2f]",
    bmi_lowest,
    bmi_highest,
    bmi_highest - bmi_lowest,
)  # Log after BMI calculation

df.loc[:, ["patient_id", "weight_kg", "height_cm", "bmi"]].head()

INFO: Beginning BMI calculations. Number of patients: '50'
INFO: Completed BMI calculations. Range of BMI values: 19.90 - 36.10 [Difference: 16.20]


Unnamed: 0,patient_id,weight_kg,height_cm,bmi
0,P001,91.5,177,29.2
1,P002,80.5,164,29.9
2,P003,74.3,163,28.0
3,P004,64.4,171,22.0
4,P005,72.8,178,23.0


---

## Categorize BMI

In [3]:
# Bin edges and their labels. With right=False each interval is closed on the
# left and open on the right: [0, 18.5), [18.5, 25), [25, 30), [30, inf).
bmi_edges = [0, 18.5, 25, 30, float("inf")]
bmi_labels = ["Underweight", "Normal", "Overweight", "Obese"]

df["bmi_category"] = pd.cut(df["bmi"], bins=bmi_edges, labels=bmi_labels, right=False)

df.loc[:, ["patient_id", "bmi", "bmi_category"]].head()

Unnamed: 0,patient_id,bmi,bmi_category
0,P001,29.2,Overweight
1,P002,29.9,Overweight
2,P003,28.0,Overweight
3,P004,22.0,Normal
4,P005,23.0,Normal


---

## Summary statistics

In [4]:
# Count patients per BMI category. observed=False is passed explicitly: it keeps
# categories with no patients (count 0) in the result and avoids the pandas
# FutureWarning about the changing default for categorical groupers.
summary = df.groupby("bmi_category", observed=False)["patient_id"].count()
print("\nBMI category distribution:")
print(summary)

# NOTE(review): this filter is strict (> 30), whereas the "Obese" bin used for
# bmi_category is [30, inf) and therefore includes a BMI of exactly 30.0.
# Confirm which threshold is intended.
high_risk = df[df["bmi"] > 30]
print(f"\nHigh-risk patients (BMI > 30): {len(high_risk)}")

# Guard the percentage against an empty frame so the summary log cannot raise
# ZeroDivisionError.
total_patients = len(df)
high_risk_pct = len(high_risk) / total_patients * 100 if total_patients else 0.0

# logging.info() statement summarizing processing completion
logging.info(" BMI processing complete. Outputting summary details . . .")
logging.info(" Total patients: '%d', High-risk patients: '%d', Proportion of patients High-risk: '%.2f%%'",
             total_patients, len(high_risk), high_risk_pct)
logging.info(" BMI category distribution: %s",
             ", ".join(f"{category}: {value}" for category, value in summary.to_dict().items()))


  summary = df.groupby("bmi_category")["patient_id"].count()
INFO: BMI processing complete. Outputting summary details . . .
INFO: Total patients: '50', High-risk patients: '14', Proportion of patients High-risk: '28.00%'
INFO: BMI category distribution: Underweight: 0, Normal: 15, Overweight: 21, Obese: 14



BMI category distribution:
bmi_category
Underweight     0
Normal         15
Overweight     21
Obese          14
Name: patient_id, dtype: int64

High-risk patients (BMI > 30): 14
