# 4.0-prediction.ipynb
# ---------------------------------------------
# Diabetes Risk Prediction - Inference
# ---------------------------------------------

In [1]:
# 1. Imports
import pandas as pd
import numpy as np
from pathlib import Path
import joblib
from loguru import logger

In [2]:
# 2. Project paths (from aegis_health_ml.config)
from aegis_health_ml.config import  MODELS_DIR, PROCESSED_DATA_DIR

# Load the saved bundle: a mapping with a "model" entry and a "preprocessor" entry.
# NOTE(review): MODELS_DIR is handed to joblib.load as-is; by its name it looks like
# a directory — confirm it resolves to the serialized bundle file, or append the
# file name here.
model_path = Path(MODELS_DIR)
if not model_path.is_file():
    # Fail early with an explicit message instead of an opaque error from joblib.
    raise FileNotFoundError(f"Expected a serialized model bundle file at {model_path}")
# SECURITY: joblib.load unpickles arbitrary objects — only load artifacts from a trusted source.
model_dict = joblib.load(model_path)
model = model_dict["model"]
preprocessor = model_dict["preprocessor"]

[32m2026-01-18 06:28:36.674[0m | [1mINFO    [0m | [36maegis_health_ml.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/habtemariam/Documents/ML/aegis-health/aegis-health-ml[0m


In [3]:
# 3. Example input data
# One patient's measurements keyed by feature name; build a DataFrame from
# several such records for batch predictions.
sample_input = dict(
    Pregnancies=6,
    Glucose=148,
    BloodPressure=72,
    SkinThickness=35,
    Insulin=0,
    BMI=33.6,
    DiabetesPedigreeFunction=0.627,
    Age=50,
)

In [4]:
# Wrap the single record in a list so it becomes a one-row frame (one column per feature).
df_input = pd.DataFrame.from_records([sample_input])

In [6]:
# 4. Feature engineering (must match training)
logger.info("Applying feature transformations...")

# BMI category. pd.cut's default bins are right-closed, so a value equal to an
# edge falls in the lower bucket (e.g. BMI 25.0 -> "Normal").
df_input["BMI_Category"] = pd.cut(
    df_input["BMI"],
    bins=[0, 18.5, 25, 30, np.inf],
    labels=["Underweight", "Normal", "Overweight", "Obese"]
)

# Age group, binned the same way: (0, 30], (30, 45], (45, 60], (60, inf].
df_input["Age_Group"] = pd.cut(
    df_input["Age"],
    bins=[0, 30, 45, 60, np.inf],
    labels=["Young", "Middle", "Senior", "Elder"]
)

# High-glucose flag: 1 when Glucose >= 140, else 0.
df_input["High_Glucose"] = (df_input["Glucose"] >= 140).astype(int)

# pd.cut returns NaN for anything outside its bins, and the lower edge 0 is
# itself excluded, so a BMI or Age <= 0 (or a missing value) gets no category.
# Surface that here instead of letting it flow silently into the preprocessor.
unbinned = df_input[["BMI_Category", "Age_Group"]].isna().any()
if unbinned.any():
    logger.warning(
        "Out-of-range or missing input left these engineered features as NaN: {}",
        unbinned[unbinned].index.tolist(),
    )

[32m2026-01-18 06:29:57.684[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mApplying feature transformations...[0m


In [7]:
# 5. Apply the preprocessor that was loaded from the saved bundle
logger.info("Applying saved preprocessor to transform features...")
# Only transform() is called here (no fitting), so the loaded preprocessor is
# applied as-is to the engineered frame.
# NOTE(review): presumably it expects the raw columns plus BMI_Category,
# Age_Group and High_Glucose — confirm against the training pipeline.
X_processed = preprocessor.transform(df_input)

[32m2026-01-18 06:30:17.369[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mApplying saved preprocessor to transform features...[0m


In [8]:
# 6. Predict class probabilities
logger.info("Making predictions...")
class_probabilities = model.predict_proba(X_processed)
# Take the first row (the single input record), second column.
# NOTE(review): assumes column 1 is the positive (diabetic) class — confirm via model.classes_.
prob = class_probabilities[0][1]

[32m2026-01-18 06:30:50.872[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mMaking predictions...[0m


In [9]:
# 7. Apply a custom decision threshold
threshold = 0.4  # cut-off on the predicted probability; adjust to clinical requirements
prediction = int(prob >= threshold)  # 1 when prob reaches the threshold, otherwise 0
if prediction == 1:
    risk_level = "High Risk"
else:
    risk_level = "Low/Standard Risk"


In [10]:
# 8. Display results
# Probability is reported as a percentage rounded to two decimals.
risk_percent = round(float(prob) * 100, 2)
result = dict(
    risk_probability=risk_percent,
    prediction=int(prediction),
    risk_level=risk_level,
)
logger.success("Prediction complete!")

# Assemble the report first, then emit it with a single print call.
report_lines = [
    "--- Prediction Results ---",
    f"Risk Probability: {result['risk_probability']}%",
    f"Predicted Class: {result['prediction']}",
    f"Risk Level: {result['risk_level']}",
]
print("\n".join(report_lines))


[32m2026-01-18 06:31:25.599[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [32m[1mPrediction complete![0m
--- Prediction Results ---
Risk Probability: 59.89%
Predicted Class: 1
Risk Level: High Risk
