# Using Exported Models for Inference (LR & DT)
This notebook demonstrates how to run inference using (1) exported logistic regression coefficients and (2) exported decision tree rules. It also shows how to validate those predictions against the original sklearn models (if available) and how to compute and export evaluation artifacts.

## 1) Setup: Imports and Paths
We'll import the helper functions from `python_scripts/training_utils.py`, configure logging, and set up paths and a threshold used for classification.

In [23]:
# Imports and Paths
from pathlib import Path
from IPython.display import display
import json
import numpy as np
import pandas as pd
import os 
import sys
# Make the project importable: the parent of the current working directory is
# put on sys.path (presumably the notebook is launched from a sub-directory of
# the repository such as `notebooks/` -- TODO confirm).
# `os.path.dirname("notebooks")` is always '', so the previous expression was
# just a roundabout spelling of abspath("..").
parent_dir = os.path.abspath(os.pardir)

# Keep the directory first on sys.path and drop any earlier copy, so re-running
# this cell does not accumulate duplicate entries.
sys.path[:] = [parent_dir] + [entry for entry in sys.path if entry != parent_dir]

from python_scripts.training_utils import (
    project_paths, init_logging, load_model, load_feature_list,
    load_lr_coefficients_csv, predict_lr_with_raw_params, predict_with_lr_pipeline,
    load_dt_rules_json, predict_with_dt_rules, predict_with_dt_model,
    mark_dt_threshold_predictions, render_decision_rules_text,
    compute_performance_metrics, export_confusion_matrix_csv, compare_predictions,
 )

# Set up logging, then resolve the project's directory layout.
init_logging()
paths = project_paths()

# --- Configure file locations ---
# Adjust the values below so they point at your own artifacts.
# Dataset: prefer the testing split; fall back to the training data when it is absent.
datasets_dir = paths["datasets"]
data_csv = datasets_dir / "testing_sysmex_deduped.csv"
data_csv = data_csv if data_csv.exists() else datasets_dir / "training_data.csv"

# The three identifiers below select one set of exported artifacts.
# Example feature-list names: 'lr_CBC_DIFF_1_boruta.txt' or 'dt_CBC_DIFF_1_boruta.txt'.
feature_space = "CBC_DIFF"
weight = "1"
fsm = "boruta"

# Feature lists (txt), one per model family
features_dir = paths["features"]
features_txt_lr = features_dir / f"lr_{feature_space}_{weight}_{fsm}.txt"
features_txt_dt = features_dir / f"dt_{feature_space}_{weight}_{fsm}.txt"

# Exported artifacts (coefficients / rules) and the saved model files
exports_dir = paths["root"] / "exports"
models_dir = paths["models"]
lr_coeffs_csv = exports_dir / f"lr_coeffs_{feature_space}_{weight}_{fsm}.csv"
lr_pipeline_pkl = models_dir / f"lr_{feature_space}_{weight}_{fsm}.sav"
dt_rules_json = exports_dir / f"dt_rules_{feature_space}_{weight}_{fsm}.json"
dt_model_pkl = models_dir / f"dt_{feature_space}_{weight}_{fsm}.sav"

# Everything this notebook writes goes here; create the folder if needed.
out_dir = paths["root"] / "notebook_outputs"
out_dir.mkdir(parents=True, exist_ok=True)

# One classification threshold shared by the LR and DT sections
threshold = 0.3
print("Using threshold:", threshold)

Using threshold: 0.3


## 2) Load Inference Dataset
We'll load the dataset and align it to the expected feature set, dropping rows that have a missing value in any required feature. If the label column `isPOS` exists, we'll keep it for evaluation.

In [24]:
# Read the inference dataset selected in the setup cell
df = pd.read_csv(data_csv)
print("Loaded:", data_csv, "with shape", df.shape)

# Feature orders come from the exported txt files; a missing file yields None
has_lr_features = features_txt_lr.exists()
has_dt_features = features_txt_dt.exists()
feat_order_lr = load_feature_list(features_txt_lr) if has_lr_features else None
feat_order_dt = load_feature_list(features_txt_dt) if has_dt_features else None
print("LR features txt:", has_lr_features, "DT features txt:", has_dt_features)

# Labels are optional: keep them (as ints) only when the `isPOS` column is present
if "isPOS" in df.columns:
    target_col = "isPOS"
    y = df[target_col].astype(int)
else:
    target_col = None
    y = None

# Align and coerce feature matrices
def align_X(df_in: pd.DataFrame, feat_order: list[str]) -> pd.DataFrame:
    """Build a float feature matrix whose columns follow ``feat_order``.

    Args:
        df_in: Source frame; it is not modified.
        feat_order: Column names to select, in the order they should appear.

    Returns:
        A new frame holding only the ``feat_order`` columns cast to float, with
        every row containing a missing value removed. Surviving rows keep their
        original index labels.

    Raises:
        KeyError: If any name in ``feat_order`` is not a column of ``df_in``.
    """
    absent = [name for name in feat_order if name not in df_in.columns]
    if absent:
        raise KeyError(f"Missing required columns for inference: {absent}")
    features = df_in.loc[:, feat_order].astype(float)
    # Rows with a gap in any required column cannot be scored, so drop them.
    return features.dropna(subset=feat_order)

# Build one matrix per model; a model without a (non-empty) feature list gets None.
X_lr = None if not feat_order_lr else align_X(df, feat_order_lr)
X_dt = None if not feat_order_dt else align_X(df, feat_order_dt)

lr_shape = X_lr.shape if X_lr is not None else None
dt_shape = X_dt.shape if X_dt is not None else None
print("X_lr shape:", lr_shape, "| X_dt shape:", dt_shape)

Loaded: /Users/benjaminmcfadden/Documents/REPOS/blood-culture-outcome-classification/datasets/testing_sysmex_deduped.csv with shape (318, 431)
LR features txt: True DT features txt: True
X_lr shape: (318, 17) | X_dt shape: (318, 17)


## 3) Load Logistic Regression Coefficients
Load the raw-space coefficients and intercept exported from the training pipeline.

In [25]:
# The coefficients CSV is mandatory for the LR sections; stop early without it.
if not lr_coeffs_csv.exists():
    raise FileNotFoundError(f"Missing LR coefficients CSV: {lr_coeffs_csv}")
feat_order_lr_csv, weights_raw, intercept_raw = load_lr_coefficients_csv(lr_coeffs_csv)
print("Loaded LR coeffs for", len(feat_order_lr_csv), "features")

# X_lr must follow the column order stored in the coefficients file: reorder the
# existing matrix, or build it from the dataset when no feature txt was available.
X_lr = (
    X_lr[feat_order_lr_csv].copy()
    if X_lr is not None
    else align_X(df, feat_order_lr_csv)
).astype(float)

# Note: scoring uses z = Xw + b and p = 1/(1+exp(-z))

Loaded LR coeffs for 17 features


## 4) Predict with Logistic Coefficients (Raw Space)
Run logistic regression inference using the exported coefficients. If labels are available, compute metrics.

In [26]:
# Score every row with the exported coefficients
proba_lr, preds_lr = predict_lr_with_raw_params(
    X_lr, feat_order_lr_csv, weights_raw, intercept_raw, threshold=threshold
)
print("LR predictions:", len(preds_lr))

# Preview the first ten scores
lr_preview = pd.DataFrame({"proba_lr": proba_lr[:10], "preds_lr": preds_lr[:10]})
display(lr_preview)

# Metrics need labels
if y is not None:
    # X_lr may have lost rows to NA filtering, so pick the labels by its index
    y_aligned = y.loc[X_lr.index].values.astype(int)
    metrics_lr = compute_performance_metrics(y_aligned, preds_lr, proba_lr)
    print("LR metrics:", json.dumps(metrics_lr, indent=2))

LR predictions: 318


Unnamed: 0,proba_lr,preds_lr
0,0.53453,1
1,0.435435,1
2,0.067803,0
3,0.366797,1
4,0.140166,0
5,0.778879,1
6,0.418181,1
7,0.221608,0
8,0.552619,1
9,0.360949,1


LR metrics: {
  "n": 318.0,
  "tp": 26.0,
  "tn": 72.0,
  "fp": 220.0,
  "fn": 0.0,
  "accuracy": 0.3081761006289308,
  "recall": 1.0,
  "precision": 0.10569105691056911,
  "roc_auc": 0.7837197049525817,
  "balanced_accuracy": 0.6232876712328768,
  "specificity": 0.2465753424657534,
  "j_stat": 0.24657534246575352,
  "f2": 0.3714285714285714,
  "lr_plus": 1.3272727272727272,
  "lr_minus": 0.0
}


## 5) Validate LR Coefficient Inference vs Pipeline
If the original LR pipeline is available, we can compare the coefficient-based predictions to the pipeline outputs.

In [27]:
# Cross-check the coefficient-based scores against the saved pipeline, when it exists
if not lr_pipeline_pkl.exists():
    print("LR pipeline pickle not found:", lr_pipeline_pkl)
else:
    # NOTE(review): load_model presumably deserializes the file -- only load trusted artifacts.
    pipeline = load_model(lr_pipeline_pkl)
    proba_skl, preds_skl = predict_with_lr_pipeline(pipeline, X_lr, threshold=threshold)
    cmp = compare_predictions(proba_lr, preds_lr, proba_skl, preds_skl, prob_tol=1e-9)
    print("LR compare (coeffs vs pipeline):", json.dumps(cmp, indent=2))
    if y is not None:
        # Labels restricted to the rows that were actually scored
        y_aligned = y.loc[X_lr.index].values.astype(int)
        print("LR pipeline metrics:")
        pipeline_metrics = compute_performance_metrics(y_aligned, preds_skl, proba_skl)
        print(json.dumps(pipeline_metrics, indent=2))

LR compare (coeffs vs pipeline): {
  "preds_equal": true,
  "mismatch_count": 0,
  "prob_all_close": true,
  "prob_max_abs_diff": 6.994405055138486e-15,
  "prob_mean_abs_diff": 1.4172761544872476e-15,
  "count": 318,
  "prob_tol": 1e-09
}
LR pipeline metrics:
{
  "n": 318.0,
  "tp": 26.0,
  "tn": 72.0,
  "fp": 220.0,
  "fn": 0.0,
  "accuracy": 0.3081761006289308,
  "recall": 1.0,
  "precision": 0.10569105691056911,
  "roc_auc": 0.7837197049525817,
  "balanced_accuracy": 0.6232876712328768,
  "specificity": 0.2465753424657534,
  "j_stat": 0.24657534246575352,
  "f2": 0.3714285714285714,
  "lr_plus": 1.3272727272727272,
  "lr_minus": 0.0
}


## 6) Load Decision Tree Rules
Load the exported JSON rules and (optionally) annotate leaves with threshold-based predictions.

In [28]:
# The rules JSON is mandatory for the DT sections; stop early without it.
if not dt_rules_json.exists():
    raise FileNotFoundError(f"Missing DT rules JSON: {dt_rules_json}")
tree_dict, saved_thr = load_dt_rules_json(dt_rules_json)
print("Loaded DT rules. Saved threshold in file:", saved_thr)

# Re-annotate the leaves with the threshold configured in this notebook
tree_dict = mark_dt_threshold_predictions(tree_dict, threshold)

# The DT feature order normally comes from the DT features txt. When that file was
# missing, reuse the LR feature order (this only works if the rules reference the
# same feature names); with neither list available there is nothing to align to.
if X_dt is None and feat_order_lr is None:
    raise FileNotFoundError("DT features list not found. Ensure a matching features txt exists.")
if X_dt is None:
    X_dt = align_X(df, feat_order_lr)
feature_order_dt = list(X_dt.columns)

Loaded DT rules. Saved threshold in file: 0.3


## 7) Predict with Decision Tree Rules
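Infer probabilities and leaf labels from the rules; thresholding converts the probabilities to binary predictions. `preds_dt_leaf` is the class recorded on the leaf a row falls into, while `preds_dt_thresh` applies the configured `threshold` to that leaf's positive-class probability, so the two can disagree (for example, a leaf with probability 0.433 is class 0 on the leaf but class 1 at a threshold of 0.3).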

In [29]:
# Walk the exported rules for every row, then binarize with the notebook threshold
dt_columns = list(X_dt.columns)
proba_dt_rules, preds_dt_leaf = predict_with_dt_rules(X_dt, dt_columns, tree_dict)
preds_dt_thresh = (proba_dt_rules >= threshold).astype(int)
print("DT rule-based predictions:", len(preds_dt_thresh))

# Preview the first ten scores
dt_preview = pd.DataFrame({
    "proba_dt_rules": proba_dt_rules[:10],
    "preds_dt_thresh": preds_dt_thresh[:10],
    "preds_dt_leaf": preds_dt_leaf[:10],
})
display(dt_preview)

# Metrics need labels
if y is not None:
    # Labels restricted to the rows kept in X_dt
    y_aligned_dt = y.loc[X_dt.index].values.astype(int)
    metrics_dt = compute_performance_metrics(y_aligned_dt, preds_dt_thresh, proba_dt_rules)
    print("DT rule metrics:", json.dumps(metrics_dt, indent=2))

DT rule-based predictions: 318


Unnamed: 0,proba_dt_rules,preds_dt_thresh,preds_dt_leaf
0,0.433409,1,0
1,0.433409,1,0
2,0.207423,0,0
3,0.433409,1,0
4,0.207423,0,0
5,0.76153,1,1
6,0.433409,1,0
7,0.207423,0,0
8,0.433409,1,0
9,0.207423,0,0


DT rule metrics: {
  "n": 318.0,
  "tp": 25.0,
  "tn": 101.0,
  "fp": 191.0,
  "fn": 1.0,
  "accuracy": 0.39622641509433965,
  "recall": 0.9615384615384616,
  "precision": 0.11574074074074074,
  "roc_auc": 0.7907665964172813,
  "balanced_accuracy": 0.6537144362486829,
  "specificity": 0.3458904109589041,
  "j_stat": 0.3074288724973657,
  "f2": 0.39062500000000006,
  "lr_plus": 1.4699959726137735,
  "lr_minus": 0.11119573495811112
}


## 8) Validate Decision Tree Rule Inference vs Model
If the sklearn DT model is available, compare rule-based predictions to the model outputs.

In [30]:
# Cross-check the rule-based scores against the saved DT model, when it exists
if not dt_model_pkl.exists():
    print("DT model pickle not found:", dt_model_pkl)
else:
    # NOTE(review): load_model presumably deserializes the file -- only load trusted artifacts.
    dt_model = load_model(dt_model_pkl)
    proba_dt_skl, preds_dt_skl = predict_with_dt_model(dt_model, X_dt, threshold=threshold)
    cmp_dt = compare_predictions(
        proba_dt_rules, preds_dt_thresh, proba_dt_skl, preds_dt_skl, prob_tol=1e-9
    )
    print("DT compare (rules vs model):", json.dumps(cmp_dt, indent=2))
    if y is not None:
        # Labels restricted to the rows that were actually scored
        y_aligned_dt = y.loc[X_dt.index].values.astype(int)
        print("DT sklearn metrics:")
        dt_model_metrics = compute_performance_metrics(y_aligned_dt, preds_dt_skl, proba_dt_skl)
        print(json.dumps(dt_model_metrics, indent=2))

DT compare (rules vs model): {
  "preds_equal": true,
  "mismatch_count": 0,
  "prob_all_close": true,
  "prob_max_abs_diff": 2.3425705819590803e-14,
  "prob_mean_abs_diff": 9.92802974701554e-15,
  "count": 318,
  "prob_tol": 1e-09
}
DT sklearn metrics:
{
  "n": 318.0,
  "tp": 25.0,
  "tn": 101.0,
  "fp": 191.0,
  "fn": 1.0,
  "accuracy": 0.39622641509433965,
  "recall": 0.9615384615384616,
  "precision": 0.11574074074074074,
  "roc_auc": 0.7907665964172813,
  "balanced_accuracy": 0.6537144362486829,
  "specificity": 0.3458904109589041,
  "j_stat": 0.3074288724973657,
  "f2": 0.39062500000000006,
  "lr_plus": 1.4699959726137735,
  "lr_minus": 0.11119573495811112
}


## 9) Render and Inspect Decision Rules Text
You can render human-readable rules from the JSON tree and inspect leaf annotations.

In [31]:
# Turn the annotated tree into human-readable rule lines
lines = render_decision_rules_text(tree_dict)

# Cap the printout at the first 200 lines for brevity
rules_preview = lines[:200]
print("\n".join(rules_preview))

if NEUT%(%) <= 85.850002:
  if MONO#(10^9/L) <= 0.275000:
    if MONO%(%) <= 2.450000:
      => leaf: pred_class=1 prob_pos=0.811 pred_thresh=1 counts=[0.18853345077471192, 0.811466549225284]
    else:  # MONO%(%) > 2.450000
      => leaf: pred_class=1 prob_pos=0.503 pred_thresh=1 counts=[0.4972500675462207, 0.5027499324537767]
  else:  # MONO#(10^9/L) > 0.275000
    if NLR <= 5.838250:
      => leaf: pred_class=0 prob_pos=0.207 pred_thresh=0 counts=[0.792576528759268, 0.2074234712407495]
    else:  # NLR > 5.838250
      => leaf: pred_class=0 prob_pos=0.433 pred_thresh=1 counts=[0.5665907537584309, 0.43340924624160176]
else:  # NEUT%(%) > 85.850002
  if NEUT%(%) <= 90.750000:
    if RDW-CV(%) <= 12.250000:
      => leaf: pred_class=0 prob_pos=0.253 pred_thresh=0 counts=[0.7469662921348313, 0.25303370786516866]
    else:  # RDW-CV(%) > 12.250000
      => leaf: pred_class=1 prob_pos=0.655 pred_thresh=1 counts=[0.34548842861025225, 0.6545115713897834]
  else:  # NEUT%(%) > 90.750000
    

## 10) Compute and Export Evaluation Artifacts
If labels are available, we can export confusion matrices and a combined results CSV for further analysis.

In [32]:
# Export artifacts (optional; only possible when labels are available)
if y is not None:
    # Labels restricted to the rows each model actually scored
    y_lr = y.loc[X_lr.index].values.astype(int)
    y_dt = y.loc[X_dt.index].values.astype(int)

    # Confusion matrices
    export_confusion_matrix_csv(out_dir / "cm_lr_from_coeffs.csv", y_lr, preds_lr.astype(int))
    export_confusion_matrix_csv(out_dir / "cm_dt_from_rules.csv", y_dt, preds_dt_thresh.astype(int))
    print("Exported confusion matrices to:", out_dir)

    # Combined results CSV.
    # X_lr and X_dt are filtered for missing values independently, so they are
    # not guaranteed to hold the same rows. Key each model's outputs by its own
    # row index and outer-join them; pairing the DT outputs with X_lr.index
    # would either fail on a length mismatch or silently attach scores to the
    # wrong rows. When both matrices share the same rows the output is unchanged;
    # a row scored by only one model gets NaN in the other model's columns.
    lr_results = pd.DataFrame(
        {"proba_lr": np.asarray(proba_lr), "preds_lr": np.asarray(preds_lr)},
        index=X_lr.index,
    )
    dt_results = pd.DataFrame(
        {
            "proba_dt_rules": np.asarray(proba_dt_rules),
            "preds_dt_thresh": np.asarray(preds_dt_thresh),
        },
        index=X_dt.index,
    )
    df_out = lr_results.join(dt_results, how="outer").rename_axis("index").reset_index()
    if target_col:
        # Every joined row comes from the dataset, so its label can be looked up directly
        df_out[target_col] = y.loc[df_out["index"].to_numpy()].values.astype(int)
    df_out.to_csv(out_dir / "inference_results_lr_dt.csv", index=False)
    print("Wrote:", out_dir / "inference_results_lr_dt.csv")

Exported confusion matrices to: /Users/benjaminmcfadden/Documents/REPOS/blood-culture-outcome-classification/notebook_outputs
Wrote: /Users/benjaminmcfadden/Documents/REPOS/blood-culture-outcome-classification/notebook_outputs/inference_results_lr_dt.csv


## 11) Optional: Batch Scoring Functions (LR and DT)
Reusable helpers for batch scoring from CSV input files. Update paths as needed.

In [33]:
from typing import Optional, Tuple

def _prepare_scoring_inputs(input_csv, features_txt, target_col):
    """Load a CSV and return ``(X, feat_order, y_local)`` for batch scoring.

    ``X`` holds the columns listed in ``features_txt`` cast to float, without
    the rows that have a missing value in any of them. ``y_local`` is an int
    array of labels for the rows kept in ``X``, or None when ``target_col`` is
    falsy or not a column of the CSV.

    Raises:
        KeyError: If a listed feature is not a column of the CSV.
    """
    df_in = pd.read_csv(input_csv)
    feat_order = load_feature_list(features_txt)
    missing = [col for col in feat_order if col not in df_in.columns]
    if missing:
        raise KeyError(f"Missing required columns for inference: {missing}")
    X = df_in[feat_order].astype(float).dropna(subset=feat_order)
    y_local = None
    if target_col and target_col in df_in.columns:
        # Subset to the scored rows BEFORE casting, so a missing label on a row
        # that was dropped anyway cannot break the int conversion.
        y_local = df_in.loc[X.index, target_col].astype(int).values
    return X, feat_order, y_local


def _write_scores_csv(out_df, out_csv):
    """Write ``out_df`` to ``out_csv`` without the index, creating the parent folder if needed."""
    out_path = Path(out_csv)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_df.to_csv(out_path, index=False)


def score_with_lr_coeffs(input_csv: Path, features_txt: Path, coeffs_csv: Path, out_csv: Path, threshold: float = 0.3, target_col: Optional[str] = "isPOS") -> Tuple[pd.DataFrame, Optional[dict]]:
    """Batch-score a CSV with exported logistic-regression coefficients.

    Args:
        input_csv: CSV holding the feature columns (and optionally the labels).
        features_txt: Feature list; rows with a missing value in any listed
            feature are not scored.
        coeffs_csv: Exported coefficients, read with ``load_lr_coefficients_csv``.
        out_csv: Destination of the per-row results.
        threshold: Passed to ``predict_lr_with_raw_params``.
        target_col: Label column; metrics are computed only when it is present.

    Returns:
        ``(out_df, metrics)``: ``out_df`` has the columns ``index`` (row label
        in the input CSV), ``proba_lr`` and ``preds_lr``; ``metrics`` is the
        result of ``compute_performance_metrics`` or None without labels.

    Raises:
        KeyError: If a required column is missing from the CSV, or a feature
            named in the coefficients file is not part of the feature list.
    """
    X, _, y_local = _prepare_scoring_inputs(input_csv, features_txt, target_col)
    feats, weights, intercept = load_lr_coefficients_csv(coeffs_csv)
    absent = [col for col in feats if col not in X.columns]
    if absent:
        raise KeyError(f"Features in {coeffs_csv} not listed in {features_txt}: {absent}")
    # Score with the columns in the order stored in the coefficients file
    proba, preds = predict_lr_with_raw_params(X[feats], feats, weights, intercept, threshold=threshold)
    out_df = pd.DataFrame({"index": X.index, "proba_lr": proba, "preds_lr": preds})
    _write_scores_csv(out_df, out_csv)
    metrics = compute_performance_metrics(y_local, preds, proba) if y_local is not None else None
    return out_df, metrics


def score_with_dt_rules(input_csv: Path, features_txt: Path, rules_json: Path, out_csv: Path, threshold: float = 0.3, target_col: Optional[str] = "isPOS") -> Tuple[pd.DataFrame, Optional[dict]]:
    """Batch-score a CSV with exported decision-tree rules.

    Args:
        input_csv: CSV holding the feature columns (and optionally the labels).
        features_txt: Feature list; rows with a missing value in any listed
            feature are not scored.
        rules_json: Exported rules, read with ``load_dt_rules_json``. The
            threshold saved in that file is ignored in favor of ``threshold``.
        out_csv: Destination of the per-row results.
        threshold: Cut-off applied to the rule probabilities (``proba >= threshold``).
        target_col: Label column; metrics are computed only when it is present.

    Returns:
        ``(out_df, metrics)``: ``out_df`` has the columns ``index`` (row label
        in the input CSV), ``proba_dt_rules``, ``preds_dt_thresh`` and
        ``preds_dt_leaf``; ``metrics`` is the result of
        ``compute_performance_metrics`` on the thresholded predictions, or None
        without labels.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    X, feat_order, y_local = _prepare_scoring_inputs(input_csv, features_txt, target_col)
    tree, _saved_thr = load_dt_rules_json(rules_json)
    tree = mark_dt_threshold_predictions(tree, threshold)
    proba, preds_leaf = predict_with_dt_rules(X, feat_order, tree)
    preds = (proba >= threshold).astype(int)
    out_df = pd.DataFrame({"index": X.index, "proba_dt_rules": proba, "preds_dt_thresh": preds, "preds_dt_leaf": preds_leaf})
    _write_scores_csv(out_df, out_csv)
    metrics = compute_performance_metrics(y_local, preds, proba) if y_local is not None else None
    return out_df, metrics

print("Defined batch scoring helpers: score_with_lr_coeffs, score_with_dt_rules")

Defined batch scoring helpers: score_with_lr_coeffs, score_with_dt_rules
