Training

In [None]:
import os
import json
import numpy as np
import SimpleITK as sitk
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import joblib

# Load the fold-0 split description. The file is decoded explicitly as UTF-8
# so the result does not depend on the platform's default locale encoding.
with open("train_val_split_fold0_reformatted.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Training cases of fold 0; the training loop reads the "image", "mask" and
# "pcr" keys of every entry.
train_entries = data["fold_0"]["train"]

def extract_features(img_path, mask_path):
    """Compute first-order intensity statistics of an image inside a mask.

    Both files are read with SimpleITK and converted to NumPy arrays. Every
    non-zero mask voxel counts as part of the region of interest (ROI).

    Args:
        img_path: Path of the image file.
        mask_path: Path of the mask file.

    Returns:
        A list ``[mean, std, min, max, percentile_25, percentile_75,
        volume_voxels]`` of Python floats, or ``None`` when the mask selects
        no voxels so that the caller can skip the case.

    Raises:
        ValueError: If the mask array shape does not match the leading
            dimensions of the image array.
    """
    img_array = sitk.GetArrayFromImage(sitk.ReadImage(img_path))
    mask_array = sitk.GetArrayFromImage(sitk.ReadImage(mask_path)).astype(bool)

    # Boolean indexing requires the mask to line up with the image's leading
    # axes. Checking here yields an error that names the offending files
    # instead of a bare indexing failure deep inside NumPy.
    if img_array.shape[: mask_array.ndim] != mask_array.shape:
        raise ValueError(
            f"Mask shape {mask_array.shape} does not match image shape "
            f"{img_array.shape} (image: {img_path!r}, mask: {mask_path!r})"
        )

    roi = img_array[mask_array]
    if roi.size == 0:
        return None  # empty mask: nothing to describe

    # One call computes both quartiles.
    q25, q75 = np.percentile(roi, [25, 75])

    # Plain floats give every feature the same type regardless of the image's
    # voxel dtype.
    return [
        float(roi.mean()),
        float(roi.std()),
        float(roi.min()),
        float(roi.max()),
        float(q25),
        float(q75),
        float(np.count_nonzero(mask_array)),  # ROI size in voxels
    ]

# Load and extract features
X, y = [], []
skipped = 0  # cases for which extract_features returned None (empty mask)

for entry in tqdm(train_entries):
    img_path = entry["image"][0]  # only the first listed image path is used
    mask_path = entry["mask"]
    label = entry["pcr"]

    feats = extract_features(img_path, mask_path)
    if feats is None:
        skipped += 1
        continue
    X.append(feats)
    y.append(label)

# Report dropped cases instead of discarding them silently.
if skipped:
    print(f"Skipped {skipped} case(s) with an empty mask")

X = np.array(X)
y = np.array(y)

if X.size == 0:
    raise ValueError("No usable training cases: no features could be extracted")

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handle class imbalance: weight positives by the negative/positive ratio.
n_neg = np.count_nonzero(y == 0)
n_pos = np.count_nonzero(y == 1)
if n_pos == 0:
    # Without this guard the ratio would be a division by zero.
    raise ValueError("Training data contains no positive (pcr == 1) cases")
ratio = n_neg / n_pos

# Train XGBoost
# NOTE(review): use_label_encoder is reported as deprecated in recent XGBoost
# releases — confirm against the installed version before removing it.
model = XGBClassifier(
    objective="binary:logistic",
    scale_pos_weight=ratio,
    n_estimators=100,
    max_depth=4,
    use_label_encoder=False,
    eval_metric="logloss"
)
model.fit(X_scaled, y)

# Save model & scaler
joblib.dump(model, "xgb_pcr_model.pkl")
joblib.dump(scaler, "scaler.pkl")

# Optional: check training accuracy. These predictions are made on the same
# samples the model was fitted on, so the report is not a validation estimate.
y_pred = model.predict(X_scaled)
y_prob = model.predict_proba(X_scaled)[:, 1]  # kept for interactive inspection
print(classification_report(y, y_pred))

Inference

In [None]:
# Load the model and scaler saved by the training cell.
# NOTE(review): joblib files are pickles, and loading a pickle can execute
# arbitrary code — only load files from a trusted source.
model = joblib.load("xgb_pcr_model.pkl")
scaler = joblib.load("scaler.pkl")

# New case
img_path = "/path/to/new_image.nii.gz"
mask_path = "/path/to/new_mask.nii.gz"
features = extract_features(img_path, mask_path)
if features is None:
    # extract_features returns None for an empty mask; fail with a clear
    # message instead of passing None on to the scaler.
    raise ValueError(f"Mask {mask_path!r} selects no voxels; cannot extract features")

features_scaled = scaler.transform([features])
prob = model.predict_proba(features_scaled)[0][1]  # probability of class 1
label = int(prob > 0.5)

print(f"Predicted label: {label}, Probability of class 1: {prob:.4f}")