# Positron Angle Inference

Run downstream positron momentum-vector quantile inference and export per-event prediction parquet files to `data/positron_angle_regression`.

Inference writes raw quantile vectors (`q16/q50/q84`) for `(px, py, pz)`; angular validation is handled in the validation notebook.


In [1]:
from pathlib import Path

from pioneerml.common.zenml import load_step_output
from pioneerml.common.zenml import utils as zenml_utils
from pioneerml.pipelines.inference.positron_angle import positron_angle_regression_inference_pipeline

# Pipeline to execute and the names used to locate what it exports.
PIPELINE = positron_angle_regression_inference_pipeline
OUTPUT_SUBDIR = "positron_angle_regression"  # folder under <project root>/data that receives the predictions
SAVE_STEP = "save_positron_angle_predictions"  # step whose output is loaded once the run finishes

# Resolve the repository root, then configure ZenML for this notebook session
# against that root.
PROJECT_ROOT = zenml_utils.find_project_root()
zenml_utils.setup_zenml_for_notebook(root_path=PROJECT_ROOT, use_in_memory=True)

Using ZenML repository root: /workspace
Ensure this is the top-level of your repo (.zen must live here).


In [2]:
# Inputs

def _pick_pred(pred_dir: Path, main_path: Path) -> Path | None:
    candidates = [
        pred_dir / f"{main_path.stem}_preds.parquet",
        pred_dir / f"{main_path.stem}_preds_latest.parquet",
    ]
    for c in candidates:
        if c.exists():
            return c
    return None

# Resolve the data root once. Wrapping in Path() keeps every path expression
# below valid whether PROJECT_ROOT is a str or a Path.
data_root = Path(PROJECT_ROOT) / "data"

main_dir = data_root
main_paths = sorted(main_dir.glob("ml_output_*.parquet"))

# Quick-run limit: only the first shard is processed.
# Comment out the next line to run inference over every discovered shard.
main_paths = main_paths[:1]

if not main_paths:
    raise RuntimeError(f"No main parquet files found in {main_dir}")

group_probs_dir = data_root / "group_classifier"
group_splitter_dir = data_root / "group_splitter"
endpoint_dir = data_root / "endpoint_regressor"
event_splitter_dir = data_root / "event_splitter"
pion_stop_dir = data_root / "pion_stop_regression"

# Upstream stages, keyed by the name reported when a prediction file is missing.
# Insertion order fixes the column order of each `aligned` tuple:
# (main, group_probs, group_splitter, endpoint, event_splitter, pion_stop).
upstream_dirs = {
    "group_classifier": group_probs_dir,
    "group_splitter": group_splitter_dir,
    "endpoint_regressor": endpoint_dir,
    "event_splitter": event_splitter_dir,
    "pion_stop_regression": pion_stop_dir,
}

aligned: list[tuple[Path, Path, Path, Path, Path, Path]] = []
for main in main_paths:
    picked = {stage: _pick_pred(stage_dir, main) for stage, stage_dir in upstream_dirs.items()}
    missing = [stage for stage, pred_path in picked.items() if pred_path is None]
    if missing:
        # Fail fast: every main shard needs a prediction file from every upstream stage.
        raise RuntimeError(f"Missing aligned predictions for {main.name}: {', '.join(missing)}")
    aligned.append((main, *picked.values()))

# Transpose the aligned rows into one list of absolute path strings per input.
# `aligned` is non-empty here (main_paths is non-empty and any gap raises above),
# so the transpose always yields exactly six columns.
(
    parquet_paths,
    group_probs_parquet_paths,
    group_splitter_parquet_paths,
    endpoint_parquet_paths,
    event_splitter_parquet_paths,
    pion_stop_parquet_paths,
) = ([str(path.resolve()) for path in column] for column in zip(*aligned))

model_path = None  # None => use latest torchscript model
output_dir = str((data_root / OUTPUT_SUBDIR).resolve())

print(f"Inference shards: {len(parquet_paths)}")
for p in parquet_paths:
    print(" -", p)
print("output_dir:", output_dir)


Inference shards: 1
 - /workspace/data/ml_output_000.parquet
output_dir: /workspace/data/positron_angle_regression


In [3]:
# Run inference pipeline

# Loader settings: inference mode with every upstream prediction source switched on.
loader_config = {
    "mode": "inference",
    "batch_size": 64,
    "chunk_row_groups": 4,
    "chunk_workers": 0,
    "use_group_probs": True,
    "use_splitter_probs": True,
    "use_endpoint_preds": True,
    "use_event_splitter_affinity": True,
    "use_pion_stop_preds": True,
}
pipeline_config = {
    "loader": {"config_json": loader_config},
    "save_predictions": {"check_accuracy": False, "write_timestamped": False},
}

# Caching is turned off for this invocation via with_options.
uncached_pipeline = PIPELINE.with_options(enable_cache=False)
run = uncached_pipeline(
    parquet_paths=parquet_paths,
    group_probs_parquet_paths=group_probs_parquet_paths,
    group_splitter_parquet_paths=group_splitter_parquet_paths,
    endpoint_parquet_paths=endpoint_parquet_paths,
    event_splitter_parquet_paths=event_splitter_parquet_paths,
    pion_stop_parquet_paths=pion_stop_parquet_paths,
    model_path=model_path,
    output_dir=output_dir,
    pipeline_config=pipeline_config,
)

# Load the output of the save step from the finished run and show it.
export_info = load_step_output(run, SAVE_STEP)
print("export:", export_info)


[37mInitiating a new run for the pipeline: [0m[38;5;105mpositron_angle_regression_inference_pipeline[37m.[0m
[37mCaching is disabled by default for [0m[38;5;105mpositron_angle_regression_inference_pipeline[37m.[0m
[37mUsing user: [0m[38;5;105mdefault[37m[0m
[37mUsing stack: [0m[38;5;105mdefault[37m[0m
[37m  deployer: [0m[38;5;105mdefault[37m[0m
[37m  artifact_store: [0m[38;5;105mdefault[37m[0m
[37m  orchestrator: [0m[38;5;105mdefault[37m[0m
[37mYou can visualize your pipeline runs in the [0m[38;5;105mZenML Dashboard[37m. In order to try it locally, please run [0m[38;5;105mzenml login --local[37m.[0m
[37mStep [0m[38;5;105mload_positron_angle_inference_inputs[37m has started.[0m
[37mStep [0m[38;5;105mload_positron_angle_inference_inputs[37m has finished in [0m[38;5;105m0.489s[37m.[0m
[37mStep [0m[38;5;105mload_positron_angle_model[37m has started.[0m
[37mStep [0m[38;5;105mload_positron_angle_model[37m has finished in [0m[

In [4]:
# Inspect exported outputs

# Prefer the multi-file "predictions_paths" entry; fall back to the single
# "predictions_path" entry when the list is absent or empty.
exported = export_info.get("predictions_paths") or []
if not exported and export_info.get("predictions_path"):
    exported = [export_info["predictions_path"]]
predictions_paths = [Path(entry) for entry in exported]
metrics_path = Path(export_info["metrics_path"])

print("predictions_paths:")
for pred_file in predictions_paths:
    print(" ", pred_file)
print("metrics:", metrics_path)
# Dump the metrics file verbatim.
print(metrics_path.read_text())


predictions_paths:
  /workspace/data/positron_angle_regression/ml_output_000_preds.parquet
metrics: /workspace/data/positron_angle_regression/metrics_latest.json
{
  "loss": null,
  "mae": null,
  "mode": "positron_angle",
  "model_path": "/workspace/trained_models/positron_angle_regression/positron_angle_20260219_045808_torchscript.pt",
  "output_path": "/workspace/data/positron_angle_regression/ml_output_000_preds.parquet",
  "output_paths": [
    "/workspace/data/positron_angle_regression/ml_output_000_preds.parquet"
  ],
  "prediction_dim": 9,
  "validated_endpoint_files": [
    "/workspace/data/endpoint_regressor/ml_output_000_preds.parquet"
  ],
  "validated_event_splitter_files": [
    "/workspace/data/event_splitter/ml_output_000_preds.parquet"
  ],
  "validated_files": [
    "/workspace/data/ml_output_000.parquet"
  ],
  "validated_group_probs_files": [
    "/workspace/data/group_classifier/ml_output_000_preds.parquet"
  ],
  "validated_group_splitter_files": [
    "/workspace

In [5]:
# Optional: inspect first exported file + free notebook memory
import gc
import pyarrow.parquet as pq

if predictions_paths:
    pf = pq.ParquetFile(predictions_paths[0])
    print("file:", predictions_paths[0])
    print("rows:", pf.metadata.num_rows)
    print(pf.schema_arrow)
    if pf.num_row_groups > 0:
        # Read only the first row group and show its first three rows.
        sample = pf.read_row_group(0).slice(0, 3)
        print(sample)

# Release references to reduce notebook memory pressure.
# Dropped here: the pipeline run handle plus the parquet reader and the sampled
# table created above. `export_info` and `predictions_paths` are deliberately
# kept so that this cell and the previous one can be re-run without a NameError.
# pop(..., None) tolerates names that are already gone or were never created
# (e.g. `sample` when the file has no row groups).
for stale_name in ("run", "pf", "sample"):
    globals().pop(stale_name, None)
del stale_name
gc.collect()


file: /workspace/data/positron_angle_regression/ml_output_000_preds.parquet
rows: 1024
event_id: int64
time_group_ids: list<element: int64>
  child 0, element: int64
pred_positron_px: list<element: float>
  child 0, element: float
pred_positron_px_q16: list<element: float>
  child 0, element: float
pred_positron_px_q50: list<element: float>
  child 0, element: float
pred_positron_px_q84: list<element: float>
  child 0, element: float
pred_positron_py: list<element: float>
  child 0, element: float
pred_positron_py_q16: list<element: float>
  child 0, element: float
pred_positron_py_q50: list<element: float>
  child 0, element: float
pred_positron_py_q84: list<element: float>
  child 0, element: float
pred_positron_pz: list<element: float>
  child 0, element: float
pred_positron_pz_q16: list<element: float>
  child 0, element: float
pred_positron_pz_q50: list<element: float>
  child 0, element: float
pred_positron_pz_q84: list<element: float>
  child 0, element: float
pyarrow.Table
even

104