# Build TensorRT engine (PT or ONNX → TRT)

This notebook lets you create a TensorRT engine without running the full GUI/capture pipeline. It mirrors the DeepStream flow: convert a YOLO `.pt` to ONNX (optional), then use `trtexec` to build the `.engine` that `nvinfer` will reuse.

Notes:
- Run this with the **inference** venv kernel (`environments/inference/.venv`).
- You need TensorRT's `trtexec` on the Jetson (usually `/usr/src/tensorrt/bin/trtexec`).
- For `.pt` conversion this uses the `yolo export` CLI from Ultralytics; install `ultralytics` if not already available (or pre-convert to ONNX yourself).


# what works

- This build-engine notebook uses its own uv env (see the toml), including onnx, trtexec, etc., and ultralytics. You can pass a `.pt` file (the notebook says ONNX input is supported, but that hasn't been validated yet, as I'm unsure what export commands were used to generate the existing ONNX files that lack a valid `.pt`).

- It will export to ONNX and then build the TRT engine at full or half precision (not yet set up for INT8 calibration).

- I built a custom parser that, conveniently, infers the number of keypoints from the YOLO pose head output dims. It works with both the stock 11 pose models, which output the 17-keypoint COCO shape, and our custom mousehouse model with 6 keypoints.

- The engine and ONNX files built in this ipynb are currently saved to /home/jetson/Desktop/squeakview/new_models/ with the suffix .engine. (**Update to artifacts/weights for pt and artifacts/onnx for onnx**)

- The config created by the ipynb is saved to /home/jetson/Desktop/squeakview/DeepStream-Yolo/configs/ (**CORRECT place**)

- The custom parser is at DeepStream-Yolo/nvdsinfer_custom_impl_Yolo/yolo_pose_parser.cpp and is now built into DeepStream-Yolo/nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so. This new parser will do confidence filtering and letterbox un-padding, and will print the output tensor dimensions to the terminal. The parser is also specific to our CUDA 12.6, so if it is used with a new CUDA version we will need to change that flag and rebuild the parser using the nvs make file (easy).


# to do



- check efficiency of this full pipeline (make sure there are no unnecessary copies, mem pressure, etc.)

- also make the keypoints look better

- uncap workspace size (won't work at fp32 if capped)

- avoid commented lines in the generated config, as they can be read




In [None]:
from __future__ import annotations

import os
import shutil
import subprocess
from pathlib import Path

# Root of the DeepStream-Yolo checkout; the artifact directories below hang off it.
BASE = Path("/home/jetson/Desktop/SqueakView/DeepStream-Yolo")
PT_DIR = BASE / "artifacts" / "weights"
ONNX_DIR = BASE / "artifacts" / "onnx"
ENGINE_DIR = BASE / "engines"

# ---- Configure your model here ----------------------------------------
# Task: "pose" or "detect" (controls parser in generated config)
TASK = "pose"

# Path to a .pt or .onnx file (defaults to weights dir)
INPUT_MODEL = PT_DIR / "yolo26npose_distilled_FT.pt"

# Where to write the engine (defaults to ENGINE_DIR/<stem>_<precision>.engine)
ENGINE_OUTPUT = None  # or ENGINE_DIR / "custom.engine"

# Network input shape (min/opt/max). Keep consistent with DeepStream infer-dims.
IMG_SIZE = (1, 3, 640, 640)

# Precision: "fp32", "fp16", or "int8" (int8 requires calibration cache not covered here)
PRECISION = "fp16"

# Optional custom plugin used by DeepStream YOLO parser (not required for build, but harmless).
CUSTOM_PLUGIN = BASE / "nvdsinfer_custom_impl_Yolo" / "libnvdsinfer_custom_impl_Yolo.so"

# -----------------------------------------------------------------------
# Lower-cased precision tag used in generated file names.
PREC_SUFFIX = PRECISION.lower()
# Fail here rather than after the (slow) ONNX export has already produced
# files named with an unsupported precision tag.
if PREC_SUFFIX not in ("fp32", "fp16", "int8"):
    raise ValueError("PRECISION must be one of fp32, fp16, int8")

for d in (PT_DIR, ONNX_DIR, ENGINE_DIR):
    d.mkdir(parents=True, exist_ok=True)

# Path(...) lets the settings above also be given as plain strings.
INPUT_MODEL = Path(INPUT_MODEL).expanduser().resolve()
if ENGINE_OUTPUT is None:
    # Put the extension in the name directly: with_suffix() would replace
    # everything after the last dot, dropping the precision tag for stems
    # that themselves contain a dot (e.g. "model.v2").
    ENGINE_OUTPUT = ENGINE_DIR / f"{INPUT_MODEL.stem}_{PREC_SUFFIX}.engine"
ENGINE_OUTPUT = Path(ENGINE_OUTPUT).expanduser().resolve()
ENGINE_OUTPUT.parent.mkdir(parents=True, exist_ok=True)

print("Task       :", TASK)
print("Input model:", INPUT_MODEL)
print("Engine out :", ENGINE_OUTPUT)


# verify trtexec is available

In [None]:
def find_trtexec() -> Path:
    """Locate an executable ``trtexec`` binary.

    Candidates are tried in this order: the ``TRTEXEC`` environment variable,
    ``/usr/src/tensorrt/bin/trtexec``, then whatever ``shutil.which`` finds
    on ``PATH``.

    Returns:
        The first candidate that is a regular file with execute permission.

    Raises:
        FileNotFoundError: If no candidate qualifies.
    """
    raw_candidates = (
        os.environ.get("TRTEXEC"),
        "/usr/src/tensorrt/bin/trtexec",
        shutil.which("trtexec"),
    )
    for raw in raw_candidates:
        # Skip unset/empty candidates explicitly: Path("") is Path("."), and
        # Path objects are always truthy, so a truthiness test on the Path
        # itself would never filter anything out.
        if not raw:
            continue
        path = Path(raw).expanduser()
        if path.is_file() and os.access(path, os.X_OK):
            return path
    raise FileNotFoundError("trtexec not found. Set TRTEXEC env var or install TensorRT tools.")

# Resolve trtexec once when this cell runs; find_trtexec() raises
# FileNotFoundError if no executable candidate is found.
TRTEXEC = find_trtexec()
print("Using trtexec:", TRTEXEC)


# Export .pt --> Onnx

In [None]:
# If INPUT_MODEL is .pt, convert to ONNX using Ultralytics CLI
# (skipped if INPUT_MODEL is already ONNX)
def convert_pt_to_onnx(pt_path: Path, imgsz: tuple[int, int]) -> Path:
    """Export a ``.pt`` checkpoint to ONNX with the Ultralytics ``yolo`` CLI.

    The export is expected next to the checkpoint (``<pt_path>.onnx``) and is
    then moved to ``ONNX_DIR/<stem>_<PREC_SUFFIX>.onnx``.

    Args:
        pt_path: Path to the ``.pt`` checkpoint.
        imgsz: Export size as ``(height, width)``; a single int is also
            accepted and passed through unchanged.

    Returns:
        Path of the ONNX file inside ``ONNX_DIR``.

    Raises:
        RuntimeError: If the ``yolo`` CLI is not on ``PATH`` or exits non-zero.
        FileNotFoundError: If the export did not appear next to ``pt_path``.
        ValueError: If ``imgsz`` is an empty sequence.
    """
    # Ultralytics writes the ONNX next to the .pt by default; we then move/rename to ONNX_DIR with precision suffix
    exported_onnx = pt_path.with_suffix(".onnx")
    # Extension goes into the name directly: with_suffix() would swallow the
    # precision tag when the stem itself contains a dot.
    onnx_out = ONNX_DIR / f"{pt_path.stem}_{PREC_SUFFIX}.onnx"

    yolo_cmd = shutil.which("yolo")
    if not yolo_cmd:
        raise RuntimeError(
            "Ultralytics 'yolo' CLI not found. Install with 'pip install ultralytics' in this env, "
            "or set INPUT_MODEL to an existing ONNX path to skip conversion."
        )

    if isinstance(imgsz, (tuple, list)):
        if not imgsz:
            raise ValueError("imgsz must not be empty")
        sizes = [str(v) for v in imgsz]
        # A square size stays a single int; otherwise pass "H,W" so the
        # second dimension is not silently dropped for non-square inputs.
        imgsz_arg = sizes[0] if len(set(sizes)) == 1 else ",".join(sizes)
    else:
        imgsz_arg = str(imgsz)

    cmd = [
        yolo_cmd,
        "export",
        f"model={pt_path}",
        "format=onnx",
        f"imgsz={imgsz_arg}",
        "simplify=True",
        "nms=False",
    ]
    print("Converting .pt → ONNX:", " ".join(cmd))
    res = subprocess.run(cmd, text=True)
    if res.returncode != 0:
        raise RuntimeError("Ultralytics conversion failed; ensure 'ultralytics' is installed and yolo CLI is in PATH")
    if not exported_onnx.exists():
        raise FileNotFoundError(f"Expected ONNX not found where Ultralytics writes it: {exported_onnx}")

    onnx_out.parent.mkdir(parents=True, exist_ok=True)
    # Nothing to move when the export already sits at the target path.
    if onnx_out.resolve() != exported_onnx.resolve():
        shutil.move(str(exported_onnx), str(onnx_out))
    return onnx_out

# .pt checkpoints are exported to ONNX first; any other suffix is used as-is
# and only has to exist.
if INPUT_MODEL.suffix.lower() == ".pt":
    INPUT_MODEL = convert_pt_to_onnx(INPUT_MODEL, imgsz=IMG_SIZE[-2:])
    print("ONNX written to:", INPUT_MODEL)
elif not INPUT_MODEL.exists():
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    raise FileNotFoundError(f"Model not found: {INPUT_MODEL}")


# Verify onnx model tensor name

In [None]:
# Determine ONNX input tensor name (DeepStream auto-detects this internally)
# The resolved name is what the trtexec --minShapes/--optShapes/--maxShapes
# flags are keyed on when the model has dynamic dimensions.
INPUT_NAME_OVERRIDE = None  # set to e.g. "images" to force a name if detection fails

def get_onnx_input_name(path: Path) -> str:
    """Return the name of the first real input tensor of an ONNX model.

    Graph inputs that are also initializers (weights listed as inputs) are
    skipped. If no graph input remains, the first non-empty node input that
    is not an initializer is used instead.

    Args:
        path: Path to the ONNX file.

    Returns:
        The detected input tensor name.

    Raises:
        RuntimeError: If ``onnx`` cannot be imported or no input is found.
    """
    try:
        import onnx  # type: ignore
    except ImportError as exc:
        raise RuntimeError("onnx is required to auto-detect input name; set INPUT_NAME_OVERRIDE to skip detection") from exc

    graph = onnx.load(str(path)).graph
    initializers = {init.name for init in graph.initializer}

    # graph.input may also list initializers, so take the first entry that
    # is not one rather than blindly taking index 0.
    for graph_input in graph.input:
        if graph_input.name not in initializers:
            return graph_input.name

    for node in graph.node:
        for name in node.input:
            # An empty name marks an omitted optional input, not a tensor.
            if name and name not in initializers:
                return name
    raise RuntimeError("No inputs found in ONNX (graph.input empty and no candidate from nodes)")

# A truthy INPUT_NAME_OVERRIDE wins and skips the ONNX inspection entirely.
INPUT_NAME = INPUT_NAME_OVERRIDE or get_onnx_input_name(INPUT_MODEL)
print("ONNX input name:", INPUT_NAME)


# Build TRT Engine (this may take a while)

In [None]:
# Build the engine with trtexec (TensorRT 10.x flags)
import onnx  # ensure onnx is available for shape inspection

# Inspect the declared input shape to decide whether to pass explicit shapes.
model = onnx.load(str(INPUT_MODEL))
# Look at the tensor the shape flags will name (INPUT_NAME); fall back to the
# first graph input when no graph input carries that name.
inp = next(
    (tensor for tensor in model.graph.input if tensor.name == INPUT_NAME),
    model.graph.input[0],
)
type_proto = inp.type.tensor_type
shape_proto = type_proto.shape

dims = []          # declared dims as strings, used for the log line below
dynamic_axes = []  # parallel to dims: True where the axis has no fixed size
for dim in shape_proto.dim:
    if dim.dim_param:
        # Symbolic dimension such as "batch" or "height".
        dims.append(dim.dim_param)
        dynamic_axes.append(True)
    elif dim.dim_value > 0:
        dims.append(str(dim.dim_value))
        dynamic_axes.append(False)
    else:
        # Unset (0) or negative size: treated as dynamic.
        dims.append("-1")
        dynamic_axes.append(True)
dynamic = any(dynamic_axes)

if dynamic:
    if len(dims) != len(IMG_SIZE):
        raise ValueError(
            f"ONNX input has {len(dims)} dims {dims} but IMG_SIZE has {len(IMG_SIZE)}: {IMG_SIZE}"
        )
    # The shape flags need concrete integers: keep the sizes the model fixes
    # and fill every dynamic axis from IMG_SIZE. Symbolic names or -1 must
    # never reach the command line.
    shape_str = "x".join(
        str(size) if is_dynamic else declared
        for declared, is_dynamic, size in zip(dims, dynamic_axes, IMG_SIZE)
    )
else:
    shape_str = "x".join(str(v) for v in IMG_SIZE)

cmd = [
    str(TRTEXEC),
    f"--onnx={INPUT_MODEL}",
    f"--saveEngine={ENGINE_OUTPUT}",
]

# Only add shape profile flags if the model is dynamic; min/opt/max are all
# pinned to the same shape, so the engine is built for exactly one input size.
if dynamic:
    cmd.extend([
        f"--minShapes={INPUT_NAME}:{shape_str}",
        f"--optShapes={INPUT_NAME}:{shape_str}",
        f"--maxShapes={INPUT_NAME}:{shape_str}",
    ])

prec = PRECISION.lower()
if prec == "fp16":
    cmd.append("--fp16")
elif prec == "int8":
    # No calibration cache is passed here (not set up in this notebook).
    cmd.append("--int8")
elif prec == "fp32":
    pass  # trtexec is given no precision flag for fp32
else:
    raise ValueError("PRECISION must be one of fp32, fp16, int8")

if CUSTOM_PLUGIN and CUSTOM_PLUGIN.exists():
    cmd.append(f"--staticPlugins={CUSTOM_PLUGIN}")  # replaces --plugins

print("Dynamic input:" if dynamic else "Static input:", dims)
print("Running:", " ".join(cmd))
# Output is not captured, so trtexec's own logs are shown as it runs.
result = subprocess.run(cmd)
print("Return code:", result.returncode)
if result.returncode != 0:
    raise RuntimeError("trtexec failed; check above logs")


In [None]:
from pathlib import Path

# Auto-generate config without relying on template placeholders
# Every path is derived from BASE so this cell follows the same checkout as
# the rest of the notebook.
CFG_DIR = BASE / "configs"
CFG_NAME = f"{ENGINE_OUTPUT.stem}.txt"
CFG_PATH = CFG_DIR / CFG_NAME

is_pose = str(TASK).lower() == "pose"
parse_func = "NvDsInferParseYoloV26Pose" if is_pose else "NvDsInferParseYolo"
# class labels for detect; kp labels for pose
labels_path = BASE / "artifacts" / "labels" / "mouse_class.txt"
kp_labels_path = (BASE / "artifacts" / "labels" / "mouse_labels.txt") if is_pose else None
custom_lib_path = BASE / "nvdsinfer_custom_impl_Yolo" / "libnvdsinfer_custom_impl_Yolo.so"

# nvinfer network-mode: 0=FP32, 1=INT8, 2=FP16. Unknown values fall back to
# FP32 (0), which is also what fp32 maps to.
network_mode = {"fp32": 0, "int8": 1, "fp16": 2}.get(PRECISION.lower(), 0)
# IMG_SIZE is (N, C, H, W); infer-dims is written as channel;height;width.
infer_c, infer_h, infer_w = IMG_SIZE[-3], IMG_SIZE[-2], IMG_SIZE[-1]
class_count = 1  # adjust per model

# The generated file deliberately contains no comments: an inline "# ..."
# after a value is read as part of that value. Key legend:
#   network-mode  0=FP32, 1=INT8, 2=FP16
#   network-type  0=detector
config_lines = [
    "[property]",
    "gpu-id=0",
    "net-scale-factor=0.0039215697906911373",
    "model-color-format=0",
    "",
    f"onnx-file={INPUT_MODEL}",
    f"model-engine-file={ENGINE_OUTPUT}",
    "",
    f"network-mode={network_mode}",
    "network-type=0",
    f"infer-dims={infer_c};{infer_h};{infer_w}",
    "batch-size=1",
    "output-tensor-meta=1",
    "",
    f"num-detected-classes={class_count}",
    f"labelfile-path={labels_path}",
]

if is_pose and kp_labels_path:
    config_lines.append(f"pose-kpt-labels-path={kp_labels_path}")

config_lines += [
    f"parse-bbox-func-name={parse_func}",
    f"custom-lib-path={custom_lib_path}",
    "engine-create-func-name=NvDsInferYoloCudaEngineGet",
]

if is_pose:
    config_lines.append("pose-draw-threshold=0.5")

config_lines += [
    "",
    "cluster-mode=2",
    "maintain-aspect-ratio=1",
    "symmetric-padding=1",
    "workspace-size=2048",
    "gie-unique-id=1",
    "interval=0",
    "process-mode=1",
    "",
    "[class-attrs-all]",
    "nms-iou-threshold=0.15",
    "pre-cluster-threshold=0.9",
    "topk=20",
]
config_text = "\n".join(config_lines) + "\n"

# write_text() does not create missing parent directories.
CFG_DIR.mkdir(parents=True, exist_ok=True)
CFG_PATH.write_text(config_text)
print(f"Config written to {CFG_PATH}")
print("Parser:", parse_func)
