aseembits93 · aseembits93 · Apr 30, 2026
@@ -1,4 +1,5 @@
 import base64
+from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Union
 from uuid import uuid4
 
@@ -260,6 +261,90 @@ class InstanceSegmentationInferenceResponse(
     predictions: List[InstanceSegmentationPrediction]
 
 
+# Dataclass twins used on the workflow-local fast path in
+# `InferenceModelsInstanceSegmentationAdapter.postprocess` when
+# `kwargs["source"] == "workflow-execution"`. The workflow block turns them
+# into a plain dict via `_is_response_dc_to_dict` and never needs the
+# pydantic interface. HTTP / cache / visualization paths still receive the
+# pydantic `InstanceSegmentationInferenceResponse` because they do not
+# pass `source="workflow-execution"`.
+@dataclass(slots=True)  # `slots=True` requires Python 3.10+
+class PointDC:  # one mask-polygon point; serialized as {"x": ..., "y": ...} by `_is_pred_dc_to_dict`
+    x: float  # built from float(point[0]) in the adapter's dataclass path
+    y: float  # built from float(point[1]) in the adapter's dataclass path
+
+
+@dataclass(slots=True)  # `slots=True` requires Python 3.10+
+class InferenceResponseImageDC:  # image size holder; emitted as the "image" entry by `_is_response_dc_to_dict`
+    width: int  # the adapter's postprocess passes `W` here
+    height: int  # the adapter's postprocess passes `H` here
+
+
+@dataclass(slots=True)  # `slots=True` requires Python 3.10+
+class InstanceSegmentationPredictionDC:  # one predicted instance; dict form is built by `_is_pred_dc_to_dict`
+    x: float  # adapter passes `cx` — presumably the box centre; confirm against the pydantic twin
+    y: float  # adapter passes `cy` — presumably the box centre; confirm against the pydantic twin
+    width: float
+    height: float
+    confidence: float
+    class_name: str  # serialized as "class" in the dict form
+    class_id: int
+    points: list  # list[PointDC], one entry per point of the mask polygon
+    detection_id: str = field(default_factory=lambda: str(uuid4()))  # fresh UUID4 string per instance unless supplied
+    parent_id: object = None  # left out of the dict form while it is None
+    class_confidence: object = None  # left out of the dict form while it is None
+
+
+@dataclass(slots=True)  # slotted (Python 3.10+): only the attributes declared below can be assigned
+class InstanceSegmentationInferenceResponseDC:  # response container; dict form is built by `_is_response_dc_to_dict`
+    predictions: list  # list[InstanceSegmentationPredictionDC]
+    image: InferenceResponseImageDC
+    # `Model.infer_from_request` assigns .time and .inference_id after
+    # construction (see inference/core/models/base.py:154-157); they're
+    # declared here so the slotted dataclass permits the reassignment.
+    inference_id: object = None  # left out of the dict form while it is None
+    frame_id: object = None  # left out of the dict form while it is None
+    time: object = None  # left out of the dict form while it is None
+    visualization: object = None  # left out of the dict form while it is None
+
+
+def _is_pred_dc_to_dict(p: InstanceSegmentationPredictionDC) -> dict:
+    """Bit-equivalent to `InstanceSegmentationPrediction(...).model_dump(by_alias=True, exclude_none=True)`."""
+    d = {  # NOTE(review): key order is fixed by this literal — confirm it matches the pydantic field order if ordering matters
+        "x": p.x,
+        "y": p.y,
+        "width": p.width,
+        "height": p.height,
+        "confidence": p.confidence,
+        "class": p.class_name,  # alias: the `class_name` field is emitted under the key "class"
+        "class_id": p.class_id,
+        "detection_id": p.detection_id,
+        "points": [{"x": pt.x, "y": pt.y} for pt in p.points],  # each point becomes a plain {"x", "y"} dict
+    }
+    if p.class_confidence is not None:  # optional keys are added only when set, matching `exclude_none=True`
+        d["class_confidence"] = p.class_confidence
+    if p.parent_id is not None:  # same rule: absent from the dict while None
+        d["parent_id"] = p.parent_id
+    return d
+
+
+def _is_response_dc_to_dict(r: InstanceSegmentationInferenceResponseDC) -> dict:
+    """Bit-equivalent to `InstanceSegmentationInferenceResponse(...).model_dump(by_alias=True, exclude_none=True)`."""
+    d = {  # NOTE(review): key order is fixed by this literal — confirm it matches the pydantic field order if ordering matters
+        "image": {"width": r.image.width, "height": r.image.height},
+        "predictions": [_is_pred_dc_to_dict(p) for p in r.predictions],  # one plain dict per prediction
+    }
+    if r.inference_id is not None:  # optional keys are added only when set, matching `exclude_none=True`
+        d["inference_id"] = r.inference_id
+    if r.frame_id is not None:  # absent from the dict while None
+        d["frame_id"] = r.frame_id
+    if r.time is not None:  # absent from the dict while None
+        d["time"] = r.time
+    if r.visualization is not None:  # absent from the dict while None
+        d["visualization"] = r.visualization
+    return d
+
+
 class SemanticSegmentationInferenceResponse(
     CvInferenceResponse, WithVisualizationResponse
 ):

@@ -37,15 +37,19 @@ def _get_pinned_buffer(name: str, shape, dtype: torch.dtype) -> torch.Tensor:
     ClassificationInferenceResponse,
     InferenceResponse,
     InferenceResponseImage,
+    InferenceResponseImageDC,
     InstanceSegmentationInferenceResponse,
+    InstanceSegmentationInferenceResponseDC,
     InstanceSegmentationPrediction,
+    InstanceSegmentationPredictionDC,
     Keypoint,
     KeypointsDetectionInferenceResponse,
     KeypointsPrediction,
     MultiLabelClassificationInferenceResponse,
     ObjectDetectionInferenceResponse,
     ObjectDetectionPrediction,
     Point,
+    PointDC,
     SemanticSegmentationInferenceResponse,
     SemanticSegmentationPrediction,
 )
@@ -330,6 +334,11 @@ def postprocess(
             predictions, preprocess_return_metadata, **mapped_kwargs
         )
         gpu_fastpath = os.getenv("RFDETR_GPU_POSTPROCESS", "true").lower() in ("true", "1")
+        # Workflow callers consume a plain dict via `_is_response_dc_to_dict`;
+        # dataclasses avoid pydantic validation + `model_dump` overhead per
+        # frame. Every other caller (HTTP, cache, visualization) keeps the
+        # pydantic path because it depends on the pydantic class identity.
+        use_dc = kwargs.get("source") == "workflow-execution"
 
         responses: List[InstanceSegmentationInferenceResponse] = []
         for preproc_metadata, det in zip(preprocess_return_metadata, detections_list):
@@ -419,27 +428,53 @@ def postprocess(
                     and class_name not in kwargs["class_filter"]
                 ):
                     continue
-                predictions.append(
-                    InstanceSegmentationPrediction(
-                        x=cx,
-                        y=cy,
-                        width=w,
-                        height=h,
-                        confidence=float(conf),
-                        points=[
-                            Point(x=point[0], y=point[1]) for point in mask_as_poly
-                        ],
-                        **{"class": class_name},
-                        class_id=class_id_int,
+                if use_dc:
+                    predictions.append(
+                        InstanceSegmentationPredictionDC(
+                            x=cx,
+                            y=cy,
+                            width=w,
+                            height=h,
+                            confidence=float(conf),
+                            class_name=class_name,
+                            class_id=class_id_int,
+                            points=[
+                                PointDC(x=float(point[0]), y=float(point[1]))
+                                for point in mask_as_poly
+                            ],
+                        )
+                    )
+                else:
+                    predictions.append(
+                        InstanceSegmentationPrediction(
+                            x=cx,
+                            y=cy,
+                            width=w,
+                            height=h,
+                            confidence=float(conf),
+                            points=[
+                                Point(x=point[0], y=point[1])
+                                for point in mask_as_poly
+                            ],
+                            **{"class": class_name},
+                            class_id=class_id_int,
+                        )
                     )
-                )
 
-            responses.append(
-                InstanceSegmentationInferenceResponse(
-                    predictions=predictions,
-                    image=InferenceResponseImage(width=W, height=H),
+            if use_dc:
+                responses.append(
+                    InstanceSegmentationInferenceResponseDC(
+                        predictions=predictions,
+                        image=InferenceResponseImageDC(width=W, height=H),
+                    )
+                )
+            else:
+                responses.append(
+                    InstanceSegmentationInferenceResponse(
+                        predictions=predictions,
+                        image=InferenceResponseImage(width=W, height=H),
+                    )
                 )
-            )
         return responses
 
     def clear_cache(self, delete_from_disk: bool = True) -> None:

@@ -5,6 +5,10 @@
 from inference.core.entities.requests.inference import (
     InstanceSegmentationInferenceRequest,
 )
+from inference.core.entities.responses.inference import (
+    InstanceSegmentationInferenceResponseDC,
+    _is_response_dc_to_dict,
+)
 from inference.core.env import (
     HOSTED_INSTANCE_SEGMENTATION_URL,
     LOCAL_INFERENCE_API_URL,
@@ -327,8 +331,15 @@ def run_locally(
         )
         if not isinstance(predictions, list):
             predictions = [predictions]
+        # The adapter returns dataclass responses when source="workflow-execution"
+        # (cheaper to construct and to convert to a dict than pydantic models).
+        # Any other response type (e.g. if a non-rfdetr backend is bound to the
+        # same block) falls back to `model_dump`.
         predictions = [
-            e.model_dump(by_alias=True, exclude_none=True) for e in predictions
+            _is_response_dc_to_dict(e)
+            if isinstance(e, InstanceSegmentationInferenceResponseDC)
+            else e.model_dump(by_alias=True, exclude_none=True)
+            for e in predictions
         ]
         return self._post_process_result(
             images=images,