import sys
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List
from uuid import uuid4

# Dataclass twins used on the workflow-local fast path in
# `InferenceModelsInstanceSegmentationAdapter.postprocess` when
# `kwargs["source"] == "workflow-execution"`. The workflow block consumes
# a plain dict via `_is_response_dc_to_dict` and never needs the pydantic
# interface. HTTP / cache / visualization paths still receive the pydantic
# `InstanceSegmentationInferenceResponse` because they use
# `source != "workflow-execution"`.
#
# `dataclass(slots=True)` only exists on Python 3.10+; on older interpreters
# the keyword raises TypeError while the module is being imported. Gate it on
# the running version so the module stays importable everywhere and still
# gets slotted instances where the feature is available.
_DATACLASS_OPTIONS: Dict[str, Any] = (
    {"slots": True} if sys.version_info >= (3, 10) else {}
)


@dataclass(**_DATACLASS_OPTIONS)
class PointDC:
    """Single polygon vertex of a segmentation mask."""

    x: float
    y: float


@dataclass(**_DATACLASS_OPTIONS)
class InferenceResponseImageDC:
    """Dimensions of the image a response refers to."""

    width: int
    height: int


@dataclass(**_DATACLASS_OPTIONS)
class InstanceSegmentationPredictionDC:
    """One instance-segmentation detection (box, class, mask polygon)."""

    x: float
    y: float
    width: float
    height: float
    confidence: float
    class_name: str  # serialized as "class" in the dict form
    class_id: int
    points: List[PointDC]
    # Every prediction gets its own random id unless the caller supplies one.
    detection_id: str = field(default_factory=lambda: str(uuid4()))
    parent_id: object = None
    class_confidence: object = None


@dataclass(**_DATACLASS_OPTIONS)
class InstanceSegmentationInferenceResponseDC:
    """Per-image response: the image size plus its predictions."""

    predictions: List[InstanceSegmentationPredictionDC]
    image: InferenceResponseImageDC
    # `Model.infer_from_request` assigns .time and .inference_id after
    # construction (see inference/core/models/base.py:154-157); they're
    # declared here so the slotted dataclass permits the reassignment.
    inference_id: object = None
    frame_id: object = None
    time: object = None
    visualization: object = None


def _copy_non_none(target: Dict[str, Any], source: object, names: Iterable[str]) -> None:
    """Copy each attribute in `names` from `source` into `target` unless it is None.

    Keys are inserted in the order given by `names`, which keeps the output
    key order of the converters below stable.
    """
    for name in names:
        value = getattr(source, name)
        if value is not None:
            target[name] = value


def _is_pred_dc_to_dict(p: InstanceSegmentationPredictionDC) -> dict:
    """Convert a prediction dataclass into its plain-dict form.

    Intended to mirror
    `InstanceSegmentationPrediction(...).model_dump(by_alias=True, exclude_none=True)`:
    `class_name` is emitted under the `"class"` alias and the optional
    `class_confidence` / `parent_id` keys are omitted while they are None.
    NOTE(review): the pydantic model is not defined in this block - confirm the
    key set against it whenever that model changes.
    """
    d = {
        "x": p.x,
        "y": p.y,
        "width": p.width,
        "height": p.height,
        "confidence": p.confidence,
        "class": p.class_name,  # alias
        "class_id": p.class_id,
        "detection_id": p.detection_id,
        "points": [{"x": pt.x, "y": pt.y} for pt in p.points],
    }
    _copy_non_none(d, p, ("class_confidence", "parent_id"))
    return d


def _is_response_dc_to_dict(r: InstanceSegmentationInferenceResponseDC) -> dict:
    """Convert a response dataclass into its plain-dict form.

    Intended to mirror
    `InstanceSegmentationInferenceResponse(...).model_dump(by_alias=True, exclude_none=True)`:
    `image` and `predictions` are always present, while `inference_id`,
    `frame_id`, `time` and `visualization` are only included once they have
    been set to something other than None.
    NOTE(review): the pydantic model is not defined in this block - confirm the
    key set against it whenever that model changes.
    """
    d = {
        "image": {"width": r.image.width, "height": r.image.height},
        "predictions": [_is_pred_dc_to_dict(p) for p in r.predictions],
    }
    _copy_non_none(d, r, ("inference_id", "frame_id", "time", "visualization"))
    return d
InstanceSegmentationPredictionDC, Keypoint, KeypointsDetectionInferenceResponse, KeypointsPrediction, @@ -46,6 +49,7 @@ def _get_pinned_buffer(name: str, shape, dtype: torch.dtype) -> torch.Tensor: ObjectDetectionInferenceResponse, ObjectDetectionPrediction, Point, + PointDC, SemanticSegmentationInferenceResponse, SemanticSegmentationPrediction, ) @@ -330,6 +334,11 @@ def postprocess( predictions, preprocess_return_metadata, **mapped_kwargs ) gpu_fastpath = os.getenv("RFDETR_GPU_POSTPROCESS", "true").lower() in ("true", "1") + # Workflow callers consume a plain dict via `_is_response_dc_to_dict`; + # dataclasses avoid pydantic validation + `model_dump` overhead per + # frame. Every other caller (HTTP, cache, visualization) keeps the + # pydantic path because it depends on the pydantic class identity. + use_dc = kwargs.get("source") == "workflow-execution" responses: List[InstanceSegmentationInferenceResponse] = [] for preproc_metadata, det in zip(preprocess_return_metadata, detections_list): @@ -419,27 +428,53 @@ def postprocess( and class_name not in kwargs["class_filter"] ): continue - predictions.append( - InstanceSegmentationPrediction( - x=cx, - y=cy, - width=w, - height=h, - confidence=float(conf), - points=[ - Point(x=point[0], y=point[1]) for point in mask_as_poly - ], - **{"class": class_name}, - class_id=class_id_int, + if use_dc: + predictions.append( + InstanceSegmentationPredictionDC( + x=cx, + y=cy, + width=w, + height=h, + confidence=float(conf), + class_name=class_name, + class_id=class_id_int, + points=[ + PointDC(x=float(point[0]), y=float(point[1])) + for point in mask_as_poly + ], + ) + ) + else: + predictions.append( + InstanceSegmentationPrediction( + x=cx, + y=cy, + width=w, + height=h, + confidence=float(conf), + points=[ + Point(x=point[0], y=point[1]) + for point in mask_as_poly + ], + **{"class": class_name}, + class_id=class_id_int, + ) ) - ) - responses.append( - InstanceSegmentationInferenceResponse( - predictions=predictions, - 
image=InferenceResponseImage(width=W, height=H), + if use_dc: + responses.append( + InstanceSegmentationInferenceResponseDC( + predictions=predictions, + image=InferenceResponseImageDC(width=W, height=H), + ) + ) + else: + responses.append( + InstanceSegmentationInferenceResponse( + predictions=predictions, + image=InferenceResponseImage(width=W, height=H), + ) ) - ) return responses def clear_cache(self, delete_from_disk: bool = True) -> None: diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py index 4e9e62eb16..7b59803070 100644 --- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py +++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py @@ -5,6 +5,10 @@ from inference.core.entities.requests.inference import ( InstanceSegmentationInferenceRequest, ) +from inference.core.entities.responses.inference import ( + InstanceSegmentationInferenceResponseDC, + _is_response_dc_to_dict, +) from inference.core.env import ( HOSTED_INSTANCE_SEGMENTATION_URL, LOCAL_INFERENCE_API_URL, @@ -327,8 +331,15 @@ def run_locally( ) if not isinstance(predictions, list): predictions = [predictions] + # The adapter returns dataclass responses when source="workflow-execution" + # (cheaper construct + dict-walk than pydantic). Any other response type + # (e.g. if a non-rfdetr backend is bound to the same block) falls back + # to `model_dump`. predictions = [ - e.model_dump(by_alias=True, exclude_none=True) for e in predictions + _is_response_dc_to_dict(e) + if isinstance(e, InstanceSegmentationInferenceResponseDC) + else e.model_dump(by_alias=True, exclude_none=True) + for e in predictions ] return self._post_process_result( images=images,