Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions inference/core/entities/responses/inference.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import base64
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from uuid import uuid4

Expand Down Expand Up @@ -260,6 +261,90 @@ class InstanceSegmentationInferenceResponse(
predictions: List[InstanceSegmentationPrediction]


# Dataclass twins used on the workflow-local fast path in
# `InferenceModelsInstanceSegmentationAdapter.postprocess` when
# `kwargs["source"] == "workflow-execution"`. The workflow block consumes
# a plain dict via `_is_response_dc_to_dict` and never needs the pydantic
# interface. HTTP / cache / visualization paths still receive the pydantic
# `InstanceSegmentationInferenceResponse` because they use
# `source != "workflow-execution"`.
@dataclass(slots=True)
class PointDC:
x: float
y: float


@dataclass(slots=True)
class InferenceResponseImageDC:
width: int
height: int


@dataclass(slots=True)
class InstanceSegmentationPredictionDC:
x: float
y: float
width: float
height: float
confidence: float
class_name: str # serialized as "class" in the dict form
class_id: int
points: list # list[PointDC]
detection_id: str = field(default_factory=lambda: str(uuid4()))
parent_id: object = None
class_confidence: object = None


@dataclass(slots=True)
class InstanceSegmentationInferenceResponseDC:
predictions: list # list[InstanceSegmentationPredictionDC]
image: InferenceResponseImageDC
# `Model.infer_from_request` assigns .time and .inference_id after
# construction (see inference/core/models/base.py:154-157); they're
# declared here so the slotted dataclass permits the reassignment.
inference_id: object = None
frame_id: object = None
time: object = None
visualization: object = None


def _is_pred_dc_to_dict(p: InstanceSegmentationPredictionDC) -> dict:
    """Flatten one dataclass prediction into a plain dict.

    Intended to match the mapping produced by
    `InstanceSegmentationPrediction(...).model_dump(by_alias=True, exclude_none=True)`:
    `class_name` is written under the `"class"` alias, each point becomes an
    `{"x", "y"}` dict, and `class_confidence` / `parent_id` are included only
    when they are not None.
    """
    polygon = [{"x": vertex.x, "y": vertex.y} for vertex in p.points]
    payload = {
        "x": p.x,
        "y": p.y,
        "width": p.width,
        "height": p.height,
        "confidence": p.confidence,
        "class": p.class_name,  # alias
        "class_id": p.class_id,
        "detection_id": p.detection_id,
        "points": polygon,
    }
    # Optional keys are compared against None (not truthiness) so that falsy
    # values such as 0.0 are still emitted.
    if (class_confidence := p.class_confidence) is not None:
        payload["class_confidence"] = class_confidence
    if (parent_id := p.parent_id) is not None:
        payload["parent_id"] = parent_id
    return payload


def _is_response_dc_to_dict(r: InstanceSegmentationInferenceResponseDC) -> dict:
    """Flatten a dataclass response into a plain dict.

    Intended to match the mapping produced by
    `InstanceSegmentationInferenceResponse(...).model_dump(by_alias=True, exclude_none=True)`:
    `image` and `predictions` are always present, while `inference_id`,
    `frame_id`, `time` and `visualization` are included only when not None.
    """
    image = r.image
    payload = {
        "image": {"width": image.width, "height": image.height},
        "predictions": [_is_pred_dc_to_dict(pred) for pred in r.predictions],
    }
    # Optional metadata, listed in the order the keys are emitted. The check
    # is against None so falsy values (e.g. frame_id == 0) are kept.
    for key in ("inference_id", "frame_id", "time", "visualization"):
        value = getattr(r, key)
        if value is not None:
            payload[key] = value
    return payload


class SemanticSegmentationInferenceResponse(
CvInferenceResponse, WithVisualizationResponse
):
Expand Down
71 changes: 53 additions & 18 deletions inference/core/models/inference_models_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,19 @@ def _get_pinned_buffer(name: str, shape, dtype: torch.dtype) -> torch.Tensor:
ClassificationInferenceResponse,
InferenceResponse,
InferenceResponseImage,
InferenceResponseImageDC,
InstanceSegmentationInferenceResponse,
InstanceSegmentationInferenceResponseDC,
InstanceSegmentationPrediction,
InstanceSegmentationPredictionDC,
Keypoint,
KeypointsDetectionInferenceResponse,
KeypointsPrediction,
MultiLabelClassificationInferenceResponse,
ObjectDetectionInferenceResponse,
ObjectDetectionPrediction,
Point,
PointDC,
SemanticSegmentationInferenceResponse,
SemanticSegmentationPrediction,
)
Expand Down Expand Up @@ -330,6 +334,11 @@ def postprocess(
predictions, preprocess_return_metadata, **mapped_kwargs
)
gpu_fastpath = os.getenv("RFDETR_GPU_POSTPROCESS", "true").lower() in ("true", "1")
# Workflow callers consume a plain dict via `_is_response_dc_to_dict`;
# dataclasses avoid pydantic validation + `model_dump` overhead per
# frame. Every other caller (HTTP, cache, visualization) keeps the
# pydantic path because it depends on the pydantic class identity.
use_dc = kwargs.get("source") == "workflow-execution"

responses: List[InstanceSegmentationInferenceResponse] = []
for preproc_metadata, det in zip(preprocess_return_metadata, detections_list):
Expand Down Expand Up @@ -419,27 +428,53 @@ def postprocess(
and class_name not in kwargs["class_filter"]
):
continue
predictions.append(
InstanceSegmentationPrediction(
x=cx,
y=cy,
width=w,
height=h,
confidence=float(conf),
points=[
Point(x=point[0], y=point[1]) for point in mask_as_poly
],
**{"class": class_name},
class_id=class_id_int,
if use_dc:
predictions.append(
InstanceSegmentationPredictionDC(
x=cx,
y=cy,
width=w,
height=h,
confidence=float(conf),
class_name=class_name,
class_id=class_id_int,
points=[
PointDC(x=float(point[0]), y=float(point[1]))
for point in mask_as_poly
],
)
)
else:
predictions.append(
InstanceSegmentationPrediction(
x=cx,
y=cy,
width=w,
height=h,
confidence=float(conf),
points=[
Point(x=point[0], y=point[1])
for point in mask_as_poly
],
**{"class": class_name},
class_id=class_id_int,
)
)
)

responses.append(
InstanceSegmentationInferenceResponse(
predictions=predictions,
image=InferenceResponseImage(width=W, height=H),
if use_dc:
responses.append(
InstanceSegmentationInferenceResponseDC(
predictions=predictions,
image=InferenceResponseImageDC(width=W, height=H),
)
)
else:
responses.append(
InstanceSegmentationInferenceResponse(
predictions=predictions,
image=InferenceResponseImage(width=W, height=H),
)
)
)
return responses

def clear_cache(self, delete_from_disk: bool = True) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
from inference.core.entities.requests.inference import (
InstanceSegmentationInferenceRequest,
)
from inference.core.entities.responses.inference import (
InstanceSegmentationInferenceResponseDC,
_is_response_dc_to_dict,
)
from inference.core.env import (
HOSTED_INSTANCE_SEGMENTATION_URL,
LOCAL_INFERENCE_API_URL,
Expand Down Expand Up @@ -327,8 +331,15 @@ def run_locally(
)
if not isinstance(predictions, list):
predictions = [predictions]
# The adapter returns dataclass responses when source="workflow-execution"
# (cheaper construct + dict-walk than pydantic). Any other response type
# (e.g. if a non-rfdetr backend is bound to the same block) falls back
# to `model_dump`.
predictions = [
e.model_dump(by_alias=True, exclude_none=True) for e in predictions
_is_response_dc_to_dict(e)
if isinstance(e, InstanceSegmentationInferenceResponseDC)
else e.model_dump(by_alias=True, exclude_none=True)
for e in predictions
]
return self._post_process_result(
images=images,
Expand Down