diff --git a/src/_bentoml_impl/server/app.py b/src/_bentoml_impl/server/app.py index 10629508a4b..d988581cd74 100644 --- a/src/_bentoml_impl/server/app.py +++ b/src/_bentoml_impl/server/app.py @@ -14,6 +14,7 @@ from simple_di import Provide from simple_di import inject from starlette.middleware import Middleware +from starlette.responses import Response from starlette.staticfiles import StaticFiles from _bentoml_sdk import Service @@ -30,7 +31,6 @@ from opentelemetry.sdk.trace import Span from starlette.applications import Starlette from starlette.requests import Request - from starlette.responses import Response from starlette.routing import BaseRoute from bentoml._internal import external_typing as ext @@ -469,7 +469,10 @@ async def inner() -> t.AsyncGenerator[t.Any, None]: else: output = await self._to_thread(func, *input_args, **input_params) - response = await method.output_spec.to_http_response(output, serde) + if isinstance(output, Response): + response = output + else: + response = await method.output_spec.to_http_response(output, serde) response.headers.update({"Server": f"BentoML Service/{self.service.name}"}) if method.ctx_param is not None: