diff --git a/xllm/api_service/api_service.cpp b/xllm/api_service/api_service.cpp old mode 100644 new mode 100755 index 69441f26..204b9f90 --- a/xllm/api_service/api_service.cpp +++ b/xllm/api_service/api_service.cpp @@ -146,8 +146,8 @@ void ChatCompletionsImpl(std::unique_ptr& service, return; } - auto call = - std::make_shared(ctrl, guard.release(), req_pb, resp_pb); + auto call = std::make_shared( + ctrl, guard.release(), req_pb, resp_pb, arena != nullptr /*use_arena*/); service->process_async(call); } } // namespace @@ -166,17 +166,18 @@ void APIService::ChatCompletionsHttp( return; } - auto arena = response->GetArena(); auto ctrl = reinterpret_cast(controller); if (FLAGS_backend == "llm") { + auto arena = response->GetArena(); CHECK(chat_service_impl_) << " chat service is invalid."; ChatCompletionsImpl( chat_service_impl_, done_guard, arena, ctrl); } else if (FLAGS_backend == "vlm") { CHECK(mm_chat_service_impl_) << " mm chat service is invalid."; + // TODO: fix me - temporarily using heap allocation instead of arena ChatCompletionsImpl( - mm_chat_service_impl_, done_guard, arena, ctrl); + mm_chat_service_impl_, done_guard, nullptr, ctrl); } } diff --git a/xllm/api_service/stream_call.h b/xllm/api_service/stream_call.h old mode 100644 new mode 100755 index 15124cc3..34f763fd --- a/xllm/api_service/stream_call.h +++ b/xllm/api_service/stream_call.h @@ -39,8 +39,13 @@ class StreamCall : public Call { StreamCall(brpc::Controller* controller, ::google::protobuf::Closure* done, Request* request, - Response* response) - : Call(controller), done_(done), request_(request), response_(response) { + Response* response, + bool use_arena = true) + : Call(controller), + done_(done), + request_(request), + response_(response), + use_arena_(use_arena) { stream_ = request_->stream(); if (stream_) { pa_ = controller_->CreateProgressiveAttachment(); @@ -67,6 +72,10 @@ class StreamCall : public Call { if (!stream_) { done_->Run(); } + if (!use_arena_) { + delete request_; + delete response_; + } } bool write_and_finish(Response& response) { @@ -142,6 +151,7 @@ class StreamCall : public Call { Response* response_; bool stream_ = false; + bool use_arena_ = true; butil::intrusive_ptr pa_; butil::IOBuf io_buf_;