Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit ff2a102

Browse files
feat: support unload engine (#989)
Co-authored-by: vansangpfiev <sang@jan.ai>
1 parent 82fb67e commit ff2a102

File tree

2 files changed

+39
-5
lines changed

2 files changed

+39
-5
lines changed

cortex-cpp/controllers/server.cc

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ constexpr static auto kOnnxEngine = "cortex.onnx";
1919
constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm";
2020
} // namespace
2121

22-
// Constructs the controller. No initialisation is performed here yet.
server::server() {
  // Placeholder for default values.
  // Kept for reference (currently disabled):
  //   log_disable();  // Disable the log-to-file feature to reduce bloat on
  //                   // the target system.
}
2929

3030
// Destructor: performs no explicit teardown of its own.
// NOTE(review): engines stored in engines_ are deleted manually in
// UnloadEngine(); confirm whether any still-loaded engines should also be
// released here.
server::~server() = default;
@@ -326,6 +326,36 @@ void server::LoadModel(const HttpRequestPtr& req,
326326
LOG_TRACE << "Done load model";
327327
}
328328

329+
void server::UnloadEngine(
330+
const HttpRequestPtr& req,
331+
std::function<void(const HttpResponsePtr&)>&& callback) {
332+
if (!HasFieldInReq(req, callback, "engine")) {
333+
return;
334+
}
335+
336+
auto engine_type =
337+
(*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
338+
if (!IsEngineLoaded(engine_type)) {
339+
Json::Value res;
340+
res["message"] = "Engine is not loaded yet";
341+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
342+
resp->setStatusCode(k409Conflict);
343+
callback(resp);
344+
LOG_WARN << "Engine is not loaded yet";
345+
return;
346+
}
347+
348+
EngineI* e = std::get<EngineI*>(engines_[engine_type].engine);
349+
delete e;
350+
engines_.erase(engine_type);
351+
LOG_INFO << "Unloaded engine " + engine_type;
352+
Json::Value res;
353+
res["message"] = "Unloaded engine " + engine_type;
354+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
355+
resp->setStatusCode(k200OK);
356+
callback(resp);
357+
}
358+
329359
void server::ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
330360
std::shared_ptr<SyncQueue> q) {
331361
auto err_or_done = std::make_shared<std::atomic_bool>(false);

cortex-cpp/controllers/server.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ class server : public drogon::HttpController<server>,
6666
// ADD_METHOD_TO(server::handlePrelight, "/v1/embeddings", Options);
6767

6868
// PATH_ADD("/llama/chat_completion", Post);
69+
METHOD_ADD(server::UnloadEngine, "unloadengine", Post);
70+
6971
METHOD_LIST_END
7072
void ChatCompletion(
7173
const HttpRequestPtr& req,
@@ -91,6 +93,8 @@ class server : public drogon::HttpController<server>,
9193
void FineTuning(
9294
const HttpRequestPtr& req,
9395
std::function<void(const HttpResponsePtr&)>&& callback) override;
96+
void UnloadEngine(const HttpRequestPtr& req,
97+
std::function<void(const HttpResponsePtr&)>&& callback);
9498

9599
private:
96100
void ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,

0 commit comments

Comments
 (0)