@@ -19,12 +19,12 @@ constexpr static auto kOnnxEngine = "cortex.onnx";
// Engine identifier for the TensorRT-LLM backend; must match the key used by
// clients in the "engine" request field (no leading/trailing whitespace).
constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm";
2020} // namespace
2121
22- server::server (){
22+ server::server () {
2323
24- // Some default values for now below
25- // log_disable(); // Disable the log to file feature, reduce bloat for
26- // target
27- // system ()
24+ // Some default values for now below
25+ // log_disable(); // Disable the log to file feature, reduce bloat for
26+ // target
27+ // system ()
2828};
2929
3030server::~server () {}
@@ -326,6 +326,36 @@ void server::LoadModel(const HttpRequestPtr& req,
326326 LOG_TRACE << " Done load model" ;
327327}
328328
329+ void server::UnloadEngine (
330+ const HttpRequestPtr& req,
331+ std::function<void (const HttpResponsePtr&)>&& callback) {
332+ if (!HasFieldInReq (req, callback, " engine" )) {
333+ return ;
334+ }
335+
336+ auto engine_type =
337+ (*(req->getJsonObject ())).get (" engine" , cur_engine_type_).asString ();
338+ if (!IsEngineLoaded (engine_type)) {
339+ Json::Value res;
340+ res[" message" ] = " Engine is not loaded yet" ;
341+ auto resp = cortex_utils::CreateCortexHttpJsonResponse (res);
342+ resp->setStatusCode (k409Conflict);
343+ callback (resp);
344+ LOG_WARN << " Engine is not loaded yet" ;
345+ return ;
346+ }
347+
348+ EngineI* e = std::get<EngineI*>(engines_[engine_type].engine );
349+ delete e;
350+ engines_.erase (engine_type);
351+ LOG_INFO << " Unloaded engine " + engine_type;
352+ Json::Value res;
353+ res[" message" ] = " Unloaded engine " + engine_type;
354+ auto resp = cortex_utils::CreateCortexHttpJsonResponse (res);
355+ resp->setStatusCode (k200OK);
356+ callback (resp);
357+ }
358+
329359void server::ProcessStreamRes (std::function<void (const HttpResponsePtr&)> cb,
330360 std::shared_ptr<SyncQueue> q) {
331361 auto err_or_done = std::make_shared<std::atomic_bool>(false );
0 commit comments