Merge branch 'dev' of github.com:janhq/nitro into feat/python-runtime-engine

vansangpfiev · vansangpfiev · commit 9dc157d09c84 · 2024-05-24T16:16:31.000+07:00
diff --git a/cortex-cpp/common/base.h b/cortex-cpp/common/base.h
@@ -17,6 +17,9 @@ class BaseModel {
   virtual void ModelStatus(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) = 0;
+  virtual void GetModels(
+      const HttpRequestPtr& req,
+      std::function<void(const HttpResponsePtr&)>&& callback) = 0;
   virtual void GetEngines(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) = 0;
diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc
@@ -140,6 +140,54 @@ void server::ModelStatus(
   LOG_TRACE << "Done get model status";
 }
 
+// Lists the models served by an engine. The engine is chosen by the optional
+// "engine" field of the JSON body and defaults to kLlamaEngine. Replies 409
+// when that engine is not loaded and 500 when it does not implement GetModels;
+// otherwise the engine's result is returned with the status code it reports.
+void server::GetModels(const HttpRequestPtr& req,
+                       std::function<void(const HttpResponsePtr&)>&& callback) {
+  // GET requests normally carry no JSON body, in which case getJsonObject()
+  // returns a null pointer. Fall back to an empty object so this handler never
+  // dereferences null and the engine always receives a valid object.
+  auto json_body = req->getJsonObject();
+  if (!json_body) {
+    json_body = std::make_shared<Json::Value>(Json::objectValue);
+  }
+
+  auto engine_type = json_body->get("engine", kLlamaEngine).asString();
+  if (!IsEngineLoaded(engine_type)) {
+    Json::Value res;
+    res["message"] = "Engine is not loaded yet";
+    auto resp = cortex_utils::nitroHttpJsonResponse(res);
+    resp->setStatusCode(k409Conflict);
+    callback(resp);
+    LOG_WARN << "Engine is not loaded yet";
+    return;
+  }
+
+  LOG_TRACE << "Start to get models";
+  auto& en = std::get<EngineI*>(engines_[engine_type].engine);
+  if (en->IsSupported("GetModels")) {
+    en->GetModels(
+        json_body,
+        [cb = std::move(callback)](Json::Value status, Json::Value res) {
+          auto resp = cortex_utils::nitroHttpJsonResponse(res);
+          resp->setStatusCode(static_cast<drogon::HttpStatusCode>(
+              status["status_code"].asInt()));
+          cb(resp);
+        });
+  } else {
+    Json::Value res;
+    res["message"] = "Method is not supported yet";
+    auto resp = cortex_utils::nitroHttpJsonResponse(res);
+    resp->setStatusCode(k500InternalServerError);
+    callback(resp);
+    LOG_WARN << "Method is not supported yet";
+  }
+
+  LOG_TRACE << "Done get models";
+}
+
 void server::GetEngines(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
@@ -233,6 +270,7 @@ void server::LoadModel(const HttpRequestPtr& req,
           cortex_utils::GetCurrentPath() + get_engine_path(engine_type);
       engines_[engine_type].dl =
           std::make_unique<cortex_cpp::dylib>(abs_path, "engine");
+
     } catch (const cortex_cpp::dylib::load_error& e) {
       LOG_ERROR << "Could not load engine: " << e.what();
       engines_.erase(engine_type);
diff --git a/cortex-cpp/controllers/server.h b/cortex-cpp/controllers/server.h
@@ -48,14 +48,17 @@ class server : public drogon::HttpController<server>,
   METHOD_ADD(server::LoadModel, "loadmodel", Post);
   METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
   METHOD_ADD(server::ModelStatus, "modelstatus", Post);
+  METHOD_ADD(server::GetModels, "models", Get);
   METHOD_ADD(server::GetEngines, "engines", Get);
 
   // cortex.python API
   METHOD_ADD(server::FineTuning, "finetuning", Post);
 
   // Openai compatible path
   ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);
+  ADD_METHOD_TO(server::GetModels, "/v1/models", Get);
   ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post);
+
   // ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options);
   // NOTE: prelight will be added back when browser support is properly planned
 
@@ -79,10 +82,12 @@ class server : public drogon::HttpController<server>,
   void ModelStatus(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) override;
+  void GetModels(
+      const HttpRequestPtr& req,
+      std::function<void(const HttpResponsePtr&)>&& callback) override;
   void GetEngines(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) override;
-
   void FineTuning(
       const HttpRequestPtr& req,
       std::function<void(const HttpResponsePtr&)>&& callback) override;
diff --git a/cortex-cpp/cortex-common/EngineI.h b/cortex-cpp/cortex-common/EngineI.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <functional>
+#include <iostream>
 #include <memory>
 
 #include "json/value.h"
@@ -25,4 +26,12 @@ class EngineI {
   virtual void GetModelStatus(
       std::shared_ptr<Json::Value> json_body,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
+
+  // Backward-compatibility check: reports whether this engine supports method f.
+  virtual bool IsSupported(const std::string& f) = 0;
+
+  // Get list of running models; callback receives (status, result) JSON values.
+  virtual void GetModels(
+      std::shared_ptr<Json::Value> json_body,
+      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
 };