88 changes: 47 additions & 41 deletions controllers/llamaCPP.cc

@@ -359,47 +359,32 @@ void llamaCPP::modelStatus(
   return;
 }

-void llamaCPP::loadModel(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-
-  if (model_loaded) {
-    LOG_INFO << "model loaded";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Model already loaded";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k409Conflict);
-    callback(resp);
-    return;
-  }
-
-  const auto &jsonBody = req->getJsonObject();
+bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {

   gpt_params params;

   // By default will setting based on number of handlers
   int drogon_thread = drogon::app().getThreadNum();
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
-    params.model = (*jsonBody)["llama_model_path"].asString();
-    params.n_gpu_layers = (*jsonBody).get("ngl", 100).asInt();
-    params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
-    params.embedding = (*jsonBody).get("embedding", true).asBool();
+    params.model = jsonBody["llama_model_path"].asString();
+    params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
+    params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
+    params.embedding = jsonBody.get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    params.n_batch = (*jsonBody).get("n_batch", 512).asInt();
-    params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
+    params.n_batch = jsonBody.get("n_batch", 512).asInt();
+    params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
     params.n_threads =
-        (*jsonBody)
-            .get("cpu_threads", std::thread::hardware_concurrency())
+        jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
             .asInt();
-    params.cont_batching = (*jsonBody).get("cont_batching", false).asBool();
+    params.cont_batching = jsonBody.get("cont_batching", false).asBool();

-    this->user_prompt = (*jsonBody).get("user_prompt", "USER: ").asString();
-    this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
+    this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
+    this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
     this->system_prompt =
-        (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
-    this->pre_prompt = (*jsonBody).get("pre_prompt", "").asString();
-    this->repeat_last_n = (*jsonBody).get("repeat_last_n", 32).asInt();
+        jsonBody.get("system_prompt", "ASSISTANT's RULE: ").asString();
+    this->pre_prompt = jsonBody.get("pre_prompt", "").asString();
+    this->repeat_last_n = jsonBody.get("repeat_last_n", 32).asInt();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
@@ -422,25 +407,46 @@ void llamaCPP::loadModel(

   // load the model
   if (!llama.load_model(params)) {
-    LOG_ERROR << "Error loading the model will exit the program";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Failed to load model";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
+    LOG_ERROR << "Error loading the model";
+    return false; // Indicate failure
   }
   llama.initialize();

-  Json::Value jsonResp;
-  jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
-  auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);

   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
   warmupModel();
+  return true;
+}

-  callback(resp);
+void llamaCPP::loadModel(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+
+  if (model_loaded) {
+    LOG_INFO << "model loaded";
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model already loaded";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k409Conflict);
+    callback(resp);
+    return;
+  }
+
+  const auto &jsonBody = req->getJsonObject();
+  if (!loadModelImpl(*jsonBody)) {
+    // Error occurred during model loading
+    Json::Value jsonResp;
+    jsonResp["message"] = "Failed to load model";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k500InternalServerError);
+    callback(resp);
+  } else {
+    // Model loaded successfully
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model loaded successfully";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    callback(resp);
+  }
 }

 void llamaCPP::backgroundTask() {
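The refactor above splits the old HTTP handler in two: `loadModelImpl` does the actual work against a plain `Json::Value`, while `loadModel` keeps the Drogon plumbing. Every optional field rides on jsoncpp's `Value::get(key, default)`, which returns the stored value when the key is present and the supplied default otherwise. A minimal standalone sketch of that defaulting behavior (assuming jsoncpp is available; the model path is hypothetical):

```cpp
#include <json/json.h> // jsoncpp (on some distros: <jsoncpp/json/json.h>)
#include <iostream>

int main() {
  Json::Value body;
  body["llama_model_path"] = "./models/example.gguf"; // hypothetical path
  body["ctx_len"] = 4096;

  // Present keys win; absent keys fall back to the default,
  // mirroring how loadModelImpl fills gpt_params.
  std::cout << body.get("ctx_len", 2048).asInt() << "\n"; // prints 4096
  std::cout << body.get("ngl", 100).asInt() << "\n";      // prints 100
  return 0;
}
```

Note that `req->getJsonObject()` yields a null pointer when the request carries no parseable JSON body, so the `*jsonBody` dereference in the new `loadModel` assumes a well-formed request.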
24 changes: 24 additions & 0 deletions controllers/llamaCPP.h

@@ -1,3 +1,5 @@
+#include <iostream>
+#include <string>
 #if defined(_WIN32)
 #define NOMINMAX
 #endif

@@ -2117,6 +2119,26 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
     // log_disable(); // Disable the log to file feature, reduce bloat for
     // target
     // system ()
+    std::vector<std::string> llama_models =
+        nitro_utils::listFilesInDir(nitro_utils::models_folder);
+    std::string model_index;
+    if (llama_models.size() > 0) {
+      LOG_INFO << "Found models folder, here are the llama models you have:";
+      int index_val = 0;
+      for (auto llama_model : llama_models) {
+        LOG_INFO << "index: " << index_val++ << "| model: " << llama_model;
+      }
+      std::cout
+          << "Please type the index of the model you want to load here >> ";
+      std::cin >> model_index;
+      Json::Value jsonBody;
+      jsonBody["llama_model_path"] = nitro_utils::models_folder + "/" +
+                                     llama_models[std::stoi(model_index)];
+      loadModelImpl(jsonBody);
+    } else {
+      LOG_INFO << "Not found models folder, start server as usual";
+    }
   }

   METHOD_LIST_BEGIN
@@ -2145,6 +2167,8 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   void modelStatus(const HttpRequestPtr &req,
                    std::function<void(const HttpResponsePtr &)> &&callback);

+  bool loadModelImpl(const Json::Value &jsonBody);
+
   void warmupModel();

   void backgroundTask();
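One caveat with the interactive prompt in the constructor: `std::stoi` throws on non-numeric input, and nothing bounds-checks the chosen index against `llama_models`, so a stray keystroke would abort startup. A guarded read might look like the sketch below (a hypothetical helper, not part of this PR):

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical helper: re-prompt until the user types a valid model index.
static int promptForModelIndex(const std::vector<std::string> &models) {
  while (true) {
    std::cout << "Please type the index of the model you want to load here >> ";
    std::string line;
    if (!(std::cin >> line)) return 0; // EOF: fall back to the first model
    try {
      int idx = std::stoi(line);
      if (idx >= 0 && idx < static_cast<int>(models.size()))
        return idx;
    } catch (const std::exception &) {
      // not a number: fall through and re-prompt
    }
    std::cout << "Invalid index, try again.\n";
  }
}
```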
42 changes: 42 additions & 0 deletions utils/nitro_utils.h

@@ -6,9 +6,51 @@
 #include <drogon/HttpResponse.h>
 #include <iostream>
 #include <ostream>
+// Include platform-specific headers
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dirent.h>
+#endif

 namespace nitro_utils {

+inline std::string models_folder = "./models";
+
+inline std::vector<std::string> listFilesInDir(const std::string &path) {
+  std::vector<std::string> files;
+
+#ifdef _WIN32
+  // Windows-specific code
+  WIN32_FIND_DATA findFileData;
+  HANDLE hFind = FindFirstFile((path + "\\*").c_str(), &findFileData);
+
+  if (hFind != INVALID_HANDLE_VALUE) {
+    do {
+      if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+        files.push_back(findFileData.cFileName);
+      }
+    } while (FindNextFile(hFind, &findFileData) != 0);
+    FindClose(hFind);
+  }
+#else
+  // POSIX-specific code (Linux, Unix, macOS)
+  DIR *dir;
+  struct dirent *ent;
+
+  if ((dir = opendir(path.c_str())) != NULL) {
+    while ((ent = readdir(dir)) != NULL) {
+      if (ent->d_type == DT_REG) { // Check if it's a regular file
+        files.push_back(ent->d_name);
+      }
+    }
+    closedir(dir);
+  }
+#endif
+
+  return files;
+}
+
 inline std::string rtrim(const std::string &str) {
   size_t end = str.find_last_not_of("\n\t ");
   return (end == std::string::npos) ? "" : str.substr(0, end + 1);