diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 981712463..b76140a25 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 
 using namespace inferences;
 using json = nlohmann::json;
@@ -174,6 +175,7 @@ void llamaCPP::chatCompletion(
 
   json data;
   json stopWords;
+  int no_images = 0;
   // To set default value
 
   if (jsonBody) {
@@ -200,29 +202,79 @@
       (*jsonBody).get("frequency_penalty", 0).asFloat();
   data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
   const Json::Value &messages = (*jsonBody)["messages"];
-  for (const auto &message : messages) {
-    std::string input_role = message["role"].asString();
-    std::string role;
-    if (input_role == "user") {
-      role = user_prompt;
-      std::string content = message["content"].asString();
-      formatted_output += role + content;
-    } else if (input_role == "assistant") {
-      role = ai_prompt;
-      std::string content = message["content"].asString();
-      formatted_output += role + content;
-    } else if (input_role == "system") {
-      role = system_prompt;
-      std::string content = message["content"].asString();
-      formatted_output = role + content + formatted_output;
-    } else {
-      role = input_role;
-      std::string content = message["content"].asString();
-      formatted_output += role + content;
+  if (!llama.multimodal) {
+
+    for (const auto &message : messages) {
+      std::string input_role = message["role"].asString();
+      std::string role;
+      if (input_role == "user") {
+        role = user_prompt;
+        std::string content = message["content"].asString();
+        formatted_output += role + content;
+      } else if (input_role == "assistant") {
+        role = ai_prompt;
+        std::string content = message["content"].asString();
+        formatted_output += role + content;
+      } else if (input_role == "system") {
+        role = system_prompt;
+        std::string content = message["content"].asString();
+        formatted_output = role + content + formatted_output;
+
+      } else {
+        role = input_role;
+        std::string content = message["content"].asString();
+        formatted_output += role + content;
+      }
     }
+    formatted_output += ai_prompt;
+  } else {
+
+    data["image_data"] = json::array();
+    for (const auto &message : messages) {
+      std::string input_role = message["role"].asString();
+      std::string role;
+      if (input_role == "user") {
+        formatted_output += role;
+        for (auto content_piece : message["content"]) {
+          role = user_prompt;
+
+          auto content_piece_type = content_piece["type"].asString();
+          if (content_piece_type == "text") {
+            auto text = content_piece["text"].asString();
+            formatted_output += text;
+          } else if (content_piece_type == "image_url") {
+            auto image_url = content_piece["image_url"]["url"].asString();
+            auto base64_image_data = nitro_utils::extractBase64(image_url);
+            LOG_INFO << base64_image_data;
+            formatted_output += "[img-" + std::to_string(no_images) + "]";
+
+            json content_piece_image_data;
+            content_piece_image_data["data"] = base64_image_data;
+            content_piece_image_data["id"] = no_images;
+            data["image_data"].push_back(content_piece_image_data);
+            no_images++;
+          }
+        }
+
+      } else if (input_role == "assistant") {
+        role = ai_prompt;
+        std::string content = message["content"].asString();
+        formatted_output += role + content;
+      } else if (input_role == "system") {
+        role = system_prompt;
+        std::string content = message["content"].asString();
+        formatted_output = role + content + formatted_output;
+
+      } else {
+        role = input_role;
+        std::string content = message["content"].asString();
+        formatted_output += role + content;
+      }
+    }
+    formatted_output += ai_prompt;
+    LOG_INFO << formatted_output;
   }
-  formatted_output += ai_prompt;
 
   data["prompt"] = formatted_output;
   for (const auto &stop_word : (*jsonBody)["stop"]) {
@@ -386,6 +438,10 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   int drogon_thread = drogon::app().getThreadNum() - 1;
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
+    if (!jsonBody["mmproj"].isNull()) {
+      LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
+      params.mmproj = jsonBody["mmproj"].asString();
+    }
     params.model = jsonBody["llama_model_path"].asString();
     params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
     params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 2528056fd..a7f8762b4 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1834,7 +1834,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
 public:
   llamaCPP() {
     // Some default values for now below
-    log_disable(); // Disable the log to file feature, reduce bloat for
+    // log_disable(); // Disable the log to file feature, reduce bloat for
                    // target
                    // system ()
     std::vector<std::string> llama_models =
@@ -1877,8 +1877,9 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
                       std::function<void(const HttpResponsePtr &)> &&callback);
-  void chatCompletionPrelight(const HttpRequestPtr &req,
-                              std::function<void(const HttpResponsePtr &)> &&callback);
+  void chatCompletionPrelight(
+      const HttpRequestPtr &req,
+      std::function<void(const HttpResponsePtr &)> &&callback);
   void embedding(const HttpRequestPtr &req,
                  std::function<void(const HttpResponsePtr &)> &&callback);
   void loadModel(const HttpRequestPtr &req,
diff --git a/utils/nitro_utils.h b/utils/nitro_utils.h
index 89e39619c..987fbd7e1 100644
--- a/utils/nitro_utils.h
+++ b/utils/nitro_utils.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <regex>
 // Include platform-specific headers
 #ifdef _WIN32
 #include
@@ -18,6 +19,19 @@ namespace nitro_utils {
 
 inline std::string models_folder = "./models";
 
+inline std::string extractBase64(const std::string &input) {
+  std::regex pattern("base64,(.*)");
+  std::smatch match;
+
+  if (std::regex_search(input, match, pattern)) {
+    std::string base64_data = match[1];
+    base64_data = base64_data.substr(0, base64_data.length() - 1);
+    return base64_data;
+  }
+
+  return "";
+}
+
 inline std::vector<std::string> listFilesInDir(const std::string &path) {
   std::vector<std::string> files;
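
For reference, a sketch of the request bodies the two new code paths read. The field names (mmproj, llama_model_path, ngl, ctx_len, messages, content, type, text, image_url, url, stop, and the generated prompt/image_data) are taken from the diff; the model paths, prompt text, stop word, and base64 placeholder are invented for illustration, and the snippet only prints the payloads rather than sending them.

// Illustrative only: payloads for the new multimodal paths, built with
// nlohmann::json (already used by the controller). Paths and text are made up.
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
  // loadmodel body: a non-null "mmproj" is what switches loadModelImpl into
  // multimodal mode (params.mmproj is forwarded to llama.cpp).
  json load_model = {{"llama_model_path", "/models/llava-v1.5-7b-Q4_K_M.gguf"},
                     {"mmproj", "/models/mmproj-model-f16.gguf"},
                     {"ctx_len", 2048},
                     {"ngl", 100}};

  // chatCompletion body: with llama.multimodal set, "content" is an array of
  // typed pieces. Each image_url piece becomes a "[img-N]" placeholder in the
  // prompt plus an entry in the "image_data" array handed to llama.cpp.
  json text_piece = {{"type", "text"}, {"text", "What is in this image?"}};
  json image_piece = {
      {"type", "image_url"},
      {"image_url", {{"url", "data:image/jpeg;base64,<BASE64_DATA>"}}}};
  json user_message = {{"role", "user"},
                       {"content", json::array({text_piece, image_piece})}};
  json chat_completion = {{"messages", json::array({user_message})},
                          {"stop", json::array({"</s>"})}};

  std::cout << load_model.dump(2) << "\n"
            << chat_completion.dump(2) << std::endl;
  return 0;
}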
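And a minimal standalone check of the new nitro_utils::extractBase64 helper, with the function body copied verbatim from the last hunk. It returns the regex capture minus its final character; the trailing quote in the sample URL below is an assumption about what that trim is meant to remove (a delimiter left on the raw string), not something the diff states.

// Standalone sketch of the extractBase64 helper added in utils/nitro_utils.h.
// The sample input is invented; the trailing '"' illustrates the character
// removed by the final-character trim.
#include <iostream>
#include <regex>
#include <string>

inline std::string extractBase64(const std::string &input) {
  std::regex pattern("base64,(.*)");
  std::smatch match;

  if (std::regex_search(input, match, pattern)) {
    std::string base64_data = match[1];
    base64_data = base64_data.substr(0, base64_data.length() - 1);
    return base64_data;
  }

  return "";
}

int main() {
  std::string url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg\"";
  // Prints: iVBORw0KGgoAAAANSUhEUg  (the capture with its last character dropped)
  std::cout << extractBase64(url) << std::endl;
  return 0;
}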