From eee7d597d9bf90469091519f41b5605cd52038e8 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 2 Dec 2025 19:04:04 +0100 Subject: [PATCH 1/2] llama-server: fix duplicate HTTP headers in multiple models mode (#17693) --- tools/server/server-models.cpp | 48 +++++++++++++++++++++++++++++++--- tools/server/server-models.h | 1 + 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index ac7f6b86bf8..7859b89dc7a 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -14,6 +15,8 @@ #include #include #include +#include +#include #ifdef _WIN32 #include @@ -889,6 +892,32 @@ struct pipe_t { } }; +static std::string to_lower_copy(const std::string & value) { + std::string lowered(value.size(), '\0'); + std::transform(value.begin(), value.end(), lowered.begin(), [](unsigned char c) { return std::tolower(c); }); + return lowered; +} + +static bool should_strip_proxy_header(const std::string & header_name) { + // Headers that get duplicated when router forwards child responses + static const std::unordered_set strip_list = { + "server", + "transfer-encoding", + "keep-alive" + }; + + if (strip_list.count(header_name) > 0) { + return true; + } + + // Router injects CORS, child also sends them: duplicate + if (header_name.rfind("access-control-", 0) == 0) { + return true; + } + + return false; +} + server_http_proxy::server_http_proxy( const std::string & method, const std::string & host, @@ -925,6 +954,14 @@ server_http_proxy::server_http_proxy( msg_t msg; msg.status = response.status; for (const auto & [key, value] : response.headers) { + const auto lowered = to_lower_copy(key); + if (should_strip_proxy_header(lowered)) { + continue; + } + if (lowered == "content-type") { + msg.content_type = value; + continue; + } msg.headers[key] = value; } return pipe->write(std::move(msg)); // send headers first @@ -932,7 +969,7 @@ server_http_proxy::server_http_proxy( httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) { // send data chunks // returns false if pipe is closed / broken (signal to stop receiving) - return pipe->write({{}, 0, std::string(data, data_length)}); + return pipe->write({{}, 0, std::string(data, data_length), ""}); }; // prepare the request to destination server @@ -955,8 +992,8 @@ server_http_proxy::server_http_proxy( if (result.error() != httplib::Error::Success) { auto err_str = httplib::to_string(result.error()); SRV_ERR("http client error: %s\n", err_str.c_str()); - pipe->write({{}, 500, ""}); // header - pipe->write({{}, 0, "proxy error: " + err_str}); // body + pipe->write({{}, 500, "", ""}); // header + pipe->write({{}, 0, "proxy error: " + err_str, ""}); // body } pipe->close_write(); // signal EOF to reader SRV_DBG("%s", "client request thread ended\n"); @@ -968,7 +1005,10 @@ server_http_proxy::server_http_proxy( if (pipe->read(header, should_stop)) { SRV_DBG("%s", "received response headers\n"); this->status = header.status; - this->headers = header.headers; + this->headers = std::move(header.headers); + if (!header.content_type.empty()) { + this->content_type = std::move(header.content_type); + } } else { SRV_DBG("%s", "no response headers received (request cancelled?)\n"); } diff --git a/tools/server/server-models.h b/tools/server/server-models.h index b9bec983ef6..526e7488dc9 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -170,5 +170,6 @@ struct server_http_proxy : server_http_res { std::map headers; int status = 0; std::string data; + std::string content_type; }; }; From d76c7c28d4e07403385bacd93e770df95650925b Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 2 Dec 2025 19:44:58 +0100 Subject: [PATCH 2/2] llama-server: address review feedback from ngxson - restrict scope of header after std::move - simplify header check (remove unordered_set) --- tools/server/server-models.cpp | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 7859b89dc7a..c1fbaf4ec91 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -15,8 +15,6 @@ #include #include #include -#include -#include #ifdef _WIN32 #include @@ -900,13 +898,9 @@ static std::string to_lower_copy(const std::string & value) { static bool should_strip_proxy_header(const std::string & header_name) { // Headers that get duplicated when router forwards child responses - static const std::unordered_set strip_list = { - "server", - "transfer-encoding", - "keep-alive" - }; - - if (strip_list.count(header_name) > 0) { + if (header_name == "server" || + header_name == "transfer-encoding" || + header_name == "keep-alive") { return true; } @@ -1001,15 +995,17 @@ server_http_proxy::server_http_proxy( this->thread.detach(); // wait for the first chunk (headers) - msg_t header; - if (pipe->read(header, should_stop)) { - SRV_DBG("%s", "received response headers\n"); - this->status = header.status; - this->headers = std::move(header.headers); - if (!header.content_type.empty()) { - this->content_type = std::move(header.content_type); + { + msg_t header; + if (pipe->read(header, should_stop)) { + SRV_DBG("%s", "received response headers\n"); + this->status = header.status; + this->headers = std::move(header.headers); + if (!header.content_type.empty()) { + this->content_type = std::move(header.content_type); + } + } else { + SRV_DBG("%s", "no response headers received (request cancelled?)\n"); } - } else { - SRV_DBG("%s", "no response headers received (request cancelled?)\n"); } }