From 261da51ed2d13c8a4e979e2b311a0bdfa8efc479 Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Thu, 30 Oct 2025 18:43:27 +0000 Subject: [PATCH 01/11] Added support for loading image files from local file:// paths --- tools/server/utils.hpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index fb95361b282..cb556211a65 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -636,7 +636,29 @@ static json oaicompat_chat_params_parse( throw std::runtime_error("Failed to download image"); } - } else { + } else if (string_starts_with(url, "file://")) { + // Strip off the leading "file://" + std::string fname = url.substr(7); + // load local file path + raw_buffer buf; + FILE * f = fopen(fname.c_str(), "rb"); + if (!f) { + LOG_ERR("Unable to open file %s: %s\n", fname.c_str(), strerror(errno)); + throw std::runtime_error("Unable to open image file"); + } + fseek(f, 0, SEEK_END); + long file_size = ftell(f); + fseek(f, 0, SEEK_SET); + buf.resize(file_size); + size_t n_read = fread(buf.data(), 1, file_size, f); + fclose(f); + if (n_read != (size_t)file_size) { + LOG_ERR("Failed to read entire file %s", fname.c_str()); + throw std::runtime_error("Failed to read entire image file"); + } + out_files.push_back(buf); + + } else { // try to decode base64 image std::vector parts = string_split(url, /*separator*/ ','); if (parts.size() != 2) { From 2e9c3e97e138cb39f15ccb6445f73cfda121889e Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Thu, 30 Oct 2025 20:25:56 +0000 Subject: [PATCH 02/11] Added parameters for max filesize for local files and allowed local media path --- common/arg.cpp | 14 ++++++++++++++ common/common.h | 2 ++ tools/server/server.cpp | 2 ++ tools/server/utils.hpp | 28 ++++++++++++++++++++++------ 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index e8941cda41c..3c8cd617b27 100644 --- a/common/arg.cpp +++ 
b/common/arg.cpp @@ -3281,6 +3281,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.port = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT")); + add_opt(common_arg( + {"--allowed-local-media-path"}, "PATH", + string_format("path from which local media files are allowed to be read from (default: none)"), + [](common_params & params, const std::string & value) { + params.allowed_local_media_path = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALLOWED_LOCAL_MEDIA_PATH")); + add_opt(common_arg( + {"--local-media-max-size-mb"}, "N", + string_format("max size in mb for local media files (default: %d)", 15), // 15MB + [](common_params & params, int value) { + params.local_media_max_size_mb = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_LOCAL_MEDIA_MAX_SIZE_MB")); add_opt(common_arg( {"--path"}, "PATH", string_format("path to serve static files from (default: %s)", params.public_path.c_str()), diff --git a/common/common.h b/common/common.h index a8cb630ea58..b7245f52777 100644 --- a/common/common.h +++ b/common/common.h @@ -427,9 +427,11 @@ struct common_params { int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc. + int32_t local_media_max_size_mb = 15; // max size of loaded local media files std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT + std::string allowed_local_media_path = ""; // NOLINT std::string api_prefix = ""; // NOLINT std::string chat_template = ""; // NOLINT bool use_jinja = false; // NOLINT diff --git a/tools/server/server.cpp b/tools/server/server.cpp index d56468595dd..37465e83a1e 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -2585,6 +2585,8 @@ struct server_context { /* allow_image */ mctx ? 
mtmd_support_vision(mctx) : false, /* allow_audio */ mctx ? mtmd_support_audio (mctx) : false, /* enable_thinking */ enable_thinking, + /* local_media_max_size_mb */ params_base.local_media_max_size_mb, + /* allowed_local_media_path */ params_base.allowed_local_media_path, }; } diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index cb556211a65..2aba8d27161 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -528,6 +528,8 @@ struct oaicompat_parser_options { bool allow_image; bool allow_audio; bool enable_thinking = true; + int32_t local_media_max_size_mb; + std::string allowed_local_media_path; }; // used by /chat/completions endpoint @@ -637,10 +639,20 @@ static json oaicompat_chat_params_parse( } } else if (string_starts_with(url, "file://")) { - // Strip off the leading "file://" - std::string fname = url.substr(7); - // load local file path - raw_buffer buf; + // Strip off the leading "file://" + std::string fname = url.substr(7); + std::vector fparts = string_split(fname, std::filesystem::path::preferred_separator); + for (const auto &piece : fparts) { + if (piece != "" && !fs_validate_filename(piece)) { + throw std::runtime_error("Invalid filename piece '" + piece + "': " + fname); + } + } + // Check allowed local media path - fs_validate_filename already validated that there is no ".." or "." 
+ if (opt.allowed_local_media_path == "" || !string_starts_with(fname, opt.allowed_local_media_path)) { + throw std::runtime_error("File path not allowed"); + } + // load local file path + raw_buffer buf; FILE * f = fopen(fname.c_str(), "rb"); if (!f) { LOG_ERR("Unable to open file %s: %s\n", fname.c_str(), strerror(errno)); @@ -648,6 +660,10 @@ static json oaicompat_chat_params_parse( } fseek(f, 0, SEEK_END); long file_size = ftell(f); + if (file_size > opt.local_media_max_size_mb * 1024 * 1024) { + fclose(f); + throw std::runtime_error("Local file exceeds maximum allowed size"); + } fseek(f, 0, SEEK_SET); buf.resize(file_size); size_t n_read = fread(buf.data(), 1, file_size, f); @@ -656,9 +672,9 @@ static json oaicompat_chat_params_parse( LOG_ERR("Failed to read entire file %s", fname.c_str()); throw std::runtime_error("Failed to read entire image file"); } - out_files.push_back(buf); + out_files.push_back(buf); - } else { + } else { // try to decode base64 image std::vector parts = string_split(url, /*separator*/ ','); if (parts.size() != 2) { From 8d38d603200efdb6fa32eac4e59b5bcfc490d029 Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Thu, 30 Oct 2025 20:28:06 +0000 Subject: [PATCH 03/11] Fixed whitespace --- tools/server/server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 37465e83a1e..5c59129f346 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -2585,8 +2585,8 @@ struct server_context { /* allow_image */ mctx ? mtmd_support_vision(mctx) : false, /* allow_audio */ mctx ? 
mtmd_support_audio (mctx) : false, /* enable_thinking */ enable_thinking, - /* local_media_max_size_mb */ params_base.local_media_max_size_mb, - /* allowed_local_media_path */ params_base.allowed_local_media_path, + /* local_media_max_size_mb */ params_base.local_media_max_size_mb, + /* allowed_local_media_path */ params_base.allowed_local_media_path, }; } From 99d08d67c14ae8a4d51a01a06bfe5aeb303d7e89 Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Thu, 30 Oct 2025 22:41:38 -0400 Subject: [PATCH 04/11] Update server/README.md --- tools/server/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/server/README.md b/tools/server/README.md index c16d0bd6dcd..f5b13e23024 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -172,6 +172,8 @@ The project is under active development, and we are [looking for feedback and co | `--host HOST` | ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: 127.0.0.1)
(env: LLAMA_ARG_HOST) | | `--port PORT` | port to listen (default: 8080)
(env: LLAMA_ARG_PORT) | | `--path PATH` | path to serve static files from (default: )
(env: LLAMA_ARG_STATIC_PATH) | +| `--allowed-local-media-path PATH` | path from which local media files are allowed to be read from (default: none)
(env: LLAMA_ARG_ALLOWED_LOCAL_MEDIA_PATH) | +| `--local-media-max-size-mb N` | max size in mb for local media files (default: 15)
(env: LLAMA_ARG_LOCAL_MEDIA_MAX_SIZE_MB) | | `--api-prefix PREFIX` | prefix path the server serves from, without the trailing slash (default: )
(env: LLAMA_ARG_API_PREFIX) | | `--no-webui` | Disable the Web UI (default: enabled)
(env: LLAMA_ARG_NO_WEBUI) | | `--embedding, --embeddings` | restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)
(env: LLAMA_ARG_EMBEDDINGS) | @@ -1213,6 +1215,8 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte If model supports multimodal, you can input the media file via `image_url` content part. We support both base64 and remote URL as input. See OAI documentation for more. +We also support local files as input (e.g. `file://`) if enabled (see `--allowed-local-media-path` and `--local-media-max-size-mb` for details). + *Options:* See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). llama.cpp `/completion`-specific features such as `mirostat` are also supported. From 79f4fe50be82074d7730bfa7ba5b7ab24945d019 Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Tue, 4 Nov 2025 15:26:34 -0500 Subject: [PATCH 05/11] Update common/common.h - align comments Co-authored-by: Xuan-Son Nguyen --- common/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.h b/common/common.h index 47ac5ff3471..fdc06b76a91 100644 --- a/common/common.h +++ b/common/common.h @@ -429,7 +429,7 @@ struct common_params { int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc. 
- int32_t local_media_max_size_mb = 15; // max size of loaded local media files + int32_t local_media_max_size_mb = 15; // max size of loaded local media files std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT From 75e1291a5930ef24edb9f4acb99fe6ddb1d0b319 Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Tue, 4 Nov 2025 15:27:37 -0500 Subject: [PATCH 06/11] fix arg example string Co-authored-by: Xuan-Son Nguyen --- common/arg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index c09a7f4313c..d478b51cd1a 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3304,7 +3304,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALLOWED_LOCAL_MEDIA_PATH")); add_opt(common_arg( {"--local-media-max-size-mb"}, "N", - string_format("max size in mb for local media files (default: %d)", 15), // 15MB + string_format("max size in mb for local media files (default: %d)", params.local_media_max_size_mb), [](common_params & params, int value) { params.local_media_max_size_mb = value; } From b47dba28d161f4a5d0420748bd461e7544cf7735 Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Tue, 4 Nov 2025 15:27:58 -0500 Subject: [PATCH 07/11] fix misleading comment Co-authored-by: Xuan-Son Nguyen --- tools/server/utils.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index ea51b4bd796..1949c93d028 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -649,7 +649,7 @@ static json oaicompat_chat_params_parse( throw std::runtime_error("Invalid filename piece '" + piece + "': " + fname); } } - // Check allowed local media path - fs_validate_filename already validated that there is no ".." or "." 
+ // Check allowed local media path if (opt.allowed_local_media_path == "" || !string_starts_with(fname, opt.allowed_local_media_path)) { throw std::runtime_error("File path not allowed"); } From a3e469a66b90179b356cbf4fb67090459f18cd5d Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Tue, 4 Nov 2025 20:52:14 +0000 Subject: [PATCH 08/11] Swapped LOG_ERR out for SVR_ERR --- tools/server/utils.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index ea51b4bd796..737b21096a2 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -657,7 +657,7 @@ static json oaicompat_chat_params_parse( raw_buffer buf; FILE * f = fopen(fname.c_str(), "rb"); if (!f) { - LOG_ERR("Unable to open file %s: %s\n", fname.c_str(), strerror(errno)); + SVR_ERR("Unable to open file %s: %s\n", fname.c_str(), strerror(errno)); throw std::runtime_error("Unable to open image file"); } fseek(f, 0, SEEK_END); @@ -671,7 +671,7 @@ static json oaicompat_chat_params_parse( size_t n_read = fread(buf.data(), 1, file_size, f); fclose(f); if (n_read != (size_t)file_size) { - LOG_ERR("Failed to read entire file %s", fname.c_str()); + SVR_ERR("Failed to read entire file %s", fname.c_str()); throw std::runtime_error("Failed to read entire image file"); } out_files.push_back(buf); From d6d5e18a79e15f9dea94df649bfddc953f5e397b Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Thu, 6 Nov 2025 14:45:52 -0500 Subject: [PATCH 09/11] modernized local media file handling with std::filesystem and std::ifstream --- common/arg.cpp | 4 ++-- common/common.h | 2 +- tools/server/server.cpp | 18 +++++++------- tools/server/utils.hpp | 53 +++++++++++++++++++++-------------------- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index d478b51cd1a..7c6e245ed47 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3304,9 +3304,9 @@ common_params_context common_params_parser_init(common_params & 
params, llama_ex ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALLOWED_LOCAL_MEDIA_PATH")); add_opt(common_arg( {"--local-media-max-size-mb"}, "N", - string_format("max size in mb for local media files (default: %d)", params.local_media_max_size_mb), + string_format("max size in mb for local media files (default: %lu)", params.local_media_max_size_mb), [](common_params & params, int value) { - params.local_media_max_size_mb = value; + params.local_media_max_size_mb = static_cast(value); } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_LOCAL_MEDIA_MAX_SIZE_MB")); add_opt(common_arg( diff --git a/common/common.h b/common/common.h index fdc06b76a91..4e05d4f2c69 100644 --- a/common/common.h +++ b/common/common.h @@ -429,7 +429,7 @@ struct common_params { int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc. - int32_t local_media_max_size_mb = 15; // max size of loaded local media files + size_t local_media_max_size_mb = 15; // max size (in MB) of loaded local media files std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 9ca987ac96f..10a929ee078 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -2586,15 +2586,15 @@ struct server_context { SRV_INF("thinking = %d\n", enable_thinking); oai_parser_opt = { - /* use_jinja */ params_base.use_jinja, - /* prefill_assistant */ params_base.prefill_assistant, - /* reasoning_format */ params_base.reasoning_format, - /* chat_template_kwargs */ params_base.default_template_kwargs, - /* common_chat_templates */ chat_templates.get(), - /* allow_image */ mctx ? mtmd_support_vision(mctx) : false, - /* allow_audio */ mctx ? 
mtmd_support_audio (mctx) : false, - /* enable_thinking */ enable_thinking, - /* local_media_max_size_mb */ params_base.local_media_max_size_mb, + /* use_jinja */ params_base.use_jinja, + /* prefill_assistant */ params_base.prefill_assistant, + /* reasoning_format */ params_base.reasoning_format, + /* chat_template_kwargs */ params_base.default_template_kwargs, + /* common_chat_templates */ chat_templates.get(), + /* allow_image */ mctx ? mtmd_support_vision(mctx) : false, + /* allow_audio */ mctx ? mtmd_support_audio (mctx) : false, + /* enable_thinking */ enable_thinking, + /* local_media_max_size_mb */ params_base.local_media_max_size_mb, /* allowed_local_media_path */ params_base.allowed_local_media_path, }; } diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index cc292a62553..5a495c675e9 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include #define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo" @@ -530,7 +532,7 @@ struct oaicompat_parser_options { bool allow_image; bool allow_audio; bool enable_thinking = true; - int32_t local_media_max_size_mb; + size_t local_media_max_size_mb; std::string allowed_local_media_path; }; @@ -642,36 +644,35 @@ static json oaicompat_chat_params_parse( } else if (string_starts_with(url, "file://")) { // Strip off the leading "file://" - std::string fname = url.substr(7); - std::vector fparts = string_split(fname, std::filesystem::path::preferred_separator); - for (const auto &piece : fparts) { - if (piece != "" && !fs_validate_filename(piece)) { - throw std::runtime_error("Invalid filename piece '" + piece + "': " + fname); - } + const std::string fname = url.substr(7); + if (opt.allowed_local_media_path == "") { + throw std::runtime_error("Local media paths are not enabled"); } - // Check allowed local media path - if (opt.allowed_local_media_path == "" || !string_starts_with(fname, opt.allowed_local_media_path)) { - throw std::runtime_error("File 
path not allowed"); + const std::filesystem::path allowed_local_media_path = std::filesystem::canonical(std::filesystem::path(opt.allowed_local_media_path)); + const std::filesystem::path input_path = std::filesystem::canonical(std::filesystem::path(fname)); + if (!std::filesystem::is_directory(allowed_local_media_path)) { + throw std::runtime_error("Invalid local media path: " + opt.allowed_local_media_path); } - // load local file path - raw_buffer buf; - FILE * f = fopen(fname.c_str(), "rb"); - if (!f) { - SVR_ERR("Unable to open file %s: %s\n", fname.c_str(), strerror(errno)); - throw std::runtime_error("Unable to open image file"); + auto [allowed_end, nothing] = std::mismatch(allowed_local_media_path.begin(), allowed_local_media_path.end(), input_path.begin()); + if (allowed_end != allowed_local_media_path.end()) { + throw std::runtime_error("Local media file path not allowed: " + fname); + } + if (!std::filesystem::is_regular_file(input_path)) { + throw std::runtime_error("Local media file does not exist: " + fname); } - fseek(f, 0, SEEK_END); - long file_size = ftell(f); + const auto file_size = std::filesystem::file_size(input_path); if (file_size > opt.local_media_max_size_mb * 1024 * 1024) { - fclose(f); - throw std::runtime_error("Local file exceeds maximum allowed size"); + throw std::runtime_error("Local media file exceeds maximum allowed size"); + } + // load local file path + std::ifstream f(input_path, std::ios::binary); + if (!f) { + SRV_ERR("Unable to open file %s: %s\n", fname.c_str(), strerror(errno)); + throw std::runtime_error("Unable to open local media file: " + fname); } - fseek(f, 0, SEEK_SET); - buf.resize(file_size); - size_t n_read = fread(buf.data(), 1, file_size, f); - fclose(f); - if (n_read != (size_t)file_size) { - SVR_ERR("Failed to read entire file %s", fname.c_str()); + raw_buffer buf((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + if (buf.size() != file_size) { + SRV_ERR("Failed to read entire file %s", 
fname.c_str()); throw std::runtime_error("Failed to read entire image file"); } out_files.push_back(buf); From ebcc67170fabf0f10829944d70aa415dd6b77abf Mon Sep 17 00:00:00 2001 From: Cheni Chadowitz Date: Thu, 6 Nov 2025 15:35:41 -0500 Subject: [PATCH 10/11] some cleanup, and moving the allowed_local_media_path check to the arg parsing on startup instead of when a request comes in --- common/arg.cpp | 9 ++++++++- common/common.h | 3 ++- tools/server/utils.hpp | 16 ++++++---------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 7c6e245ed47..c3006d2b110 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3299,7 +3299,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--allowed-local-media-path"}, "PATH", string_format("path from which local media files are allowed to be read from (default: none)"), [](common_params & params, const std::string & value) { - params.allowed_local_media_path = value; + try { + params.allowed_local_media_path = std::filesystem::canonical(std::filesystem::path(value)); + if (!std::filesystem::is_directory(params.allowed_local_media_path)) { + throw std::invalid_argument(string_format("allowed local media path must be a dir: %s", params.allowed_local_media_path.c_str())); + } + } catch (std::filesystem::filesystem_error &err) { + throw std::invalid_argument(string_format("invalid allowed local media path: %s", err.what())); + } } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALLOWED_LOCAL_MEDIA_PATH")); add_opt(common_arg( diff --git a/common/common.h b/common/common.h index 4e05d4f2c69..d42dba36ce6 100644 --- a/common/common.h +++ b/common/common.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "ggml-opt.h" #include "llama-cpp.h" @@ -433,7 +434,7 @@ struct common_params { std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT - std::string allowed_local_media_path = ""; // NOLINT + 
std::filesystem::path allowed_local_media_path; // NOLINT std::string api_prefix = ""; // NOLINT std::string chat_template = ""; // NOLINT bool use_jinja = false; // NOLINT diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index 5a495c675e9..c96c3bcac75 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -533,7 +533,7 @@ struct oaicompat_parser_options { bool allow_audio; bool enable_thinking = true; size_t local_media_max_size_mb; - std::string allowed_local_media_path; + std::filesystem::path allowed_local_media_path; }; // used by /chat/completions endpoint @@ -643,18 +643,14 @@ static json oaicompat_chat_params_parse( } } else if (string_starts_with(url, "file://")) { - // Strip off the leading "file://" - const std::string fname = url.substr(7); - if (opt.allowed_local_media_path == "") { + if (opt.allowed_local_media_path.empty()) { throw std::runtime_error("Local media paths are not enabled"); } - const std::filesystem::path allowed_local_media_path = std::filesystem::canonical(std::filesystem::path(opt.allowed_local_media_path)); + // Strip off the leading "file://" + const std::string fname = url.substr(7); const std::filesystem::path input_path = std::filesystem::canonical(std::filesystem::path(fname)); - if (!std::filesystem::is_directory(allowed_local_media_path)) { - throw std::runtime_error("Invalid local media path: " + opt.allowed_local_media_path); - } - auto [allowed_end, nothing] = std::mismatch(allowed_local_media_path.begin(), allowed_local_media_path.end(), input_path.begin()); - if (allowed_end != allowed_local_media_path.end()) { + auto [allowed_end, nothing] = std::mismatch(opt.allowed_local_media_path.begin(), opt.allowed_local_media_path.end(), input_path.begin()); + if (allowed_end != opt.allowed_local_media_path.end()) { throw std::runtime_error("Local media file path not allowed: " + fname); } if (!std::filesystem::is_regular_file(input_path)) { From 3cef933ba77fcfced61f1085d19173d873815755 Mon Sep 17 
00:00:00 2001 From: Cheni Chadowitz Date: Thu, 6 Nov 2025 17:17:11 -0500 Subject: [PATCH 11/11] added server tests for allowed local media path and size args --- tools/server/tests/unit/test_vision_api.py | 89 +++++++++++++++++++++- tools/server/tests/utils.py | 6 ++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/tools/server/tests/unit/test_vision_api.py b/tools/server/tests/unit/test_vision_api.py index 9408116d1cf..cccd95fb2fe 100644 --- a/tools/server/tests/unit/test_vision_api.py +++ b/tools/server/tests/unit/test_vision_api.py @@ -2,12 +2,15 @@ from utils import * import base64 import requests +from pathlib import Path server: ServerProcess -def get_img_url(id: str) -> str: + +def get_img_url(id: str, tmp_path: str | None = None) -> str: IMG_URL_0 = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/test/11_truck.png" IMG_URL_1 = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/test/91_cat.png" + IMG_FILE_2 = "https://picsum.photos/id/237/5000" if id == "IMG_URL_0": return IMG_URL_0 elif id == "IMG_URL_1": @@ -28,6 +31,46 @@ def get_img_url(id: str) -> str: response = requests.get(IMG_URL_1) response.raise_for_status() # Raise an exception for bad status codes return base64.b64encode(response.content).decode("utf-8") + elif id == "IMG_FILE_0": + if tmp_path is None: + raise RuntimeError("get_img_url must be called with a tmp_path if using local files") + image_name = IMG_URL_0.split('/')[-1] + file_name: Path = Path(tmp_path) / image_name + if file_name.exists(): + return f"file://{file_name}" + else: + response = requests.get(IMG_URL_0) + response.raise_for_status() # Raise an exception for bad status codes + with open(file_name, 'wb') as f: + f.write(response.content) + return f"file://{file_name}" + elif id == "IMG_FILE_1": + if tmp_path is None: + raise RuntimeError("get_img_url must be called with a tmp_path if using local files") + image_name = IMG_URL_1.split('/')[-1] + file_name: Path = Path(tmp_path) / 
image_name + if file_name.exists(): + return f"file://{file_name}" + else: + response = requests.get(IMG_URL_1) + response.raise_for_status() # Raise an exception for bad status codes + with open(file_name, 'wb') as f: + f.write(response.content) + return f"file://{file_name}" + elif id == "IMG_FILE_2": + if tmp_path is None: + raise RuntimeError("get_img_url must be called with a tmp_path if using local files") + image_name = "dog.jpg" + file_name: Path = Path(tmp_path) / image_name + if file_name.exists(): + return f"file://{file_name}" + else: + response = requests.get(IMG_FILE_2) + response.raise_for_status() # Raise an exception for bad status codes + with open(file_name, 'wb') as f: + f.write(response.content) + return f"file://{file_name}" + else: return id @@ -70,6 +113,9 @@ def test_v1_models_supports_multimodal_capability(): ("What is this:\n", "malformed", False, None), ("What is this:\n", "https://google.com/404", False, None), # non-existent image ("What is this:\n", "https://ggml.ai", False, None), # non-image data + ("What is this:\n", "IMG_FILE_0", False, None), + ("What is this:\n", "IMG_FILE_1", False, None), + ("What is this:\n", "IMG_FILE_2", False, None), # TODO @ngxson : test with multiple images, no images and with audio ] ) @@ -83,7 +129,7 @@ def test_vision_chat_completion(prompt, image_url, success, re_content): {"role": "user", "content": [ {"type": "text", "text": prompt}, {"type": "image_url", "image_url": { - "url": get_img_url(image_url), + "url": get_img_url(image_url, "./tmp"), }}, ]}, ], @@ -97,6 +143,45 @@ def test_vision_chat_completion(prompt, image_url, success, re_content): assert res.status_code != 200 +@pytest.mark.parametrize( + "allowed_mb_size, allowed_path, img_dir_path, prompt, image_url, success, re_content", + [ + # test model is trained on CIFAR-10, but it's quite dumb due to small size + (0, "./tmp", "./tmp", "What is this:\n", "IMG_FILE_0", True, "(cat)+"), + (0, "./tmp", "./tmp", "What is this:\n", "IMG_FILE_1", 
True, "(frog)+"), + (1, "./tmp", "./tmp", "What is this:\n", "IMG_FILE_2", False, None), + (0, "./tmp/allowed", "./tmp", "What is this:\n", "IMG_FILE_0", False, None), + (0, "./tm", "./tmp", "What is this:\n", "IMG_FILE_0", False, None), + (0, "./tmp/allowed", "./tmp/allowed/..", "What is this:\n", "IMG_FILE_0", False, None), + (0, "./tmp/allowed", "./tmp/allowed/../.", "What is this:\n", "IMG_FILE_0", False, None), + ] +) +def test_vision_chat_completion_local_files(allowed_mb_size, allowed_path, img_dir_path, prompt, image_url, success, re_content): + global server + server.local_media_max_size_mb = allowed_mb_size + server.allowed_local_media_path = allowed_path + Path(allowed_path).mkdir(exist_ok=True) + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "temperature": 0.0, + "top_k": 1, + "messages": [ + {"role": "user", "content": [ + {"type": "text", "text": prompt}, + {"type": "image_url", "image_url": { + "url": get_img_url(image_url, img_dir_path), + }}, + ]}, + ], + }) + if success: + assert res.status_code == 200 + choice = res.body["choices"][0] + assert "assistant" == choice["message"]["role"] + assert match_regex(re_content, choice["message"]["content"]) + else: + assert res.status_code != 200 + @pytest.mark.parametrize( "prompt, image_data, success, re_content", [ diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py index da703c4c51a..eb1650e4055 100644 --- a/tools/server/tests/utils.py +++ b/tools/server/tests/utils.py @@ -95,6 +95,8 @@ class ServerProcess: chat_template_file: str | None = None server_path: str | None = None mmproj_url: str | None = None + local_media_max_size_mb: int | None = None + allowed_local_media_path: str | None = None # session variables process: subprocess.Popen | None = None @@ -215,6 +217,10 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None: server_args.extend(["--chat-template-file", self.chat_template_file]) if self.mmproj_url: 
server_args.extend(["--mmproj-url", self.mmproj_url]) + if self.local_media_max_size_mb: + server_args.extend(["--local-media-max-size-mb", self.local_media_max_size_mb]) + if self.allowed_local_media_path: + server_args.extend(["--allowed-local-media-path", self.allowed_local_media_path]) args = [str(arg) for arg in [server_path, *server_args]] print(f"tests: starting server with: {' '.join(args)}")