@@ -212,13 +212,13 @@ struct handle_model_result {
 static handle_model_result common_params_handle_model(
         struct common_params_model & model,
         const std::string & bearer_token,
-        const std::string & model_path_default,
         bool offline) {
     handle_model_result result;
     // handle pre-fill default model path and url based on hf_repo and hf_file
     {
         if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
             model.path = common_docker_resolve_model(model.docker_repo);
+            model.name = model.docker_repo; // set name for consistency
         } else if (!model.hf_repo.empty()) {
             // short-hand to avoid specifying --hf-file -> default it to --model
             if (model.hf_file.empty()) {
@@ -227,7 +227,8 @@ static handle_model_result common_params_handle_model(
                     if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
                         exit(1); // built without CURL, error message already printed
                     }
-                    model.hf_repo = auto_detected.repo;
+                    model.name    = model.hf_repo;      // repo name with tag
+                    model.hf_repo = auto_detected.repo; // repo name without tag
                     model.hf_file = auto_detected.ggufFile;
                     if (!auto_detected.mmprojFile.empty()) {
                         result.found_mmproj = true;
@@ -257,8 +258,6 @@ static handle_model_result common_params_handle_model(
                 model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
             }
 
-        } else if (model.path.empty()) {
-            model.path = model_path_default;
         }
     }
 
@@ -405,7 +404,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 
     // handle model and download
     {
-        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
+        auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
         if (params.no_mmproj) {
             params.mmproj = {};
         } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
@@ -415,12 +414,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         // only download mmproj if the current example is using it
         for (auto & ex : mmproj_examples) {
             if (ctx_arg.ex == ex) {
-                common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
+                common_params_handle_model(params.mmproj, params.hf_token, params.offline);
                 break;
             }
         }
-        common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
-        common_params_handle_model(params.vocoder.model,     params.hf_token, "", params.offline);
+        common_params_handle_model(params.speculative.model, params.hf_token, params.offline);
+        common_params_handle_model(params.vocoder.model,     params.hf_token, params.offline);
+    }
+
+    // model is required (except for server)
+    // TODO @ngxson : maybe show a list of available models in CLI in this case
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+        throw std::invalid_argument("error: --model is required\n");
     }
 
     if (params.escape) {
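
A note on the validation added in this hunk: it only rejects an empty model path for non-server examples, since the server can start without a model. Below is a minimal standalone sketch of that behaviour using toy stand-ins (`params_sketch`, `EXAMPLE_SERVER`) rather than the real `common_params` types; it is an illustration, not the actual llama.cpp code.

#include <cstdio>
#include <stdexcept>
#include <string>

// toy stand-ins for the real llama.cpp types, just so the check compiles on its own
enum llama_example_sketch { EXAMPLE_MAIN, EXAMPLE_SERVER };

struct params_sketch {
    std::string model_path; // filled from -m/--model or resolved from --hf-repo / --docker-repo
};

static void check_model_required(const params_sketch & params, llama_example_sketch ex) {
    // the server may start without a model (one can be loaded later),
    // every other example needs a resolvable model path at parse time
    if (params.model_path.empty() && ex != EXAMPLE_SERVER) {
        throw std::invalid_argument("error: --model is required\n");
    }
}

int main() {
    try {
        check_model_required(params_sketch{}, EXAMPLE_MAIN); // throws: no model given
    } catch (const std::invalid_argument & e) {
        std::fprintf(stderr, "%s", e.what());
    }
    check_model_required(params_sketch{}, EXAMPLE_SERVER);   // ok: server may run model-less
    return 0;
}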
@@ -2090,11 +2095,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"-m", "--model"}, "FNAME",
         ex == LLAMA_EXAMPLE_EXPORT_LORA
-            ? std::string("model path from which to load base model")
-            : string_format(
-                "model path (default: `models/$filename` with filename from `--hf-file` "
-                "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
-            ),
+            ? "model path from which to load base model"
+            : "model path to load",
         [](common_params & params, const std::string & value) {
             params.model.path = value;
         }
@@ -2492,6 +2494,27 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--models-dir"}, "PATH",
+        "directory containing models for the router server (default: disabled)",
+        [](common_params & params, const std::string & value) {
+            params.models_dir = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
+    add_opt(common_arg(
+        {"--models-max"}, "N",
+        string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
+        [](common_params & params, int value) {
+            params.models_max = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+    add_opt(common_arg(
+        {"--no-models-autoload"},
+        "disables automatic loading of models (default: enabled)",
+        [](common_params & params) {
+            params.models_autoload = false;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
     add_opt(common_arg(
         {"--jinja"},
         string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
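
For readers skimming the new router-server options, here is a small self-contained sketch of what they appear to control and of the env-var fallbacks registered via `set_env()`. The struct name, the default for `models_max`, and the boolean env handling are assumptions for illustration only; the real parsing lives in `common_arg`, not in this sketch.

#include <cstdio>
#include <cstdlib>
#include <string>

// hypothetical stand-in for the fields this change appears to add to common_params
struct router_settings_sketch {
    std::string models_dir;             // --models-dir / LLAMA_ARG_MODELS_DIR (empty = disabled)
    int         models_max      = 0;    // --models-max / LLAMA_ARG_MODELS_MAX (0 = unlimited; real default not shown here)
    bool        models_autoload = true; // cleared by --no-models-autoload / LLAMA_ARG_NO_MODELS_AUTOLOAD
};

// rough approximation of the env-var fallback behaviour; the actual handling of
// boolean env vars in common_arg may differ
static router_settings_sketch load_router_settings_from_env() {
    router_settings_sketch s;
    if (const char * v = std::getenv("LLAMA_ARG_MODELS_DIR")) { s.models_dir = v; }
    if (const char * v = std::getenv("LLAMA_ARG_MODELS_MAX")) { s.models_max = std::atoi(v); }
    if (std::getenv("LLAMA_ARG_NO_MODELS_AUTOLOAD") != nullptr) { s.models_autoload = false; }
    return s;
}

int main() {
    const router_settings_sketch s = load_router_settings_from_env();
    std::printf("models_dir=%s models_max=%d autoload=%d\n",
                s.models_dir.c_str(), s.models_max, s.models_autoload ? 1 : 0);
    return 0;
}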