@@ -121,10 +121,6 @@ struct common_grammar_trigger {
121121 common_grammar_trigger_type type;
122122 std::string value;
123123 llama_token token = LLAMA_TOKEN_NULL;
124-
125- // T can only be nlohmann::ordered_json
126- template <class T > T to_json () const ;
127- template <class T > static common_grammar_trigger from_json (const T & in);
128124};
129125
130126// sampling parameters
@@ -184,6 +180,13 @@ struct common_params_sampling {
184180 std::string print () const ;
185181};
186182
183+ struct common_params_model { // one model source: exactly the fields previously duplicated per-consumer (path / url / hf_repo+hf_file)
184+ std::string path = " " ; // model local path // NOLINT
185+ std::string url = " " ; // model url to download // NOLINT
186+ std::string hf_repo = " " ; // Hugging Face repo id // NOLINT
187+ std::string hf_file = " " ; // file within the HF repo // NOLINT
188+ };
189+
187190struct common_params_speculative {
188191 std::vector<ggml_backend_dev_t > devices; // devices to use for offloading
189192
@@ -197,19 +200,11 @@ struct common_params_speculative {
197200 struct cpu_params cpuparams;
198201 struct cpu_params cpuparams_batch;
199202
200- std::string hf_repo = " " ; // HF repo // NOLINT
201- std::string hf_file = " " ; // HF file // NOLINT
202-
203- std::string model = " " ; // draft model for speculative decoding // NOLINT
204- std::string model_url = " " ; // model url to download // NOLINT
203+ struct common_params_model model;
205204};
206205
207206struct common_params_vocoder {
208- std::string hf_repo = " " ; // HF repo // NOLINT
209- std::string hf_file = " " ; // HF file // NOLINT
210-
211- std::string model = " " ; // model path // NOLINT
212- std::string model_url = " " ; // model url to download // NOLINT
207+ struct common_params_model model;
213208
214209 std::string speaker_file = " " ; // speaker file path // NOLINT
215210
@@ -267,12 +262,10 @@ struct common_params {
267262 struct common_params_speculative speculative;
268263 struct common_params_vocoder vocoder;
269264
270- std::string model = " " ; // model path // NOLINT
265+ struct common_params_model model;
266+
271267 std::string model_alias = " " ; // model alias // NOLINT
272- std::string model_url = " " ; // model url to download // NOLINT
273268 std::string hf_token = " " ; // HF token // NOLINT
274- std::string hf_repo = " " ; // HF repo // NOLINT
275- std::string hf_file = " " ; // HF file // NOLINT
276269 std::string prompt = " " ; // NOLINT
277270 std::string system_prompt = " " ; // NOLINT
278271 std::string prompt_file = " " ; // store the external prompt file name // NOLINT
@@ -286,6 +279,7 @@ struct common_params {
286279 std::vector<std::string> in_files; // all input files
287280 std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
288281 std::vector<llama_model_kv_override> kv_overrides;
282+ std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
289283
290284 bool lora_init_without_apply = false ; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
291285 std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
@@ -347,7 +341,7 @@ struct common_params {
347341 common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
348342
349343 // multimodal models (see examples/llava)
350- std::string mmproj = " " ; // path to multimodal projector // NOLINT
344+ struct common_params_model mmproj;
351345 std::vector<std::string> image; // path to image file(s)
352346
353347 // embedding
@@ -546,23 +540,6 @@ struct llama_model_params common_model_params_to_llama ( common_params
546540struct llama_context_params common_context_params_to_llama (const common_params & params);
547541struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params (const cpu_params & params);
548542
549- struct llama_model * common_load_model_from_url (
550- const std::string & model_url,
551- const std::string & local_path,
552- const std::string & hf_token,
553- const struct llama_model_params & params);
554-
555- struct llama_model * common_load_model_from_hf (
556- const std::string & repo,
557- const std::string & remote_path,
558- const std::string & local_path,
559- const std::string & hf_token,
560- const struct llama_model_params & params);
561-
562- std::pair<std::string, std::string> common_get_hf_file (
563- const std::string & hf_repo_with_tag,
564- const std::string & hf_token);
565-
566543// clear LoRA adapters from context, then apply new list of adapters
567544void common_set_adapter_lora (struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
568545
0 commit comments