Commit 6e30a6c

Merge pull request #41 from menloresearch/update-dev-from-master-2025-04-03-00-08
Sync master with upstream release b5033
2 parents 49f328b + f01bd02


53 files changed: +1456 -2922 lines

common/arg.cpp

Lines changed: 644 additions & 71 deletions
Large diffs are not rendered by default.

common/common.cpp

Lines changed: 14 additions & 519 deletions
Large diffs are not rendered by default.

common/common.h

Lines changed: 13 additions & 36 deletions
@@ -121,10 +121,6 @@ struct common_grammar_trigger {
     common_grammar_trigger_type type;
     std::string value;
     llama_token token = LLAMA_TOKEN_NULL;
-
-    // T can only be nlohmann::ordered_json
-    template <class T> T to_json() const;
-    template <class T> static common_grammar_trigger from_json(const T & in);
 };
 
 // sampling parameters
@@ -184,6 +180,13 @@ struct common_params_sampling {
     std::string print() const;
 };
 
+struct common_params_model {
+    std::string path = "";    // model local path // NOLINT
+    std::string url = "";     // model url to download // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
+};
+
 struct common_params_speculative {
     std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
 
@@ -197,19 +200,11 @@ struct common_params_speculative {
     struct cpu_params cpuparams;
     struct cpu_params cpuparams_batch;
 
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model = "";     // draft model for speculative decoding // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 };
 
 struct common_params_vocoder {
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model = "";     // model path // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 
     std::string speaker_file = ""; // speaker file path // NOLINT
 
@@ -267,12 +262,10 @@ struct common_params {
     struct common_params_speculative speculative;
     struct common_params_vocoder vocoder;
 
-    std::string model = ""; // model path // NOLINT
+    struct common_params_model model;
+
     std::string model_alias = ""; // model alias // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
     std::string hf_token = ""; // HF token // NOLINT
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
     std::string prompt = ""; // NOLINT
     std::string system_prompt = ""; // NOLINT
     std::string prompt_file = ""; // store the external prompt file name // NOLINT
@@ -286,6 +279,7 @@ struct common_params {
     std::vector<std::string> in_files;   // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
+    std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
 
     bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
     std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
@@ -347,7 +341,7 @@ struct common_params {
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
 
     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector // NOLINT
+    struct common_params_model mmproj;
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
@@ -546,23 +540,6 @@ struct llama_model_params common_model_params_to_llama(common_params
 struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
 
-struct llama_model * common_load_model_from_url(
-    const std::string & model_url,
-    const std::string & local_path,
-    const std::string & hf_token,
-    const struct llama_model_params & params);
-
-struct llama_model * common_load_model_from_hf(
-    const std::string & repo,
-    const std::string & remote_path,
-    const std::string & local_path,
-    const std::string & hf_token,
-    const struct llama_model_params & params);
-
-std::pair<std::string, std::string> common_get_hf_file(
-    const std::string & hf_repo_with_tag,
-    const std::string & hf_token);
-
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
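Taken together, these hunks fold the four scattered model-source strings (local path, download URL, HF repo, HF file) into a single `common_params_model` struct that is reused for the main model, the speculative draft model, the vocoder, and the multimodal projector. Below is a minimal caller-side sketch of the migration; `load_model_sketch` is a hypothetical helper name, but the load call itself mirrors the one-line edits in the examples/* diffs that follow:

```cpp
// Sketch only: shows how callers move from the old params.model string to the
// new params.model.path member. Assumes the common.h declarations shown above.
#include "common.h"
#include "llama.h"

static llama_model * load_model_sketch(common_params & params) {
    llama_model_params mparams = common_model_params_to_llama(params);

    // Before this commit, params.model was a std::string holding the local path.
    // Now the path lives in params.model.path, with url/hf_repo/hf_file
    // traveling alongside it in the same common_params_model struct.
    return llama_model_load_from_file(params.model.path.c_str(), mparams);
}
```

Note also that `common_load_model_from_url`, `common_load_model_from_hf`, and `common_get_hf_file` disappear from the public header; judging by the 644 lines added to common/arg.cpp, the download and HF-resolution logic presumably moved behind the argument parser rather than remaining a caller-visible API.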

examples/batched-bench/batched-bench.cpp

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
 
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);

examples/batched/batched.cpp

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
 
     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n" , __func__);

examples/export-lora/export-lora.cpp

Lines changed: 1 addition & 1 deletion
@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
 
     g_verbose = (params.verbosity > 1);
     try {
-        lora_merge_ctx ctx(params.model, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
+        lora_merge_ctx ctx(params.model.path, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
         ctx.run_merge();
     } catch (const std::exception & err) {
         fprintf(stderr, "%s\n", err.what());

examples/gritlm/gritlm.cpp

Lines changed: 1 addition & 1 deletion
@@ -168,7 +168,7 @@ int main(int argc, char * argv[]) {
 
     llama_backend_init();
 
-    llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
 
     // create generation context
     llama_context * ctx = llama_init_from_model(model, cparams);

examples/llava/README-gemma3.md

Lines changed: 20 additions & 0 deletions
@@ -4,6 +4,26 @@
 >
 > This is very experimental, only used for demo purpose.
 
+## Quick start
+
+You can use pre-quantized models from [ggml-org](https://huggingface.co/ggml-org)'s Hugging Face account
+
+```bash
+# build
+cmake -B build
+cmake --build build --target llama-gemma3-cli
+
+# alternatively, install from brew (macOS)
+brew install llama.cpp
+
+# run it
+llama-gemma3-cli -hf ggml-org/gemma-3-4b-it-GGUF
+llama-gemma3-cli -hf ggml-org/gemma-3-12b-it-GGUF
+llama-gemma3-cli -hf ggml-org/gemma-3-27b-it-GGUF
+
+# note: 1B model does not support vision
+```
+
 ## How to get mmproj.gguf?
 
 ```bash

examples/llava/gemma3-cli.cpp

Lines changed: 3 additions & 3 deletions
@@ -78,7 +78,7 @@ struct gemma3_context {
     }
 
     void init_clip_model(common_params & params) {
-        const char * clip_path = params.mmproj.c_str();
+        const char * clip_path = params.mmproj.path.c_str();
         ctx_clip = clip_model_load(clip_path, params.verbosity > 1);
     }
 
@@ -232,13 +232,13 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.mmproj.empty()) {
+    if (params.mmproj.path.empty()) {
         show_additional_info(argc, argv);
         return 1;
     }
 
     gemma3_context ctx(params);
-    printf("%s: %s\n", __func__, params.model.c_str());
+    printf("%s: %s\n", __func__, params.model.path.c_str());
 
     bool is_single_turn = !params.prompt.empty() && !params.image.empty();
 

examples/llava/llava-cli.cpp

Lines changed: 3 additions & 3 deletions
@@ -225,7 +225,7 @@ static struct llama_model * llava_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -234,7 +234,7 @@ static struct llama_model * llava_init(common_params * params) {
 }
 
 static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
-    const char * clip_path = params->mmproj.c_str();
+    const char * clip_path = params->mmproj.path.c_str();
 
     auto prompt = params->prompt;
     if (prompt.empty()) {
@@ -283,7 +283,7 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
+    if (params.mmproj.path.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
         print_usage(argc, argv);
         return 1;
     }
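All three multimodal entry points above follow the same pattern once `params.mmproj` becomes a `common_params_model`. A condensed sketch, assuming only what the hunks show (the `clip_model_load` call and the empty-path check); `init_clip_sketch` is a hypothetical name:

```cpp
// Condensed sketch of the shared mmproj migration, based on the hunks above.
#include "clip.h"
#include "common.h"

static clip_ctx * init_clip_sketch(common_params & params) {
    // params.mmproj.path.empty() replaces the old params.mmproj.empty() check.
    if (params.mmproj.path.empty()) {
        return nullptr; // real callers print usage and exit, as in the diffs
    }
    // .path replaces the old bare string; the enclosing struct also carries
    // url/hf_repo/hf_file, which suggests (though this diff alone does not
    // confirm it) that the projector can now be fetched like the main model.
    return clip_model_load(params.mmproj.path.c_str(), params.verbosity > 1);
}
```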
