From 53f65c354e013f28f0978f87f467e7e93a6bd442 Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Wed, 23 Jul 2025 21:33:53 -0400 Subject: [PATCH 1/2] imatrix : use GGUF by default Still uses the old format when the output filename ends with .dat but this can be overridden with --output-format --- common/arg.cpp | 9 +++++++++ common/common.h | 7 +++++++ tools/imatrix/README.md | 3 ++- tools/imatrix/imatrix.cpp | 10 ++++++---- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 060053595dbfd..18d9da7441bc0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2627,6 +2627,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.n_out_freq = value; } ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--output-format"}, "{gguf,dat}", + string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"), + [](common_params & params, const std::string & value) { + /**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; } + else if (value == "dat") { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT; } + else { throw std::invalid_argument("invalid output format"); } + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); add_opt(common_arg( {"--save-frequency"}, "N", string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq), diff --git a/common/common.h b/common/common.h index 00f42694eafa8..984b4d21cf758 100644 --- a/common/common.h +++ b/common/common.h @@ -233,6 +233,12 @@ enum common_reasoning_format { COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas. 
}; +enum common_imatrix_format_type { + COMMON_IMATRIX_FORMAT_AUTO, + COMMON_IMATRIX_FORMAT_GGUF, + COMMON_IMATRIX_FORMAT_DAT, // legacy +}; + struct common_params { int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 4096; // context size @@ -431,6 +437,7 @@ struct common_params { int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations int32_t i_chunk = 0; // start processing from this chunk + common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix bool process_output = false; // collect data for the output tensor bool compute_ppl = true; // whether to compute perplexity diff --git a/tools/imatrix/README.md b/tools/imatrix/README.md index 7417a2dec9e6c..437cc0e6edd63 100644 --- a/tools/imatrix/README.md +++ b/tools/imatrix/README.md @@ -7,7 +7,7 @@ More information is available in =2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`. * `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used. * `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks) +* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf" unless the output filename ends with `.dat`. * `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never) * `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default. * `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets. 
diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 9aad3711bae54..111d0024b21a3 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -26,7 +26,7 @@ static void print_usage(int, char ** argv) { LOG("\nexample usage:\n"); LOG("\n %s \\\n" - " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n" + " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n" " [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n" " [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n" " [--show-statistics] [...]\n" , argv[0]); @@ -492,13 +492,15 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const { void IMatrixCollector::save_imatrix(int32_t n_chunk) const { auto fname = m_params.out_file; + auto imat_type = m_params.imat_out_type; - // TODO: use the new format in more cases - if (!string_ends_with(fname, ".gguf")) { - LOG_WRN("\n%s: saving to legacy imatrix format because output suffix is not .gguf\n", __func__); + if ((imat_type == COMMON_IMATRIX_FORMAT_AUTO && string_ends_with(fname, ".dat")) || + (imat_type == COMMON_IMATRIX_FORMAT_DAT)) { + LOG_WRN("\n%s: saving to legacy imatrix format\n", __func__); this->save_imatrix_legacy(n_chunk); return; } + // else, default to GGUF imatrix if (n_chunk > 0) { fname += ".at_"; From 1ef3cc1a87ec9344c7329d9c72c3eb991ceb70d7 Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Wed, 23 Jul 2025 23:08:03 -0400 Subject: [PATCH 2/2] imatrix : use GGUF regardless of the output filename The legacy format can only be produced with --output-format dat --- common/arg.cpp | 6 +++--- common/common.h | 8 +------- tools/imatrix/README.md | 11 ++++++++--- tools/imatrix/imatrix.cpp | 6 ++---- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 18d9da7441bc0..0c62595115485 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2629,10 
+2629,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_IMATRIX})); add_opt(common_arg( {"--output-format"}, "{gguf,dat}", - string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"), + string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"), [](common_params & params, const std::string & value) { - /**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; } - else if (value == "dat") { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT; } + /**/ if (value == "gguf") { params.imat_dat = false; } + else if (value == "dat") { params.imat_dat = true; } else { throw std::invalid_argument("invalid output format"); } } ).set_examples({LLAMA_EXAMPLE_IMATRIX})); diff --git a/common/common.h b/common/common.h index 984b4d21cf758..10c660797a8de 100644 --- a/common/common.h +++ b/common/common.h @@ -233,12 +233,6 @@ enum common_reasoning_format { COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas. 
}; -enum common_imatrix_format_type { - COMMON_IMATRIX_FORMAT_AUTO, - COMMON_IMATRIX_FORMAT_GGUF, - COMMON_IMATRIX_FORMAT_DAT, // legacy -}; - struct common_params { int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 4096; // context size @@ -437,7 +431,7 @@ struct common_params { int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations int32_t i_chunk = 0; // start processing from this chunk - common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix + bool imat_dat = false; // whether the legacy imatrix.dat format should be output bool process_output = false; // collect data for the output tensor bool compute_ppl = true; // whether to compute perplexity diff --git a/tools/imatrix/README.md b/tools/imatrix/README.md index 437cc0e6edd63..4505cb4ce8c7d 100644 --- a/tools/imatrix/README.md +++ b/tools/imatrix/README.md @@ -20,7 +20,7 @@ The parameters in square brackets are optional and have the following meaning: * `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`. * `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used. * `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks) -* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf" unless the output filename ends with `.dat`. +* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). 
Defaults to "gguf". * `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never) * `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default. * `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets. @@ -46,14 +46,19 @@ Recent versions of `llama-imatrix` store data in GGUF format by default. For the ```bash # generate and save the imatrix using legacy format -./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt -o imatrix-legcy-format.dat -ngl 99 +./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt --output-format dat -o imatrix-legacy-format.dat -ngl 99 ``` ```bash -# covert legacy (binary) imatrix format to new (GGUF) format +# convert legacy (binary) imatrix format to new (GGUF) format ./llama-imatrix --in-file imatrix-legacy-format.dat -o imatrix-new-format.gguf ``` +```bash +# convert new (GGUF) imatrix format to legacy (binary) format +./llama-imatrix --in-file imatrix-new-format.gguf --output-format dat -o imatrix-legacy-format.dat +``` + ```bash # combine existing imatrices ./llama-imatrix --in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf -o imatrix-combined.gguf diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 111d0024b21a3..c693fba012808 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -492,11 +492,9 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const { void IMatrixCollector::save_imatrix(int32_t n_chunk) const { auto fname = m_params.out_file; - auto imat_type = m_params.imat_out_type; + bool use_legacy_format = m_params.imat_dat; - if ((imat_type == COMMON_IMATRIX_FORMAT_AUTO && string_ends_with(fname, ".dat")) || - (imat_type == COMMON_IMATRIX_FORMAT_DAT)) { - LOG_WRN("\n%s: saving
to legacy imatrix format\n", __func__); + if (use_legacy_format) { this->save_imatrix_legacy(n_chunk); return; }