From cd96be73a375bea8ec5d8dd48ff3b5299dae6f9e Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Sun, 12 Oct 2025 12:42:17 -0700 Subject: [PATCH 1/2] Add --embd-output-format raw for plain numeric embedding output This new option outputs embeddings as raw space-separated floats, without JSON or 'embedding N:' prefixes. Useful for downstream vector pipelines and scripting. --- examples/embedding/embedding.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 388908bc4d70a..11b44857a9856 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -4,6 +4,7 @@ #include "llama.h" #include +#include #include #if defined(_MSC_VER) @@ -70,6 +71,29 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu } } +// plain, pipe-friendly output: one embedding per line +static void print_raw_embeddings(const float * emb, + int n_embd_count, + int n_embd, + const llama_model * model, + enum llama_pooling_type pooling_type, + int embd_normalize) { + const uint32_t n_cls_out = llama_model_n_cls_out(model); + const bool is_rank = (pooling_type == LLAMA_POOLING_TYPE_RANK); + const int cols = is_rank ? std::min(n_embd, (int) n_cls_out) : n_embd; + + for (int j = 0; j < n_embd_count; ++j) { + for (int i = 0; i < cols; ++i) { + if (embd_normalize == 0) { + printf("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); + } else { + printf("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); + } + } + printf("\n"); + } +} + int main(int argc, char ** argv) { common_params params; @@ -259,6 +283,10 @@ int main(int argc, char ** argv) { float * out = emb + e * n_embd; batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); + if (params.embd_out == "raw") { + print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); + } + if (params.embd_out.empty()) { LOG("\n"); From c66712074ccf7a409ecbacf31f070035d9b46fc3 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Mon, 13 Oct 2025 11:33:41 -0700 Subject: [PATCH 2/2] Move raw output handling into format handling section --- examples/embedding/embedding.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 11b44857a9856..8b25fcdb4fe7a 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -283,10 +283,6 @@ int main(int argc, char ** argv) { float * out = emb + e * n_embd; batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); - if (params.embd_out == "raw") { - print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); - } - if (params.embd_out.empty()) { LOG("\n"); @@ -402,6 +398,10 @@ int main(int argc, char ** argv) { if (notArray) LOG("\n}\n"); } + if (params.embd_out == "raw") { + print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); + } + LOG("\n"); llama_perf_context_print(ctx);