diff --git a/llama.cpp b/llama.cpp
index e966faeeda90f..0598fdb8e044d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6246,6 +6246,7 @@ struct llm_build_context {
 
         // final output
         cur = inpL;
+        cb(cur, "result_embd", -1);
 
         // pooling layer
         switch (pooling_type) {
@@ -6256,17 +6257,18 @@ struct llm_build_context {
             case LLAMA_POOLING_TYPE_MEAN:
                 {
                     cur = ggml_mul_mat(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, cur)), inp_mean);
+                    cb(cur, "result_embd_pooled", -1);
                 } break;
             case LLAMA_POOLING_TYPE_CLS:
                 {
                     cur = ggml_get_rows(ctx0, cur, inp_cls);
+                    cb(cur, "result_embd_pooled", -1);
                 } break;
             case LLAMA_POOLING_TYPE_UNSPECIFIED:
                 {
                     GGML_ASSERT(false && "Invalid pooling type");
                 } break;
         }
 
-        cb(cur, "result_embd", -1);
 
         ggml_build_forward_expand(gf, cur);
@@ -8281,7 +8283,7 @@ static int llama_decode_internal(
             // token or sequence embeddings
             embd = gf->nodes[gf->n_nodes - 1];
 
-            GGML_ASSERT(strcmp(embd->name, "result_embd") == 0);
+            GGML_ASSERT(strcmp(embd->name, "result_embd") == 0 || strcmp(embd->name, "result_embd_pooled") == 0);
         } else {
             if (strcmp(res->name, "result_output") == 0) {
                 // the token embeddings could be the second to last tensor, or the third to last tensor
@@ -8413,6 +8415,8 @@ static int llama_decode_internal(
                 case LLAMA_POOLING_TYPE_CLS:
                 case LLAMA_POOLING_TYPE_MEAN:
                     {
+                        GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0);
+
                         // extract sequence embeddings
                         auto & embd_seq_out = lctx.embd_seq;
                         embd_seq_out.clear();
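
Note (not part of the patch): the effect of the split is that the graph output name now distinguishes unpooled token embeddings ("result_embd") from pooled sequence embeddings ("result_embd_pooled"), which the extraction path in llama_decode_internal asserts on. Callers are unaffected and keep reading pooled vectors through the public API. A minimal consumer sketch follows, assuming a loaded model and a context decoded with embeddings enabled and a MEAN or CLS pooling type; print_pooled_embedding is a hypothetical helper, while llama_n_embd and llama_get_embeddings_seq are the existing public API of this period:

#include "llama.h"

#include <cstdio>

// hypothetical helper: dump the pooled embedding of one sequence.
// with MEAN/CLS pooling, the graph ends in "result_embd_pooled" and
// llama_get_embeddings_seq() returns one vector per sequence id,
// filled from lctx.embd_seq by the extraction code touched above.
static void print_pooled_embedding(llama_context * ctx, const llama_model * model, llama_seq_id seq_id) {
    const int n_embd = llama_n_embd(model);

    const float * embd = llama_get_embeddings_seq(ctx, seq_id);
    if (embd == nullptr) {
        fprintf(stderr, "no pooled embedding for seq %d\n", seq_id);
        return;
    }

    for (int i = 0; i < n_embd; i++) {
        printf("%s%.6f", i == 0 ? "" : " ", embd[i]);
    }
    printf("\n");
}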