diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e2a8e329..5fb84f07f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- fix: Correct batched embedding outputs for multi-sequence `embed()` calls by @Anai-Guo in #2205 + ## [0.3.22] - feat: Update llama.cpp to ggerganov/llama.cpp@63d93d173 diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py index cde52c8c8..24f6fddc7 100644 --- a/llama_cpp/_internals.py +++ b/llama_cpp/_internals.py @@ -522,7 +522,7 @@ def add_sequence(self, batch: Sequence[int], seq_id: int, logits_all: bool): self.batch.seq_id[j][0] = seq_id self.batch.n_seq_id[j] = 1 self.batch.logits[j] = logits_all - self.batch.logits[n_tokens - 1] = True + self.batch.logits[n_tokens0 + n_tokens - 1] = True class LlamaTokenDataArray: