From bc4884d280c47f806bf98798cacc102516fc3023 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Thu, 14 Aug 2025 13:41:08 +0900
Subject: [PATCH 1/4] perplexity: give more information about constraints on
 failure

When a task cannot be scheduled, this reports whether the limiting factor is the -np (--parallel) value or the context size, and includes how much of the respective resource the task requires.
---
 tools/perplexity/perplexity.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp
index 81bdc7c19cae4..d929d8c921605 100644
--- a/tools/perplexity/perplexity.cpp
+++ b/tools/perplexity/perplexity.cpp
@@ -920,7 +920,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) {
         }
 
         if (i0 == i1) {
-            LOG_ERR("%s : task %zu does not fit in the context window\n", __func__, i0);
+            LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, hs_data[i0].required_tokens);
             return;
         }
 
@@ -1213,7 +1213,7 @@ static void winogrande_score(llama_context * ctx, const common_params & params)
         }
 
         if (i0 == i1) {
-            LOG_ERR("%s : task %zu does not fit in the context window\n", __func__, i0);
+            LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, data[i0].required_tokens);
             return;
         }
 
@@ -1542,12 +1542,14 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
         // the common prefix is shared among the 4 sequences to save tokens
         // we extract logits only from the last common token and from all ending tokens of each sequence
         int s0 = 0;
+        int max_seq_exceeded = 0;
         while (n_cur + (int) tasks[i1].required_tokens <= n_ctx) {
             auto& cur_task = tasks[i1];
             int n_logits = 0;
 
             int num_answers = cur_task.seq_tokens.size();
             if (s0 + num_answers > max_seq) {
+                max_seq_exceeded = s0 + num_answers;
                 break;
             }
 
@@ -1588,7 +1590,11 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
         }
 
         if (i0 == i1) {
-            LOG_ERR("%s : task %zu does not fit in the context window\n", __func__, i0);
+            if (max_seq_exceeded > max_seq) {
+                LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %zu)\n", __func__, i0, max_seq_exceeded);
+            } else {
+                LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens);
+            }
             return;
         }
 

From c80860c90783d8ae7a92b110ea38667071450576 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Thu, 14 Aug 2025 13:48:05 +0900
Subject: [PATCH 2/4] log formatting

---
 tools/perplexity/perplexity.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp
index d929d8c921605..fe80344e2373c 100644
--- a/tools/perplexity/perplexity.cpp
+++ b/tools/perplexity/perplexity.cpp
@@ -1591,7 +1591,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
 
         if (i0 == i1) {
             if (max_seq_exceeded > max_seq) {
-                LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %zu)\n", __func__, i0, max_seq_exceeded);
+                LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, max_seq_exceeded);
             } else {
                 LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens);
             }

From b1c8843bbff02fb04363e09b0d6378f845ab567e Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Thu, 14 Aug 2025 14:04:40 +0900
Subject: [PATCH 3/4] log error and return instead of storing max_seq_exceeded
 int

---
 tools/perplexity/perplexity.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp
index fe80344e2373c..64f780a427aed 100644
--- a/tools/perplexity/perplexity.cpp
+++ b/tools/perplexity/perplexity.cpp
@@ -1542,14 +1542,16 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
         // the common prefix is shared among the 4 sequences to save tokens
         // we extract logits only from the last common token and from all ending tokens of each sequence
         int s0 = 0;
-        int max_seq_exceeded = 0;
         while (n_cur + (int) tasks[i1].required_tokens <= n_ctx) {
             auto& cur_task = tasks[i1];
             int n_logits = 0;
 
             int num_answers = cur_task.seq_tokens.size();
             if (s0 + num_answers > max_seq) {
-                max_seq_exceeded = s0 + num_answers;
+                if (i0 == i1) {
+                    LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, num_answers);
+                    return;
+                }
                 break;
             }
 
@@ -1590,11 +1592,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
         }
 
         if (i0 == i1) {
-            if (max_seq_exceeded > max_seq) {
-                LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, max_seq_exceeded);
-            } else {
-                LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens);
-            }
+            LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens);
             return;
         }
 

From 9c756c15824b8fd90dceb7b1db506f9c87d250eb Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Thu, 14 Aug 2025 14:06:38 +0900
Subject: [PATCH 4/4] check if s0 is zero for -np check

---
 tools/perplexity/perplexity.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp
index 64f780a427aed..80cbb095da4cb 100644
--- a/tools/perplexity/perplexity.cpp
+++ b/tools/perplexity/perplexity.cpp
@@ -1548,7 +1548,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
 
             int num_answers = cur_task.seq_tokens.size();
             if (s0 + num_answers > max_seq) {
-                if (i0 == i1) {
+                if (s0 == 0) {
                     LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, num_answers);
                     return;
                 }