From bc4884d280c47f806bf98798cacc102516fc3023 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Thu, 14 Aug 2025 13:41:08 +0900 Subject: [PATCH 1/4] perplexity: give more information about constraints on failure This checks whether -np is insufficient vs context, and provides clues as to how much is needed for each. --- tools/perplexity/perplexity.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index 81bdc7c19cae4..d929d8c921605 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -920,7 +920,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) { } if (i0 == i1) { - LOG_ERR("%s : task %zu does not fit in the context window\n", __func__, i0); + LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, hs_data[i0].required_tokens); return; } @@ -1213,7 +1213,7 @@ static void winogrande_score(llama_context * ctx, const common_params & params) } if (i0 == i1) { - LOG_ERR("%s : task %zu does not fit in the context window\n", __func__, i0); + LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, data[i0].required_tokens); return; } @@ -1542,12 +1542,14 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par // the common prefix is shared among the 4 sequences to save tokens // we extract logits only from the last common token and from all ending tokens of each sequence int s0 = 0; + int max_seq_exceeded = 0; while (n_cur + (int) tasks[i1].required_tokens <= n_ctx) { auto& cur_task = tasks[i1]; int n_logits = 0; int num_answers = cur_task.seq_tokens.size(); if (s0 + num_answers > max_seq) { + max_seq_exceeded = s0 + num_answers; break; } @@ -1588,7 +1590,11 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par } if (i0 == i1) { - LOG_ERR("%s : task %zu does not fit in the context window\n", __func__, i0); + if (max_seq_exceeded > max_seq) { + LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %zu)\n", __func__, i0, max_seq_exceeded); + } else { + LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens); + } return; } From c80860c90783d8ae7a92b110ea38667071450576 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Thu, 14 Aug 2025 13:48:05 +0900 Subject: [PATCH 2/4] log formatting --- tools/perplexity/perplexity.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index d929d8c921605..fe80344e2373c 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -1591,7 +1591,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par if (i0 == i1) { if (max_seq_exceeded > max_seq) { - LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %zu)\n", __func__, i0, max_seq_exceeded); + LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, max_seq_exceeded); } else { LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens); } From b1c8843bbff02fb04363e09b0d6378f845ab567e Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Thu, 14 Aug 2025 14:04:40 +0900 Subject: [PATCH 3/4] log error and return instead of storing max_seq_exceeded int --- tools/perplexity/perplexity.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index fe80344e2373c..64f780a427aed 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -1542,14 +1542,16 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par // the common prefix is shared among the 4 sequences to save tokens // we extract logits only from the last common token and from all ending tokens of each sequence int s0 = 0; - int max_seq_exceeded = 0; while (n_cur + (int) tasks[i1].required_tokens <= n_ctx) { auto& cur_task = tasks[i1]; int n_logits = 0; int num_answers = cur_task.seq_tokens.size(); if (s0 + num_answers > max_seq) { - max_seq_exceeded = s0 + num_answers; + if (i0 == i1) { + LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, num_answers); + return; + } break; } @@ -1590,11 +1592,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par } if (i0 == i1) { - if (max_seq_exceeded > max_seq) { - LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, max_seq_exceeded); - } else { - LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens); - } + LOG_ERR("%s : task %zu does not fit in the context window (requires %lu tokens)\n", __func__, i0, tasks[i0].required_tokens); return; } From 9c756c15824b8fd90dceb7b1db506f9c87d250eb Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Thu, 14 Aug 2025 14:06:38 +0900 Subject: [PATCH 4/4] check if s0 is zero for -np check --- tools/perplexity/perplexity.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index 64f780a427aed..80cbb095da4cb 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -1548,7 +1548,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par int num_answers = cur_task.seq_tokens.size(); if (s0 + num_answers > max_seq) { - if (i0 == i1) { + if (s0 == 0) { LOG_ERR("%s : task %zu requires a higher -np|--parallel value (at least %d)\n", __func__, i0, num_answers); return; }