diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index ace755c51d8a3..dca3f84a57562 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -203,8 +203,9 @@ int main(int argc, char ** argv) { const std::string token_str = llama_token_to_piece(ctx_tgt, id); - printf("%s", token_str.c_str()); - fflush(stdout); + if (!params.use_color) { + printf("%s", token_str.c_str()); + } if (id == llama_token_eos(model_tgt)) { has_eos = true; @@ -236,10 +237,18 @@ int main(int argc, char ** argv) { ++n_past_tgt; ++n_past_dft; ++i_dft; - + if (params.use_color) { + // Color token according to its origin sequence + printf("\u001b[%dm%s\u001b[37m", (36 - s_keep % 6), token_str.c_str()); + fflush(stdout); + } continue; } } + if (params.use_color) { + printf("%s", token_str.c_str()); + } + fflush(stdout); LOG("the sampled target token (%d, '%s') did not match, or we ran out of drafted tokens\n", id, token_str.c_str());