From ae5ecee4880f9a6339edfff90ff0741259a9b506 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 27 Nov 2025 18:08:34 +0100 Subject: [PATCH 1/2] Fix bug in PACKAGE.md examples --- src/Microsoft.ML.Tokenizers/PACKAGE.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.Tokenizers/PACKAGE.md b/src/Microsoft.ML.Tokenizers/PACKAGE.md index 74f25e9478..32fe1ba87b 100644 --- a/src/Microsoft.ML.Tokenizers/PACKAGE.md +++ b/src/Microsoft.ML.Tokenizers/PACKAGE.md @@ -30,12 +30,12 @@ string source = "Text tokenization is the process of splitting a string into a l Console.WriteLine($"Tokens: {tokenizer.CountTokens(source)}"); // prints: Tokens: 16 -var trimIndex = tokenizer.GetIndexByTokenCountFromEnd(source, 5, out string processedText, out _); -Console.WriteLine($"5 tokens from end: {processedText.Substring(trimIndex)}"); +var trimIndex = tokenizer.GetIndexByTokenCountFromEnd(source, 5, out string normalizedText, out _); +Console.WriteLine($"5 tokens from end: {source.Substring(trimIndex)}"); // prints: 5 tokens from end: a list of tokens. 
-trimIndex = tokenizer.GetIndexByTokenCount(source, 5, out processedText, out _); -Console.WriteLine($"5 tokens from start: {processedText.Substring(0, trimIndex)}"); +trimIndex = tokenizer.GetIndexByTokenCount(source, 5, out normalizedText, out _); +Console.WriteLine($"5 tokens from start: {source.Substring(0, trimIndex)}"); // prints: 5 tokens from start: Text tokenization is the IReadOnlyList<int> ids = tokenizer.EncodeToIds(source); From 03ea204995ea0989d336c9d0039a75b550423627 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 1 Dec 2025 15:40:51 +0100 Subject: [PATCH 2/2] Apply suggestions from code review --- src/Microsoft.ML.Tokenizers/PACKAGE.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Tokenizers/PACKAGE.md b/src/Microsoft.ML.Tokenizers/PACKAGE.md index 32fe1ba87b..33a6441183 100644 --- a/src/Microsoft.ML.Tokenizers/PACKAGE.md +++ b/src/Microsoft.ML.Tokenizers/PACKAGE.md @@ -31,11 +31,11 @@ Console.WriteLine($"Tokens: {tokenizer.CountTokens(source)}"); // prints: Tokens: 16 var trimIndex = tokenizer.GetIndexByTokenCountFromEnd(source, 5, out string normalizedText, out _); -Console.WriteLine($"5 tokens from end: {source.Substring(trimIndex)}"); +Console.WriteLine($"5 tokens from end: {(normalizedText ?? source).Substring(trimIndex)}"); // prints: 5 tokens from end: a list of tokens. trimIndex = tokenizer.GetIndexByTokenCount(source, 5, out normalizedText, out _); -Console.WriteLine($"5 tokens from start: {source.Substring(0, trimIndex)}"); +Console.WriteLine($"5 tokens from start: {(normalizedText ?? source).Substring(0, trimIndex)}"); // prints: 5 tokens from start: Text tokenization is the IReadOnlyList<int> ids = tokenizer.EncodeToIds(source);