diff --git a/src/Microsoft.ML.Tokenizers/PACKAGE.md b/src/Microsoft.ML.Tokenizers/PACKAGE.md index 74f25e9478..33a6441183 100644 --- a/src/Microsoft.ML.Tokenizers/PACKAGE.md +++ b/src/Microsoft.ML.Tokenizers/PACKAGE.md @@ -30,12 +30,12 @@ string source = "Text tokenization is the process of splitting a string into a l Console.WriteLine($"Tokens: {tokenizer.CountTokens(source)}"); // prints: Tokens: 16 -var trimIndex = tokenizer.GetIndexByTokenCountFromEnd(source, 5, out string processedText, out _); -Console.WriteLine($"5 tokens from end: {processedText.Substring(trimIndex)}"); +var trimIndex = tokenizer.GetIndexByTokenCountFromEnd(source, 5, out string normalizedText, out _); +Console.WriteLine($"5 tokens from end: {(normalizedText ?? source).Substring(trimIndex)}"); // prints: 5 tokens from end: a list of tokens. -trimIndex = tokenizer.GetIndexByTokenCount(source, 5, out processedText, out _); -Console.WriteLine($"5 tokens from start: {processedText.Substring(0, trimIndex)}"); +trimIndex = tokenizer.GetIndexByTokenCount(source, 5, out normalizedText, out _); +Console.WriteLine($"5 tokens from start: {(normalizedText ?? source).Substring(0, trimIndex)}"); // prints: 5 tokens from start: Text tokenization is the IReadOnlyList<int> ids = tokenizer.EncodeToIds(source);