From a43748e8da6cc33410645c83b1cced817431817d Mon Sep 17 00:00:00 2001 From: axunonb Date: Thu, 6 Nov 2025 11:37:40 +0100 Subject: [PATCH 1/5] Allow Unicode characters in `Selector`s Resolves #454 --- src/SmartFormat.Tests/Core/ParserTests.cs | 120 +++++++++++------- src/SmartFormat.Tests/Core/SettingsTests.cs | 63 ++++++--- src/SmartFormat/Core/Parsing/Parser.cs | 9 +- .../Core/Settings/ParserSettings.cs | 117 ++++++++++------- .../Core/Settings/SmartSettings.cs | 6 +- 5 files changed, 200 insertions(+), 115 deletions(-) diff --git a/src/SmartFormat.Tests/Core/ParserTests.cs b/src/SmartFormat.Tests/Core/ParserTests.cs index f1d4803d..f82eecf2 100644 --- a/src/SmartFormat.Tests/Core/ParserTests.cs +++ b/src/SmartFormat.Tests/Core/ParserTests.cs @@ -1,11 +1,12 @@ -using NUnit.Framework; -using SmartFormat.Core.Parsing; -using SmartFormat.Core.Settings; -using SmartFormat.Tests.TestUtils; -using System; +using System; +using System.Collections.Generic; using System.IO; using System.Linq; using System.Text.RegularExpressions; +using NUnit.Framework; +using SmartFormat.Core.Parsing; +using SmartFormat.Core.Settings; +using SmartFormat.Tests.TestUtils; namespace SmartFormat.Tests.Core; @@ -66,9 +67,9 @@ public void Parser_Throws_Exceptions(string format) Assert.Throws(() => formatter.Test(format, args, "Error")); } - [TestCase("{V(LU)}")] // braces are illegal - [TestCase("{V LU }")] // blanks are illegal - [TestCase("{VĀLUĒ}")] // 0x100 and 0x112 are illegal chars + [TestCase("{V(LU)}")] // braces are not allowed + [TestCase("{V LU\\}")] // escape char is not allowed + [TestCase("{V?LU,}")] // ? 
and , are allowed chars public void Parser_Throws_On_Illegal_Selector_Chars(string format) { var parser = GetRegularParser(); @@ -81,9 +82,9 @@ public void Parser_Throws_On_Illegal_Selector_Chars(string format) { Assert.Multiple(() => { - // Throws, because selector contains 2 illegal characters + // Throws, because selector contains disallowed characters Assert.That(e, Is.InstanceOf()); - Assert.That(((ParsingErrors) e).Issues, Has.Count.EqualTo(2)); + Assert.That(((ParsingErrors) e).Issues, Has.Count.GreaterThanOrEqualTo(1)); }); } } @@ -202,13 +203,14 @@ public void Parser_Error_Action_MaintainTokens(string invalidTemplate, bool last public void Parser_Error_Action_OutputErrorInResult() { // | Literal | Erroneous | + // ▼ Selector must not contain { var invalidTemplate = "Hello, I'm {Name from {City}"; var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.OutputErrorInResult}}); using var parsed = parser.ParseFormat(invalidTemplate); Assert.That(parsed.Items, Has.Count.EqualTo(1)); - Assert.That(parsed.Items[0].RawText, Does.StartWith("The format string has 3 issues")); + Assert.That(parsed.Items[0].RawText, Does.StartWith("The format string has 1 issue")); } [Test] @@ -415,8 +417,8 @@ public void Parser_NotifyParsingError() var res = formatter.Format("{NoName {Other} {Same", default(object)!); Assert.Multiple(() => { - Assert.That(parsingError!.Issues, Has.Count.EqualTo(3)); - Assert.That(parsingError.Issues[2].Issue, Is.EqualTo(new Parser.ParsingErrorText()[SmartFormat.Core.Parsing.Parser.ParsingError.MissingClosingBrace])); + Assert.That(parsingError!.Issues, Has.Count.EqualTo(2)); + Assert.That(parsingError.Issues[1].Issue, Is.EqualTo(new Parser.ParsingErrorText()[SmartFormat.Core.Parsing.Parser.ParsingError.MissingClosingBrace])); }); } @@ -568,6 +570,29 @@ public void Selectors_With_Custom_Operator_Character(string formatString, char c }); } + [TestCase("German |öäüßÖÄÜ!")] + [TestCase("Russian 
абвгдеёжзийклмн")] + [TestCase("French >éèêëçàùâîô")] + [TestCase("Spanish <áéíóúñü¡¿")] + [TestCase("Portuguese !ãõáâêéíóúç")] + [TestCase("Chinese 汉字测试")] + [TestCase("Arabic مرحبا بالعالم")] + [TestCase("Turkish çğöşüİı")] + [TestCase("Hindi नमस्ते दुनिया")] + public void Selector_WorksWithAllUnicodeChars(string selector) + { + // See https://github.com/axuno/SmartFormat/issues/454 + + const string expected = "The Value"; + // The default formatter with default settings should be able to handle any + // Unicode characters in selectors except the "magic" disallowed ones + var formatter = Smart.CreateDefaultSmartFormat(); + // Use the Unicode string as a selector of the placeholder + var template = $"{{{selector}}}"; + var result = formatter.Format(template, new Dictionary { { selector, expected } }); + Assert.That(result, Is.EqualTo(expected)); + } + [TestCase("{A?.B}")] [TestCase("{Selector0?.Selector1}")] [TestCase("{A?[1].B}")] @@ -681,11 +706,11 @@ public void ParseInputAsHtml(string input) Assert.That(literalText!.RawText, Is.EqualTo(input)); } - [TestCase("", false)] // should parse a placeholder - [TestCase("", false)] // should parse a placeholder - [TestCase("Something ! nice", true)] // illegal selector chars - [TestCase("Something ';}! nice", true)] // illegal selector chars - public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shouldThrow) + [TestCase("", "{Placeholder}")] + [TestCase("", "{Placeholder}")] + [TestCase("Something ! nice", "{ color : #000; }")] + [TestCase("Something ';}! 
nice", "{const a = '';}")] + public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string selector) { var parser = GetRegularParser(new SmartSettings { @@ -693,18 +718,13 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shoul Parser = new ParserSettings { ErrorAction = ParseErrorAction.ThrowError, ParseInputAsHtml = false } }); - switch (shouldThrow) + var result = parser.ParseFormat(input); + Assert.Multiple(() => { - case true: - Assert.That(() => _ = parser.ParseFormat(input), Throws.TypeOf()); - break; - case false: - { - var result = parser.ParseFormat(input); - Assert.That(result.Items, Has.Count.EqualTo(3)); - break; - } - } + Assert.That(result.Items, Has.Count.EqualTo(3)); + Assert.That(((Placeholder) result.Items[1]).RawText, Is.EqualTo(selector)); + }); + } /// @@ -807,29 +827,31 @@ function interpolationSearch(sortedArray, seekIndex) { [TestCase(true, false)] public void StyleTags_Can_Be_Parsed_Without_Failure(bool inputIsHtml, bool shouldFail) { - var styles = @" - -

############### {TheVariable} ###############

-"; + .comment img { + border: 1px solid grey; + anything: 'xyz' + } + + .list-item { + border-bottom: 1px solid grey; + } + /* Comment: { which mixes up the parser without ParserSettings.ParseInputAsHtml = true */ + +

############### {TheVariable} ###############

+ + """; var parsingFailures = 0; var parser = GetRegularParser(new SmartSettings { diff --git a/src/SmartFormat.Tests/Core/SettingsTests.cs b/src/SmartFormat.Tests/Core/SettingsTests.cs index 74e93c1f..a0a6e6d4 100644 --- a/src/SmartFormat.Tests/Core/SettingsTests.cs +++ b/src/SmartFormat.Tests/Core/SettingsTests.cs @@ -11,7 +11,7 @@ public class SettingsTests public void TryingToAddDisallowedSelectorCharacters_Should_Throw() { var settings = new SmartSettings(); - Assert.That(() => settings.Parser.AddCustomSelectorChars(new[] {settings.Parser.PlaceholderBeginChar}), + Assert.That(() => settings.Parser.AddCustomSelectorChars([settings.Parser.PlaceholderBeginChar]), Throws.ArgumentException.And.Message.Contains($"{settings.Parser.PlaceholderBeginChar}")); } @@ -19,12 +19,29 @@ public void TryingToAddDisallowedSelectorCharacters_Should_Throw() public void ExistingSelectorCharacter_Should_Not_Be_Added() { var settings = new SmartSettings(); - settings.Parser.AddCustomSelectorChars(new[] {'A', ' '}); - settings.Parser.AddCustomSelectorChars(new[] {' '}); + settings.Parser.AddCustomSelectorChars(['A', ' ']); Assert.Multiple(() => { Assert.That(settings.Parser.CustomSelectorChars().Count(c => c == 'A'), Is.EqualTo(0)); - Assert.That(settings.Parser.CustomSelectorChars().Count(c => c == ' '), Is.EqualTo(1)); + Assert.That(settings.Parser.CustomSelectorChars().Count(c => c == ' '), Is.EqualTo(0)); + }); + } + + [Test] + public void ControlCharacters_Should_Be_Added_As_SelectorChars() + { + var settings = new SmartSettings(); + var controlChars = settings.Parser.ControlChars().ToList(); + settings.Parser.AddCustomSelectorChars(controlChars); + + Assert.Multiple(() => + { + Assert.That(settings.Parser.CustomSelectorChars().Count, Is.EqualTo(controlChars.Count)); + foreach (var c in settings.Parser.CustomSelectorChars()) + { + Assert.That(settings.Parser.DisallowedSelectorChars(), Does.Not.Contain(c), + $"Control char U+{(int)c:X4} should be allowed as selector char."); + } 
}); } @@ -32,7 +49,7 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added() public void TryingToAddDisallowedOperatorCharacters_Should_Throw() { var settings = new SmartSettings(); - Assert.That(() => settings.Parser.AddCustomOperatorChars(new[] {settings.Parser.PlaceholderBeginChar}), + Assert.That(() => settings.Parser.AddCustomOperatorChars([settings.Parser.PlaceholderBeginChar]), Throws.ArgumentException.And.Message.Contains($"{settings.Parser.PlaceholderBeginChar}")); } @@ -40,8 +57,8 @@ public void TryingToAddDisallowedOperatorCharacters_Should_Throw() public void ExistingOperatorCharacter_Should_Not_Be_Added() { var settings = new SmartSettings(); - settings.Parser.AddCustomOperatorChars(new[] {settings.Parser.OperatorChars()[0], '°'}); - settings.Parser.AddCustomOperatorChars(new[] {'°'}); + settings.Parser.AddCustomOperatorChars([settings.Parser.OperatorChars()[0], '°']); + settings.Parser.AddCustomOperatorChars(['°']); Assert.Multiple(() => { @@ -50,27 +67,41 @@ public void ExistingOperatorCharacter_Should_Not_Be_Added() }); } - [TestCase('°')] // a custom char - [TestCase('A')] // a standard selector char - public void Add_CustomOperator_Used_As_Separator_Should_Throw(char operatorChar) + [TestCase('{')] + [TestCase('}')] + [TestCase(':')] + [TestCase('(')] + [TestCase(')')] + public void Add_Separators_As_Custom_Operator_Should_Throw(char operatorChar) { var settings = new SmartSettings(); - settings.Parser.AddCustomSelectorChars(new[] {operatorChar}); // reserve as selector char // try to add the same char as operator - Assert.That(() => settings.Parser.AddCustomOperatorChars(new[] {operatorChar}), - Throws.ArgumentException.And.Message.Contains($"{operatorChar}")); + Assert.That(() => settings.Parser.AddCustomOperatorChars([operatorChar]), + Throws.ArgumentException.And.Message.Contains($"'{operatorChar}'")); } - [TestCase('°')] // a custom char + [TestCase('°')] // a custom selector char [TestCase('.')] // a standard operator char public void 
Add_CustomSelector_Used_As_Operator_Should_Throw(char selectorChar) { var settings = new SmartSettings(); - settings.Parser.AddCustomOperatorChars(new[] {selectorChar}); // reserve as operator char + settings.Parser.AddCustomOperatorChars([selectorChar]); // reserve as operator char // try to add the same char as selector - Assert.That(() => settings.Parser.AddCustomSelectorChars(new[] {selectorChar}), + Assert.That(() => settings.Parser.AddCustomSelectorChars([selectorChar]), Throws.ArgumentException.And.Message.Contains($"{selectorChar}")); } + + [TestCase((char) 127)] // a custom char + [TestCase((char) 30)] // a standard operator char + public void Add_CustomOperator_Used_As_Selector_Should_Throw(char operatorChar) + { + var settings = new SmartSettings(); + settings.Parser.AddCustomSelectorChars([operatorChar]); // reserve as operator char + + // try to add the same char as selector + Assert.That(() => settings.Parser.AddCustomOperatorChars([operatorChar]), + Throws.ArgumentException.And.Message.Contains($"{operatorChar}")); + } } diff --git a/src/SmartFormat/Core/Parsing/Parser.cs b/src/SmartFormat/Core/Parsing/Parser.cs index 6830a2be..03b684ef 100644 --- a/src/SmartFormat/Core/Parsing/Parser.cs +++ b/src/SmartFormat/Core/Parsing/Parser.cs @@ -36,7 +36,7 @@ public class Parser private readonly List _operatorChars; private readonly List _customOperatorChars; private readonly ParserSettings _parserSettings; - private readonly List _validSelectorChars; + private readonly HashSet _disallowedSelectorChars; private readonly List _formatOptionsTerminatorChars; #endregion @@ -67,10 +67,7 @@ public Parser(SmartSettings? 
smartSettings = null) _customOperatorChars = _parserSettings.CustomOperatorChars(); _formatOptionsTerminatorChars = _parserSettings.FormatOptionsTerminatorChars(); - _validSelectorChars = new List(); - _validSelectorChars.AddRange(_parserSettings.SelectorChars()); - _validSelectorChars.AddRange(_parserSettings.OperatorChars()); - _validSelectorChars.AddRange(_parserSettings.CustomSelectorChars()); + _disallowedSelectorChars = _parserSettings.DisallowedSelectorChars(); } #endregion @@ -326,7 +323,7 @@ private void ProcessSelector(char inputChar, ParserState state, ParsingErrors pa else { // Ensure the selector characters are valid: - if (!_validSelectorChars.Contains(inputChar)) + if (_disallowedSelectorChars.Contains(inputChar)) parsingErrors.AddIssue(state.ResultFormat, $"'0x{Convert.ToUInt32(inputChar):X}': " + _parsingErrorText[ParsingError.InvalidCharactersInSelector], diff --git a/src/SmartFormat/Core/Settings/ParserSettings.cs b/src/SmartFormat/Core/Settings/ParserSettings.cs index f454d302..3f3d4313 100644 --- a/src/SmartFormat/Core/Settings/ParserSettings.cs +++ b/src/SmartFormat/Core/Settings/ParserSettings.cs @@ -16,10 +16,8 @@ namespace SmartFormat.Core.Settings; ///
public class ParserSettings { - private readonly List _alphanumericSelectorChars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-".ToList(); - - private readonly List _customSelectorChars = new List(); - private readonly List _customOperatorChars = new List(); + private readonly List _customSelectorChars = []; + private readonly List _customOperatorChars = []; /// /// Gets or sets the to use for the . @@ -28,48 +26,75 @@ public class ParserSettings public ParseErrorAction ErrorAction { get; set; } = ParseErrorAction.ThrowError; /// - /// The list of standard selector characters. + /// Gets a read-only list of the custom selector characters, which were set with . /// - internal List SelectorChars() => _alphanumericSelectorChars; + internal List CustomSelectorChars() => _customSelectorChars; /// - /// Gets a read-only list of the custom selector characters, which were set with . + /// The list of characters which are delimiting a selector. /// - internal List CustomSelectorChars() => _customSelectorChars; + internal HashSet SelectorDelimitingChars() => + [ + FormatterNameSeparator, + PlaceholderBeginChar, PlaceholderEndChar, + FormatterOptionsBeginChar, FormatterOptionsEndChar + ]; + + /// + /// Gets the set of control characters (ASCII 0-31 and 127). + /// + internal IEnumerable ControlChars() + { + for (var i = 0; i <= 31; i++) yield return (char) i; + yield return (char) 127; // delete character + } /// - /// Gets a list of characters which are allowed in a selector. + /// The list of characters which are disallowed in a selector. 
/// - internal List DisallowedSelectorChars() + internal HashSet DisallowedSelectorChars() { - var chars = new List { - CharLiteralEscapeChar, FormatterNameSeparator, AlignmentOperator, SelectorOperator, - PlaceholderBeginChar, PlaceholderEndChar, FormatterOptionsBeginChar, FormatterOptionsEndChar + var chars = new HashSet { + CharLiteralEscapeChar // avoid confusion with escape sequences }; - chars.AddRange(OperatorChars()); + chars.UnionWith(SelectorDelimitingChars()); + chars.UnionWith(OperatorChars()); // no overlaps + chars.UnionWith(CustomOperatorChars()); // no overlaps + // Hard to visualize and debug, disallow by default - can be added back as custom selector chars + chars.UnionWith(ControlChars()); + + // Remove characters used as custom selector chars. + // Note: Using chars.ExceptWith(_customOperatorChars) would not remove char 0. + foreach (var c in _customSelectorChars) chars.Remove(c); return chars; } /// - /// Gets a read-only list of the custom operator characters, which were set with . + /// Gets a list of the custom operator characters, which were set with . /// Contiguous operator characters are parsed as one operator (e.g. '?.'). /// internal List CustomOperatorChars() => _customOperatorChars; /// - /// Add a list of allowable selector characters on top of the setting. - /// This can be useful to support additional selector syntax such as math. - /// Characters in cannot be added. + /// Add a list of allowable selector characters on top of the default selector characters. + /// This can be useful to add control characters (ASCII 0-31 and 127) that are excluded by default. /// Operator chars and selector chars must be different. 
/// public void AddCustomSelectorChars(IList characters) { + var delimitingChars = SelectorDelimitingChars(); + var controlChars = ControlChars().ToList(); + var operatorChars = OperatorChars(); + var customOperatorChars = CustomOperatorChars(); + foreach (var c in characters) { - if (DisallowedSelectorChars().Contains(c) || _customOperatorChars.Contains(c)) - throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator."); + // Explicitly disallow certain characters + if (delimitingChars.Contains(c) || c == CharLiteralEscapeChar + || operatorChars.Contains(c) || customOperatorChars.Contains(c)) + throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character."); - if (!_customSelectorChars.Contains(c) && !_alphanumericSelectorChars.Contains(c)) + if (controlChars.Contains(c)) _customSelectorChars.Add(c); } } @@ -80,13 +105,17 @@ public void AddCustomSelectorChars(IList characters) /// public void AddCustomOperatorChars(IList characters) { + var selectorDelimitingChars = SelectorDelimitingChars(); + var customSelectorChars = CustomSelectorChars(); + var operatorChars = OperatorChars(); + var customOperatorChars = CustomOperatorChars(); + foreach (var c in characters) { - if(DisallowedSelectorChars().Where(_ => OperatorChars().TrueForAll(ch => ch != c)).Contains(c) || - SelectorChars().Contains(c) || CustomSelectorChars().Contains(c)) + if (selectorDelimitingChars.Contains(c) || customSelectorChars.Contains(c)) throw new ArgumentException($"Cannot add '{c}' as a custom operator character. 
It is disallowed or in use as a selector."); - if (!OperatorChars().Contains(c) && !CustomOperatorChars().Contains(c)) + if (!operatorChars.Contains(c) && !customOperatorChars.Contains(c)) _customOperatorChars.Add(c); } } @@ -99,8 +128,7 @@ public void AddCustomOperatorChars(IList characters) /// string.Format(@"\t") will return the 2 characters "\" and "t" /// public bool ConvertCharacterStringLiterals { get; set; } = true; - - + /// /// Experimental. /// Gets or sets, whether the input format should be interpreted as HTML. @@ -126,68 +154,71 @@ public void AddCustomOperatorChars(IList characters) /// The character which separates the formatter name (if any exists) from other parts of the placeholder. /// E.g.: {Variable:FormatterName:argument} or {Variable:FormatterName} /// - internal char FormatterNameSeparator { get; } = ':'; + internal char FormatterNameSeparator => ':'; /// /// The standard operator characters. /// Contiguous operator characters are parsed as one operator (e.g. '?.'). /// - internal List OperatorChars() => new() - {SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar}; + internal List OperatorChars() => + [ + SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar + ]; /// /// The character which separates the selector for alignment. E.g.: Smart.Format("Name: {name,10}") /// - internal char AlignmentOperator { get; } = ','; + internal char AlignmentOperator => ','; /// /// The character which separates two or more selectors E.g.: "First.Second.Third" /// - internal char SelectorOperator { get; } = '.'; + internal char SelectorOperator => '.'; /// /// The character which flags the selector as . /// The character after must be the . /// E.g.: "First?.Second" /// - internal char NullableOperator { get; } = '?'; + internal char NullableOperator => '?'; /// /// Gets the character indicating the start of a . 
/// - internal char PlaceholderBeginChar { get; } = '{'; + internal char PlaceholderBeginChar => '{'; /// /// Gets the character indicating the end of a . /// - internal char PlaceholderEndChar { get; } = '}'; + internal char PlaceholderEndChar => '}'; /// - /// Gets the character indicating the begin of formatter options. + /// Gets the character indicating the beginning of formatter options. /// - internal char FormatterOptionsBeginChar { get; } = '('; + internal char FormatterOptionsBeginChar => '('; /// /// Gets the character indicating the end of formatter options. /// - internal char FormatterOptionsEndChar { get; } = ')'; + internal char FormatterOptionsEndChar => ')'; /// - /// Gets the character indicating the begin of a list index, like in "{Numbers[0]}" + /// Gets the character indicating the beginning of a list index, like in '{Numbers[0]}' /// - internal char ListIndexBeginChar { get; } = '['; + internal char ListIndexBeginChar => '['; /// - /// Gets the character indicating the end of a list index, like in "{Numbers[0]}" + /// Gets the character indicating the end of a list index, like in '{Numbers[0]}' /// - internal char ListIndexEndChar { get; } = ']'; + internal char ListIndexEndChar => ']'; /// /// Characters which terminate parsing of format options. /// To use them as options, they must be escaped (preceded) by the . 
/// - internal List FormatOptionsTerminatorChars() => new() { + internal List FormatOptionsTerminatorChars() => + [ FormatterNameSeparator, FormatterOptionsBeginChar, FormatterOptionsEndChar, PlaceholderBeginChar, PlaceholderEndChar - }; + ]; } diff --git a/src/SmartFormat/Core/Settings/SmartSettings.cs b/src/SmartFormat/Core/Settings/SmartSettings.cs index 5c035f56..4acf548c 100644 --- a/src/SmartFormat/Core/Settings/SmartSettings.cs +++ b/src/SmartFormat/Core/Settings/SmartSettings.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; namespace SmartFormat.Core.Settings; @@ -45,6 +46,7 @@ public SmartSettings() /// The default is . /// [Obsolete("Use 'SmartSettings.Formatter.ErrorAction' instead.", true)] + [ExcludeFromCodeCoverage] public ErrorAction FormatErrorAction { get => (ErrorAction) Formatter.ErrorAction; @@ -56,6 +58,7 @@ public ErrorAction FormatErrorAction /// The default is . /// [Obsolete("Use 'SmartSettings.Parser.ErrorAction' instead.", true)] + [ExcludeFromCodeCoverage] public ErrorAction ParseErrorAction { get => (ErrorAction) Parser.ErrorAction; @@ -76,6 +79,7 @@ public ErrorAction ParseErrorAction /// string.Format(@"\t") will return the 2 characters "\" and "t" /// [Obsolete("Use SmartSettings.Parser.ConvertCharacterStringLiterals instead", true)] + [ExcludeFromCodeCoverage] public bool ConvertCharacterStringLiterals { get => Parser.ConvertCharacterStringLiterals; @@ -126,4 +130,4 @@ public StringComparison GetCaseSensitivityComparison() /// These settings must be defined before any class calling the object pools is instantiated. They cannot be changed later. 
/// public PoolSettings Pooling { get; set; } -} \ No newline at end of file +} From 6b1dbec77a914c95bbc848823a9b7b0012bb3d80 Mon Sep 17 00:00:00 2001 From: axunonb Date: Thu, 6 Nov 2025 18:10:34 +0100 Subject: [PATCH 2/5] Refactor internal `ParserSettings` to use static or const members Refactored internal `ParserSettings` to convert instance-level properties and methods to static or const members. --- src/SmartFormat.Tests/Core/SettingsTests.cs | 24 ++++----- src/SmartFormat/Core/Extensions/Source.cs | 2 +- src/SmartFormat/Core/Parsing/Parser.cs | 36 ++++++------- src/SmartFormat/Core/Parsing/Placeholder.cs | 19 +++---- .../Core/Settings/ParserSettings.cs | 54 ++++++++----------- src/SmartFormat/Evaluator.cs | 4 +- src/SmartFormat/Extensions/ListFormatter.cs | 4 +- 7 files changed, 68 insertions(+), 75 deletions(-) diff --git a/src/SmartFormat.Tests/Core/SettingsTests.cs b/src/SmartFormat.Tests/Core/SettingsTests.cs index a0a6e6d4..9e60cc6c 100644 --- a/src/SmartFormat.Tests/Core/SettingsTests.cs +++ b/src/SmartFormat.Tests/Core/SettingsTests.cs @@ -11,8 +11,8 @@ public class SettingsTests public void TryingToAddDisallowedSelectorCharacters_Should_Throw() { var settings = new SmartSettings(); - Assert.That(() => settings.Parser.AddCustomSelectorChars([settings.Parser.PlaceholderBeginChar]), - Throws.ArgumentException.And.Message.Contains($"{settings.Parser.PlaceholderBeginChar}")); + Assert.That(() => settings.Parser.AddCustomSelectorChars([ParserSettings.PlaceholderBeginChar]), + Throws.ArgumentException.And.Message.Contains($"{ParserSettings.PlaceholderBeginChar}")); } [Test] @@ -22,8 +22,8 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added() settings.Parser.AddCustomSelectorChars(['A', ' ']); Assert.Multiple(() => { - Assert.That(settings.Parser.CustomSelectorChars().Count(c => c == 'A'), Is.EqualTo(0)); - Assert.That(settings.Parser.CustomSelectorChars().Count(c => c == ' '), Is.EqualTo(0)); + Assert.That(settings.Parser.CustomSelectorChars.Count(c 
=> c == 'A'), Is.EqualTo(0)); + Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == ' '), Is.EqualTo(0)); }); } @@ -31,13 +31,13 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added() public void ControlCharacters_Should_Be_Added_As_SelectorChars() { var settings = new SmartSettings(); - var controlChars = settings.Parser.ControlChars().ToList(); + var controlChars = ParserSettings.ControlChars().ToList(); settings.Parser.AddCustomSelectorChars(controlChars); Assert.Multiple(() => { - Assert.That(settings.Parser.CustomSelectorChars().Count, Is.EqualTo(controlChars.Count)); - foreach (var c in settings.Parser.CustomSelectorChars()) + Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(controlChars.Count)); + foreach (var c in settings.Parser.CustomSelectorChars) { Assert.That(settings.Parser.DisallowedSelectorChars(), Does.Not.Contain(c), $"Control char U+{(int)c:X4} should be allowed as selector char."); @@ -49,21 +49,21 @@ public void ControlCharacters_Should_Be_Added_As_SelectorChars() public void TryingToAddDisallowedOperatorCharacters_Should_Throw() { var settings = new SmartSettings(); - Assert.That(() => settings.Parser.AddCustomOperatorChars([settings.Parser.PlaceholderBeginChar]), - Throws.ArgumentException.And.Message.Contains($"{settings.Parser.PlaceholderBeginChar}")); + Assert.That(() => settings.Parser.AddCustomOperatorChars([ParserSettings.PlaceholderBeginChar]), + Throws.ArgumentException.And.Message.Contains($"{ParserSettings.PlaceholderBeginChar}")); } [Test] public void ExistingOperatorCharacter_Should_Not_Be_Added() { var settings = new SmartSettings(); - settings.Parser.AddCustomOperatorChars([settings.Parser.OperatorChars()[0], '°']); + settings.Parser.AddCustomOperatorChars([ParserSettings.OperatorChars[0], '°']); settings.Parser.AddCustomOperatorChars(['°']); Assert.Multiple(() => { - Assert.That(settings.Parser.CustomOperatorChars().Count(c => c == settings.Parser.OperatorChars()[0]), Is.EqualTo(0)); - 
Assert.That(settings.Parser.CustomOperatorChars().Count(c => c == '°'), Is.EqualTo(1)); + Assert.That(settings.Parser.CustomOperatorChars.Count(c => c == ParserSettings.OperatorChars[0]), Is.EqualTo(0)); + Assert.That(settings.Parser.CustomOperatorChars.Count(c => c == '°'), Is.EqualTo(1)); }); } diff --git a/src/SmartFormat/Core/Extensions/Source.cs b/src/SmartFormat/Core/Extensions/Source.cs index c8d06c65..d73f4682 100644 --- a/src/SmartFormat/Core/Extensions/Source.cs +++ b/src/SmartFormat/Core/Extensions/Source.cs @@ -49,7 +49,7 @@ private bool HasNullableOperator(ISelectorInfo selectorInfo) #pragma warning disable S3267 // Don't use LINQ in favor of less GC foreach (var s in selectorInfo.Placeholder.Selectors) { - if (s.OperatorLength > 1 && s.BaseString[s.OperatorStartIndex] == _smartSettings.Parser.NullableOperator) + if (s.OperatorLength > 1 && s.BaseString[s.OperatorStartIndex] == ParserSettings.NullableOperator) return true; } #pragma warning restore S3267 // Restore: Loops should be simplified with "LINQ" expressions diff --git a/src/SmartFormat/Core/Parsing/Parser.cs b/src/SmartFormat/Core/Parsing/Parser.cs index 03b684ef..992c5381 100644 --- a/src/SmartFormat/Core/Parsing/Parser.cs +++ b/src/SmartFormat/Core/Parsing/Parser.cs @@ -63,9 +63,9 @@ public Parser(SmartSettings? smartSettings = null) { Settings = smartSettings ?? 
new SmartSettings(); _parserSettings = Settings.Parser; - _operatorChars = _parserSettings.OperatorChars(); - _customOperatorChars = _parserSettings.CustomOperatorChars(); - _formatOptionsTerminatorChars = _parserSettings.FormatOptionsTerminatorChars(); + _operatorChars = ParserSettings.OperatorChars; + _customOperatorChars = _parserSettings.CustomOperatorChars; + _formatOptionsTerminatorChars = ParserSettings.FormatOptionsTerminatorChars; _disallowedSelectorChars = _parserSettings.DisallowedSelectorChars(); } @@ -239,19 +239,19 @@ private void ProcessLiteralText(char inputChar, ParserState state, ParsingErrors return; } - if (inputChar == _parserSettings.PlaceholderBeginChar) + if (inputChar == ParserSettings.PlaceholderBeginChar) { AddLiteralCharsParsedBefore(state); - if (EscapeLikeStringFormat(_parserSettings.PlaceholderBeginChar, state)) return; + if (EscapeLikeStringFormat(ParserSettings.PlaceholderBeginChar, state)) return; // Context transition CreateNewPlaceholder(ref nestedDepth, state, out currentPlaceholder); currentContext = ParseContext.SelectorHeader; } - else if (inputChar == _parserSettings.PlaceholderEndChar) + else if (inputChar == ParserSettings.PlaceholderEndChar) { AddLiteralCharsParsedBefore(state); - if (EscapeLikeStringFormat(_parserSettings.PlaceholderEndChar, state)) return; + if (EscapeLikeStringFormat(ParserSettings.PlaceholderEndChar, state)) return; if (HasProcessedTooManyClosingBraces(parsingErrors, state)) return; // End of a nested placeholder's Format. 
@@ -292,7 +292,7 @@ private void ProcessSelector(char inputChar, ParserState state, ParsingErrors pa } state.Index.LastEnd = state.Index.SafeAdd(state.Index.Current, 1); } - else if (inputChar == _parserSettings.FormatterNameSeparator) + else if (inputChar == ParserSettings.FormatterNameSeparator) { AddLastSelector(ref currentPlaceholder, state, parsingErrors); @@ -308,7 +308,7 @@ private void ProcessSelector(char inputChar, ParserState state, ParsingErrors pa // We are now parsing the literal text *inside* the placeholder's format. currentContext = ParseContext.LiteralText; } - else if (inputChar == _parserSettings.PlaceholderEndChar) + else if (inputChar == ParserSettings.PlaceholderEndChar) { AddLastSelector(ref currentPlaceholder, state, parsingErrors); @@ -465,8 +465,8 @@ private void ParseAlternativeEscaping(ParserState state) throw new ArgumentException($"Unrecognized escape sequence at the end of the literal"); // **** Alternative brace escaping with { or } following the escape character **** - if (state.InputFormat[indexNextChar] == _parserSettings.PlaceholderBeginChar || - state.InputFormat[indexNextChar] == _parserSettings.PlaceholderEndChar) + if (state.InputFormat[indexNextChar] == ParserSettings.PlaceholderBeginChar || + state.InputFormat[indexNextChar] == ParserSettings.PlaceholderEndChar) { // Finish the last text item: if (state.Index.Current != state.Index.LastEnd) @@ -509,7 +509,7 @@ private void ParseAlternativeEscaping(ParserState state) private bool ParseNamedFormatter(ParserState state) { var inputChar = state.InputFormat[state.Index.Current]; - if (inputChar == _parserSettings.FormatterOptionsBeginChar) + if (inputChar == ParserSettings.FormatterOptionsBeginChar) { var emptyName = state.Index.NamedFormatterStart == state.Index.Current; if (emptyName) @@ -521,16 +521,16 @@ private bool ParseNamedFormatter(ParserState state) // Note: This short-circuits the Parser.ParseFormat main loop ParseFormatOptions(state); } - else if (inputChar == 
_parserSettings.FormatterOptionsEndChar || inputChar == _parserSettings.FormatterNameSeparator) + else if (inputChar == ParserSettings.FormatterOptionsEndChar || inputChar == ParserSettings.FormatterNameSeparator) { - if (inputChar == _parserSettings.FormatterOptionsEndChar) + if (inputChar == ParserSettings.FormatterOptionsEndChar) { var hasOpeningParenthesis = state.Index.NamedFormatterOptionsStart != PositionUndefined; // ensure no trailing chars past ')' var nextCharIndex = state.Index.SafeAdd(state.Index.Current, 1); var nextCharIsValid = nextCharIndex < state.InputFormat.Length && - (state.InputFormat[nextCharIndex] == _parserSettings.FormatterNameSeparator || state.InputFormat[nextCharIndex] == _parserSettings.PlaceholderEndChar); + (state.InputFormat[nextCharIndex] == ParserSettings.FormatterNameSeparator || state.InputFormat[nextCharIndex] == ParserSettings.PlaceholderEndChar); if (!hasOpeningParenthesis || !nextCharIsValid) { @@ -540,7 +540,7 @@ private bool ParseNamedFormatter(ParserState state) state.Index.NamedFormatterOptionsEnd = state.Index.Current; - if (state.InputFormat[nextCharIndex] == _parserSettings.FormatterNameSeparator) state.Index.Current++; + if (state.InputFormat[nextCharIndex] == ParserSettings.FormatterNameSeparator) state.Index.Current++; } var nameIsEmpty = state.Index.NamedFormatterStart == state.Index.Current; @@ -601,8 +601,8 @@ private void AddLastSelector(ref Placeholder currentPlaceholder, ParserState sta if (state.Index.Current != state.Index.LastEnd || currentPlaceholder.Selectors.Count > 0 && currentPlaceholder.Selectors[currentPlaceholder.Selectors.Count - 1].Length > 0 && state.Index.Current - state.Index.Operator == 1 && - (state.InputFormat[state.Index.Operator] == _parserSettings.ListIndexEndChar || - state.InputFormat[state.Index.Operator] == _parserSettings.NullableOperator)) + (state.InputFormat[state.Index.Operator] == ParserSettings.ListIndexEndChar || + state.InputFormat[state.Index.Operator] == 
ParserSettings.NullableOperator)) currentPlaceholder.AddSelector(SelectorPool.Instance.Get().Initialize(Settings, currentPlaceholder, state.InputFormat, state.Index.LastEnd, state.Index.Current, state.Index.Operator, state.Index.Selector)); else if (state.Index.Operator != state.Index.Current) parsingErrors.AddIssue(state.ResultFormat, diff --git a/src/SmartFormat/Core/Parsing/Placeholder.cs b/src/SmartFormat/Core/Parsing/Placeholder.cs index 7b464ade..d31a48d7 100644 --- a/src/SmartFormat/Core/Parsing/Placeholder.cs +++ b/src/SmartFormat/Core/Parsing/Placeholder.cs @@ -5,6 +5,7 @@ using System; using System.Buffers; using System.Collections.Generic; +using SmartFormat.Core.Settings; using SmartFormat.Pooling.ObjectPools; using SmartFormat.Pooling.SmartPools; using SmartFormat.ZString; @@ -138,7 +139,7 @@ internal void AddSelector(Selector selector) // 1. The operator character must have a value, usually ',' // 2. The alignment is an integer value if (selector.OperatorLength > 0 - && selector.Operator[0] == SmartSettings.Parser.AlignmentOperator + && selector.Operator[0] == ParserSettings.AlignmentOperator && int.TryParse(selector.RawText, out var alignment)) { Alignment = alignment; @@ -231,41 +232,41 @@ public override string ToString() using var buffer = new ZCharArray(Length + 2); // +2 for the braces - buffer.Write(SmartSettings.Parser.PlaceholderBeginChar); + buffer.Write(ParserSettings.PlaceholderBeginChar); foreach (var s in Selectors) { // alignment operators will be appended later - if (s.Operator.Length > 0 && s.Operator[0] == SmartSettings.Parser.AlignmentOperator) continue; + if (s.Operator.Length > 0 && s.Operator[0] == ParserSettings.AlignmentOperator) continue; var selectorSpan = s.BaseString.AsSpan(s.OperatorStartIndex, s.EndIndex - s.OperatorStartIndex); buffer.Write(selectorSpan); } if (Alignment != 0) { - buffer.Write(SmartSettings.Parser.AlignmentOperator); + buffer.Write(ParserSettings.AlignmentOperator); buffer.Write(Alignment.ToString()); } 
if (FormatterName != string.Empty) { - buffer.Write(SmartSettings.Parser.FormatterNameSeparator); + buffer.Write(ParserSettings.FormatterNameSeparator); buffer.Write(FormatterName); if (FormatterOptions != string.Empty) { - buffer.Write(SmartSettings.Parser.FormatterOptionsBeginChar); + buffer.Write(ParserSettings.FormatterOptionsBeginChar); buffer.Write(FormatterOptions); - buffer.Write(SmartSettings.Parser.FormatterOptionsEndChar); + buffer.Write(ParserSettings.FormatterOptionsEndChar); } } if (Format != null) { - buffer.Write(SmartSettings.Parser.FormatterNameSeparator); + buffer.Write(ParserSettings.FormatterNameSeparator); buffer.Write(Format.AsSpan()); } - buffer.Write(SmartSettings.Parser.PlaceholderEndChar); + buffer.Write(ParserSettings.PlaceholderEndChar); #if NETSTANDARD2_1 || NET6_0_OR_GREATER _toStringCache = new string(buffer.GetSpan()); diff --git a/src/SmartFormat/Core/Settings/ParserSettings.cs b/src/SmartFormat/Core/Settings/ParserSettings.cs index 3f3d4313..8fa41ca9 100644 --- a/src/SmartFormat/Core/Settings/ParserSettings.cs +++ b/src/SmartFormat/Core/Settings/ParserSettings.cs @@ -28,12 +28,12 @@ public class ParserSettings /// /// Gets a read-only list of the custom selector characters, which were set with . /// - internal List CustomSelectorChars() => _customSelectorChars; + internal List CustomSelectorChars => _customSelectorChars; /// /// The list of characters which are delimiting a selector. /// - internal HashSet SelectorDelimitingChars() => + internal static readonly HashSet SelectorDelimitingChars = [ FormatterNameSeparator, PlaceholderBeginChar, PlaceholderEndChar, @@ -43,7 +43,7 @@ internal HashSet SelectorDelimitingChars() => /// /// Gets the set of control characters (ASCII 0-31 and 127). 
/// - internal IEnumerable ControlChars() + internal static IEnumerable ControlChars() { for (var i = 0; i <= 31; i++) yield return (char) i; yield return (char) 127; // delete character @@ -57,9 +57,9 @@ internal HashSet DisallowedSelectorChars() var chars = new HashSet { CharLiteralEscapeChar // avoid confusion with escape sequences }; - chars.UnionWith(SelectorDelimitingChars()); - chars.UnionWith(OperatorChars()); // no overlaps - chars.UnionWith(CustomOperatorChars()); // no overlaps + chars.UnionWith(SelectorDelimitingChars); + chars.UnionWith(OperatorChars); // no overlaps + chars.UnionWith(CustomOperatorChars); // no overlaps // Hard to visualize and debug, disallow by default - can be added back as custom selector chars chars.UnionWith(ControlChars()); @@ -73,7 +73,7 @@ internal HashSet DisallowedSelectorChars() /// Gets a list of the custom operator characters, which were set with . /// Contiguous operator characters are parsed as one operator (e.g. '?.'). /// - internal List CustomOperatorChars() => _customOperatorChars; + internal List CustomOperatorChars => _customOperatorChars; /// /// Add a list of allowable selector characters on top of the default selector characters. @@ -82,16 +82,13 @@ internal HashSet DisallowedSelectorChars() /// public void AddCustomSelectorChars(IList characters) { - var delimitingChars = SelectorDelimitingChars(); var controlChars = ControlChars().ToList(); - var operatorChars = OperatorChars(); - var customOperatorChars = CustomOperatorChars(); foreach (var c in characters) { // Explicitly disallow certain characters - if (delimitingChars.Contains(c) || c == CharLiteralEscapeChar - || operatorChars.Contains(c) || customOperatorChars.Contains(c)) + if (SelectorDelimitingChars.Contains(c) || c == CharLiteralEscapeChar + || OperatorChars.Contains(c) || CustomOperatorChars.Contains(c)) throw new ArgumentException($"Cannot add '{c}' as a custom selector character. 
It is disallowed or in use as an operator character."); if (controlChars.Contains(c)) @@ -105,17 +102,12 @@ public void AddCustomSelectorChars(IList characters) /// public void AddCustomOperatorChars(IList characters) { - var selectorDelimitingChars = SelectorDelimitingChars(); - var customSelectorChars = CustomSelectorChars(); - var operatorChars = OperatorChars(); - var customOperatorChars = CustomOperatorChars(); - foreach (var c in characters) { - if (selectorDelimitingChars.Contains(c) || customSelectorChars.Contains(c)) + if (SelectorDelimitingChars.Contains(c) || CustomSelectorChars.Contains(c)) throw new ArgumentException($"Cannot add '{c}' as a custom operator character. It is disallowed or in use as a selector."); - if (!operatorChars.Contains(c) && !customOperatorChars.Contains(c)) + if (!OperatorChars.Contains(c) && !_customOperatorChars.Contains(c)) _customOperatorChars.Add(c); } } @@ -154,13 +146,13 @@ public void AddCustomOperatorChars(IList characters) /// The character which separates the formatter name (if any exists) from other parts of the placeholder. /// E.g.: {Variable:FormatterName:argument} or {Variable:FormatterName} /// - internal char FormatterNameSeparator => ':'; + internal const char FormatterNameSeparator = ':'; /// /// The standard operator characters. /// Contiguous operator characters are parsed as one operator (e.g. '?.'). /// - internal List OperatorChars() => + internal static readonly List OperatorChars = [ SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar ]; @@ -168,55 +160,55 @@ internal List OperatorChars() => /// /// The character which separates the selector for alignment. 
E.g.: Smart.Format("Name: {name,10}") /// - internal char AlignmentOperator => ','; + internal const char AlignmentOperator = ','; /// /// The character which separates two or more selectors E.g.: "First.Second.Third" /// - internal char SelectorOperator => '.'; + internal const char SelectorOperator = '.'; /// /// The character which flags the selector as . /// The character after must be the . /// E.g.: "First?.Second" /// - internal char NullableOperator => '?'; + internal const char NullableOperator = '?'; /// /// Gets the character indicating the start of a . /// - internal char PlaceholderBeginChar => '{'; + internal const char PlaceholderBeginChar = '{'; /// /// Gets the character indicating the end of a . /// - internal char PlaceholderEndChar => '}'; + internal const char PlaceholderEndChar = '}'; /// /// Gets the character indicating the beginning of formatter options. /// - internal char FormatterOptionsBeginChar => '('; + internal const char FormatterOptionsBeginChar = '('; /// /// Gets the character indicating the end of formatter options. /// - internal char FormatterOptionsEndChar => ')'; + internal const char FormatterOptionsEndChar = ')'; /// /// Gets the character indicating the beginning of a list index, like in '{Numbers[0]}' /// - internal char ListIndexBeginChar => '['; + internal const char ListIndexBeginChar = '['; /// /// Gets the character indicating the end of a list index, like in '{Numbers[0]}' /// - internal char ListIndexEndChar => ']'; + internal const char ListIndexEndChar = ']'; /// /// Characters which terminate parsing of format options. /// To use them as options, they must be escaped (preceded) by the . 
/// - internal List FormatOptionsTerminatorChars() => + internal static readonly List FormatOptionsTerminatorChars = [ FormatterNameSeparator, FormatterOptionsBeginChar, FormatterOptionsEndChar, PlaceholderBeginChar, PlaceholderEndChar diff --git a/src/SmartFormat/Evaluator.cs b/src/SmartFormat/Evaluator.cs index d2e85a5c..2418609a 100644 --- a/src/SmartFormat/Evaluator.cs +++ b/src/SmartFormat/Evaluator.cs @@ -204,7 +204,7 @@ private void HandleNestedScope(FormattingInfo formattingInfo, Selector selector, /// /// Skip empty selectors and alignment-only selectors. /// - private bool SkipThisSelector(Selector selector) + private static bool SkipThisSelector(Selector selector) { // Don't evaluate empty selectors // (used e.g. for Settings.Parser.NullableOperator and Settings.Parser.ListIndexEndChar final operators) @@ -212,7 +212,7 @@ private bool SkipThisSelector(Selector selector) // Do not evaluate alignment-only selectors if (selector.Operator.Length > 0 && - selector.Operator[0] == _settings.Parser.AlignmentOperator) return true; + selector.Operator[0] == ParserSettings.AlignmentOperator) return true; return false; } diff --git a/src/SmartFormat/Extensions/ListFormatter.cs b/src/SmartFormat/Extensions/ListFormatter.cs index f3fc6323..13689d88 100644 --- a/src/SmartFormat/Extensions/ListFormatter.cs +++ b/src/SmartFormat/Extensions/ListFormatter.cs @@ -300,14 +300,14 @@ private static void WriteSpacer(FormattingInfo formattingInfo, Format spacer, ob /// /// The nullable operator '?' 
can be followed by a dot (like '?.') or a square brace (like '?[') /// - private bool HasNullableOperator(IFormattingInfo formattingInfo) + private static bool HasNullableOperator(IFormattingInfo formattingInfo) { if (formattingInfo.Placeholder != null) { #pragma warning disable S3267 // Don't use LINQ in favor of less GC foreach (var s in formattingInfo.Placeholder.Selectors) { - if (s.OperatorLength > 0 && s.BaseString[s.OperatorStartIndex] == _smartSettings.Parser.NullableOperator) + if (s.OperatorLength > 0 && s.BaseString[s.OperatorStartIndex] == ParserSettings.NullableOperator) return true; } #pragma warning restore S3267 // Restore: Loops should be simplified with "LINQ" expressions From b787b49d41040fa03f8acf82b7293d83651bb7e1 Mon Sep 17 00:00:00 2001 From: axunonb Date: Sun, 9 Nov 2025 20:43:49 +0100 Subject: [PATCH 3/5] feat: Filter `Selector` chars by allowlist or blocklist (#511) --- src/SmartFormat.Tests/Core/CharSetTests.cs | 54 +++++ src/SmartFormat.Tests/Core/ParserTests.cs | 107 +++++++-- src/SmartFormat.Tests/Core/SettingsTests.cs | 13 +- src/SmartFormat/Core/Parsing/CharSet.cs | 216 ++++++++++++++++++ src/SmartFormat/Core/Parsing/Parser.cs | 52 +++-- src/SmartFormat/Core/Settings/FilterType.cs | 23 ++ .../Core/Settings/ParserSettings.cs | 192 ++++++++++------ 7 files changed, 550 insertions(+), 107 deletions(-) create mode 100644 src/SmartFormat.Tests/Core/CharSetTests.cs create mode 100644 src/SmartFormat/Core/Parsing/CharSet.cs create mode 100644 src/SmartFormat/Core/Settings/FilterType.cs diff --git a/src/SmartFormat.Tests/Core/CharSetTests.cs b/src/SmartFormat.Tests/Core/CharSetTests.cs new file mode 100644 index 00000000..968b446a --- /dev/null +++ b/src/SmartFormat.Tests/Core/CharSetTests.cs @@ -0,0 +1,54 @@ +using System; +using System.Linq; +using NUnit.Framework; +using SmartFormat.Core.Parsing; + +namespace SmartFormat.Tests.Core; + +[TestFixture] +internal class CharSetTests +{ + [Test] + public void CharSet_Add_Remove() + { + char[] 
asciiChars = ['A', 'B', 'C']; + char[] nonAsciiChars = ['Ā', 'Б', '中']; + var charSet = new CharSet(); + charSet.AddRange(asciiChars.AsEnumerable()); + charSet.AddRange(nonAsciiChars.AsSpan()); + var countBeforeRemoval = charSet.Count; + var existingRemoved = charSet.Remove('C'); + charSet.Remove('中'); + // trying to remove a not existing char returns false + var nonExistingRemoved = charSet.Remove('?'); + var count = charSet.Count; + + Assert.Multiple(() => + { + Assert.That(countBeforeRemoval, Is.EqualTo(asciiChars.Length + nonAsciiChars.Length)); + Assert.That(count, Is.EqualTo(countBeforeRemoval - 2)); + Assert.That(existingRemoved, Is.True); + Assert.That(nonExistingRemoved, Is.False); + }); + } + + [Test] + public void CharSet_CreateFromSpan_GetCharacters_Contains() + { + char[] asciiAndNonAscii = ['\0', 'A', 'B', 'C', 'Ā', 'Б', '中']; + var charSet = new CharSet(asciiAndNonAscii.AsSpan()); + + Assert.Multiple(() => + { + Assert.That(charSet, Has.Count.EqualTo(7)); + Assert.That(charSet.Contains('A'), Is.True); // ASCII + Assert.That(charSet.Contains('\0'), Is.True); // control character + Assert.That(charSet.Contains('中'), Is.True); // non-ASCII + Assert.That(charSet.Contains('?'), Is.False); + Assert.That(charSet.GetCharacters(), Is.EquivalentTo(asciiAndNonAscii)); + charSet.Clear(); + Assert.That(charSet, Has.Count.EqualTo(0)); + Assert.That(charSet.GetCharacters(), Is.Empty); + }); + } +} diff --git a/src/SmartFormat.Tests/Core/ParserTests.cs b/src/SmartFormat.Tests/Core/ParserTests.cs index f82eecf2..ab65df2c 100644 --- a/src/SmartFormat.Tests/Core/ParserTests.cs +++ b/src/SmartFormat.Tests/Core/ParserTests.cs @@ -155,6 +155,7 @@ public void Parser_Error_Action_Ignore() // | Literal | Erroneous | | Okay | var invalidTemplate = "Hello, I'm {Name from {City} {Street}"; + // settings must be set before parser instantiation var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.Ignore}}); using var parsed 
= parser.ParseFormat(invalidTemplate); @@ -177,6 +178,7 @@ public void Parser_Error_Action_Ignore() [TestCase("Hello, I'm {Name from {City} {Street", false)] public void Parser_Error_Action_MaintainTokens(string invalidTemplate, bool lastItemIsPlaceholder) { + // settings must be set before parser instantiation var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.MaintainTokens}}); using var parsed = parser.ParseFormat(invalidTemplate); @@ -203,14 +205,21 @@ public void Parser_Error_Action_MaintainTokens(string invalidTemplate, bool last public void Parser_Error_Action_OutputErrorInResult() { // | Literal | Erroneous | - // ▼ Selector must not contain { var invalidTemplate = "Hello, I'm {Name from {City}"; - - var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.OutputErrorInResult}}); + + var parser = GetRegularParser(new SmartSettings + { + Parser = new ParserSettings + { + SelectorCharFilter = FilterType.Allowlist, // default + ErrorAction = ParseErrorAction.OutputErrorInResult + } + }); + using var parsed = parser.ParseFormat(invalidTemplate); Assert.That(parsed.Items, Has.Count.EqualTo(1)); - Assert.That(parsed.Items[0].RawText, Does.StartWith("The format string has 1 issue")); + Assert.That(parsed.Items[0].RawText, Does.StartWith("The format string has 3 issues")); } [Test] @@ -414,11 +423,11 @@ public void Parser_NotifyParsingError() }); formatter.Parser.OnParsingFailure += (o, args) => parsingError = args.Errors; - var res = formatter.Format("{NoName {Other} {Same", default(object)!); + var res = formatter.Format("{NoName {Other} {Same"); Assert.Multiple(() => { - Assert.That(parsingError!.Issues, Has.Count.EqualTo(2)); - Assert.That(parsingError.Issues[1].Issue, Is.EqualTo(new Parser.ParsingErrorText()[SmartFormat.Core.Parsing.Parser.ParsingError.MissingClosingBrace])); + Assert.That(parsingError!.Issues, Has.Count.EqualTo(3)); + 
Assert.That(parsingError.Issues[2].Issue, Is.EqualTo(new Parser.ParsingErrorText()[Parser.ParsingError.MissingClosingBrace])); }); } @@ -459,6 +468,18 @@ public void Escaping_TheEscapingCharacter_ShouldWork() Assert.That(result, Is.EqualTo(@"\\aaa\{}bbb ccc\x{}ddd\\")); } + [Test] + public void Parsing_Selector_With_CharFromBlocklist_ShouldThrow() + { + var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = FilterType.Blocklist } }; + var parser = GetRegularParser(settings); + + // The newline character is in the default blocklist of disallowed characters + Assert.That(() => parser.ParseFormat("{A\nB}"), + Throws.Exception.InstanceOf().And.Message + .Contains(new Parser.ParsingErrorText()[Parser.ParsingError.InvalidCharactersInSelector])); + } + [Test] public void StringFormat_Escaping_In_Literal() { @@ -536,8 +557,10 @@ public void Parse_Unicode(string formatString, string unicodeLiteral, int itemIn [TestCase("{%C}", '%')] public void Selector_With_Custom_Selector_Character(string formatString, char customChar) { + // settings must be set before parser instantiation var settings = new SmartSettings(); - settings.Parser.AddCustomSelectorChars(new[]{customChar}); + settings.Parser.AddCustomSelectorChars([customChar]); + var x = settings.Parser.GetSelectorChars(); var parser = GetRegularParser(settings); var result = parser.ParseFormat(formatString); @@ -546,7 +569,7 @@ public void Selector_With_Custom_Selector_Character(string formatString, char cu Assert.That(placeholder!.Selectors, Has.Count.EqualTo(1)); Assert.Multiple(() => { - Assert.That(placeholder!.Selectors, Has.Count.EqualTo(placeholder!.GetSelectors().Count)); + Assert.That(placeholder.Selectors, Has.Count.EqualTo(placeholder.GetSelectors().Count)); Assert.That(placeholder.Selectors[0].ToString(), Is.EqualTo(formatString.Substring(1, 2))); }); } @@ -555,8 +578,10 @@ public void Selector_With_Custom_Selector_Character(string formatString, char cu [TestCase("{a°b}", '°')] 
public void Selectors_With_Custom_Operator_Character(string formatString, char customChar) { - var parser = GetRegularParser(); - parser.Settings.Parser.AddCustomOperatorChars(new[]{customChar}); + // settings must be set before parser instantiation + var settings = new SmartSettings(); + settings.Parser.AddCustomOperatorChars([customChar]); + var parser = GetRegularParser(settings); var result = parser.ParseFormat(formatString); var placeholder = result.Items[0] as Placeholder; @@ -583,10 +608,12 @@ public void Selector_WorksWithAllUnicodeChars(string selector) { // See https://github.com/axuno/SmartFormat/issues/454 + // settings must be set before parser instantiation + var settings = new SmartSettings { Parser = { SelectorCharFilter = FilterType.Blocklist } }; const string expected = "The Value"; // The default formatter with default settings should be able to handle any // Unicode characters in selectors except the "magic" disallowed ones - var formatter = Smart.CreateDefaultSmartFormat(); + var formatter = Smart.CreateDefaultSmartFormat(settings); // Use the Unicode string as a selector of the placeholder var template = $"{{{selector}}}"; var result = formatter.Format(template, new Dictionary { { selector, expected } }); @@ -647,10 +674,11 @@ public void Selector_With_Nullable_Operator_Character(string formatString) public void Selector_With_Other_Contiguous_Operator_Characters(string formatString, char customChar) { // contiguous operator characters are parsed as "ONE operator string" - - var parser = GetRegularParser(); + var settings = new SmartSettings(); + settings.Parser.AddCustomOperatorChars([customChar]); + var parser = GetRegularParser(settings); // adding '.' 
is ignored, as it's a standard operator - parser.Settings.Parser.AddCustomOperatorChars(new[]{customChar}); + parser.Settings.Parser.AddCustomOperatorChars([customChar]); var result = parser.ParseFormat(formatString); var placeholder = result.Items[0] as Placeholder; @@ -706,6 +734,12 @@ public void ParseInputAsHtml(string input) Assert.That(literalText!.RawText, Is.EqualTo(input)); } + #region * Parse HTML input without ParserSetting 'IsHtml' + + /// + /// is : + /// all characters are allowed in selectors + /// [TestCase("", "{Placeholder}")] [TestCase("", "{Placeholder}")] [TestCase("Something ! nice", "{ color : #000; }")] @@ -715,7 +749,12 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string sel var parser = GetRegularParser(new SmartSettings { StringFormatCompatibility = false, - Parser = new ParserSettings { ErrorAction = ParseErrorAction.ThrowError, ParseInputAsHtml = false } + Parser = new ParserSettings + { + SelectorCharFilter = FilterType.Blocklist, + ErrorAction = ParseErrorAction.ThrowError, + ParseInputAsHtml = false + } }); var result = parser.ParseFormat(input); @@ -724,9 +763,45 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string sel Assert.That(result.Items, Has.Count.EqualTo(3)); Assert.That(((Placeholder) result.Items[1]).RawText, Is.EqualTo(selector)); }); + } + /// + /// is : + /// Predefined set of allowed characters in selectors + /// + [TestCase("", false)] // should parse a placeholder + [TestCase("", false)] // should parse a placeholder + [TestCase("Something ! nice", true)] // illegal selector chars + [TestCase("Something ';}! 
nice", true)] // illegal selector chars + public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shouldThrow) + { + var parser = GetRegularParser(new SmartSettings + { + StringFormatCompatibility = false, + Parser = new ParserSettings + { + SelectorCharFilter = FilterType.Allowlist, + ErrorAction = ParseErrorAction.ThrowError, + ParseInputAsHtml = false + } + }); + + switch (shouldThrow) + { + case true: + Assert.That(() => _ = parser.ParseFormat(input), Throws.TypeOf()); + break; + case false: + { + var result = parser.ParseFormat(input); + Assert.That(result.Items, Has.Count.EqualTo(3)); + break; + } + } } + #endregion + /// /// SmartFormat is able to parse script tags, if is /// diff --git a/src/SmartFormat.Tests/Core/SettingsTests.cs b/src/SmartFormat.Tests/Core/SettingsTests.cs index 9e60cc6c..bd87c6b3 100644 --- a/src/SmartFormat.Tests/Core/SettingsTests.cs +++ b/src/SmartFormat.Tests/Core/SettingsTests.cs @@ -23,14 +23,15 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added() Assert.Multiple(() => { Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == 'A'), Is.EqualTo(0)); - Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == ' '), Is.EqualTo(0)); + Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == ' '), Is.EqualTo(1)); }); } - [Test] - public void ControlCharacters_Should_Be_Added_As_SelectorChars() + [TestCase(FilterType.Allowlist)] + [TestCase(FilterType.Blocklist)] + public void ControlCharacters_Should_Be_Added_As_SelectorChars(FilterType filterType) { - var settings = new SmartSettings(); + var settings = new SmartSettings { Parser = { SelectorCharFilter = filterType } }; var controlChars = ParserSettings.ControlChars().ToList(); settings.Parser.AddCustomSelectorChars(controlChars); @@ -39,8 +40,8 @@ public void ControlCharacters_Should_Be_Added_As_SelectorChars() Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(controlChars.Count)); foreach (var c in 
settings.Parser.CustomSelectorChars) { - Assert.That(settings.Parser.DisallowedSelectorChars(), Does.Not.Contain(c), - $"Control char U+{(int)c:X4} should be allowed as selector char."); + Assert.That(settings.Parser.GetSelectorChars(), filterType == FilterType.Allowlist ? Does.Contain(c) : Does.Not.Contain(c), + $"Control char U+{(int) c:X4} should be allowed as selector char."); } }); } diff --git a/src/SmartFormat/Core/Parsing/CharSet.cs b/src/SmartFormat/Core/Parsing/CharSet.cs new file mode 100644 index 00000000..a4804294 --- /dev/null +++ b/src/SmartFormat/Core/Parsing/CharSet.cs @@ -0,0 +1,216 @@ +// Copyright SmartFormat Project maintainers and contributors. +// Licensed under the MIT license. + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace SmartFormat.Core.Parsing; + +/// +/// Represents a set of characters that supports efficient storage and lookup +/// for both ASCII and non-ASCII characters. +/// +/// +/// The class is optimized for handling ASCII characters using a bitmap +/// representation, while non-ASCII characters are stored in a separate collection. +/// +/// The class provides methods to add characters individually or in bulk, remove characters, check for containment, and enumerate all +/// characters in the set. ASCII characters are enumerated first in numerical order, followed by non-ASCII characters in +/// no guaranteed order. +/// +/// This class is not thread-safe. +/// +internal class CharSet : IEnumerable +{ + private const int ASCII_LIMIT = 128; + private const int BITS_PER_UINT = 32; + private const int BITMAP_LENGTH = ASCII_LIMIT / BITS_PER_UINT; + + private readonly uint[] _asciiBitmap = new uint[BITMAP_LENGTH]; + private readonly HashSet _nonAsciiChars = []; + + /// + /// Gets or sets a value indicating whether the list is + /// an allowlist (, default) or a blocklist (). + /// + public bool IsAllowList { get; set; } + + /// + /// Initializes a new instance of the class that is empty. 
+ /// + public CharSet() + {} + + /// + /// Initializes a new instance of the class that contains the characters + /// from the specified read-only span. + /// + /// The read-only span containing characters to add to the set. + public CharSet(ReadOnlySpan characters) + { + AddRange(characters); + } + + /// + /// Initializes a new instance of the class that contains the characters + /// from the specified collection. + /// + /// The collection of characters to add to the set. + /// Thrown when is null. + public CharSet(IEnumerable characters) + { + AddRange(characters); + } + + /// + /// Adds all characters from the specified read-only span to the current set. + /// Only adds characters that aren't already present in the set. + /// + /// The read-only span containing characters to add. + public void AddRange(ReadOnlySpan characters) + { + foreach (var ch in characters) + Add(ch); + } + + /// + /// Adds all characters from the specified collection to the current set. + /// Only adds characters that aren't already present in the set. + /// + /// The collection of characters to add. + /// Thrown when is null. + public void AddRange(IEnumerable characters) + { + foreach (var ch in characters) + Add(ch); + } + + /// + /// Adds the specified character to the current set. + /// Only adds a character that isn't already present in the set. + /// + /// The character to add. + public void Add(char c) + { + if (c < ASCII_LIMIT) + _asciiBitmap[c / BITS_PER_UINT] |= 1u << c % BITS_PER_UINT; + else + _nonAsciiChars.Add(c); + } + + /// + /// Removes the specified character from the current set. + /// + /// The character to remove. + /// + /// if the character was successfully found and removed; + /// otherwise, . 
+ /// + public bool Remove(char c) + { + if (c < ASCII_LIMIT) + { + ref var bitmap = ref _asciiBitmap[c / BITS_PER_UINT]; + var mask = 1u << c % BITS_PER_UINT; + + if ((bitmap & mask) == 0) return false; + + bitmap &= ~mask; + return true; + } + + return _nonAsciiChars.Remove(c); + } + + /// + /// Determines whether the current set contains the specified character. + /// + /// The character to locate in the set. + /// + /// if the set contains the specified character; otherwise, . + /// + public bool Contains(char c) + { + if (c < ASCII_LIMIT) + return (_asciiBitmap[c / BITS_PER_UINT] & 1u << c % BITS_PER_UINT) != 0; + + return _nonAsciiChars.Contains(c); + } + + /// + /// Removes all characters from the current set. + /// + public void Clear() + { + Array.Clear(_asciiBitmap, 0, _asciiBitmap.Length); + _nonAsciiChars.Clear(); + } + + /// + /// Gets the number of characters contained in the set. + /// + /// The number of characters in the set. + public int Count + { + get + { + var count = 0; + + // Count ASCII characters using bit population count + foreach (var segment in _asciiBitmap) + count += BitCount(segment); + + return count + _nonAsciiChars.Count; + } + } + + /// + /// Returns an enumerator that iterates through the characters in the set. + /// + /// An enumerator that can be used to iterate through the characters in the set. + /// + /// The enumeration returns ASCII characters first (in numerical order), followed by non-ASCII characters + /// (in no guaranteed order). + /// + public IEnumerable GetCharacters() + { + for (var i = 0; i < ASCII_LIMIT; i++) + if ((_asciiBitmap[i / BITS_PER_UINT] & 1u << i % BITS_PER_UINT) != 0) + yield return (char) i; + + foreach (var c in _nonAsciiChars) + yield return c; + } + + /// + /// Helper method to count set bits in an uint (Hamming weight) + /// + /// The unsigned integer value to count bits in. + /// The number of bits set to 1 in the specified value. 
+ private static int BitCount(uint value) + { + // SWAR (SIMD Within A Register) technique for counting the number + // of set bits (1s) in a 32-bit unsigned integer. + + // Count bits in pairs. + // Subtracts each pair of bits from itself shifted right by one, masked to isolate alternating bits. + value -= value >> 1 & 0x55555555; + // Count bits in 4-bit groups. Adds adjacent 2-bit counts to form 4-bit counts. + value = (value & 0x33333333) + (value >> 2 & 0x33333333); + // Aggregate all 4-bit counts into a single total. + return (int) ((value + (value >> 4) & 0x0F0F0F0F) * 0x01010101) >> 24; + } + + /// + public IEnumerator GetEnumerator() + { + foreach (var ch in GetCharacters()) yield return ch; + } + + /// + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } +} diff --git a/src/SmartFormat/Core/Parsing/Parser.cs b/src/SmartFormat/Core/Parsing/Parser.cs index 992c5381..2360cf9f 100644 --- a/src/SmartFormat/Core/Parsing/Parser.cs +++ b/src/SmartFormat/Core/Parsing/Parser.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using SmartFormat.Core.Settings; using SmartFormat.Pooling.SmartPools; @@ -33,11 +34,11 @@ public class Parser public SmartSettings Settings { get; } // Cache method results from settings - private readonly List _operatorChars; - private readonly List _customOperatorChars; + private readonly CharSet _operatorChars; + private readonly CharSet _customOperatorChars; private readonly ParserSettings _parserSettings; - private readonly HashSet _disallowedSelectorChars; - private readonly List _formatOptionsTerminatorChars; + private readonly CharSet _selectorChars; + private readonly CharSet _formatOptionsTerminatorChars; #endregion @@ -63,11 +64,11 @@ public Parser(SmartSettings? smartSettings = null) { Settings = smartSettings ?? 
new SmartSettings(); _parserSettings = Settings.Parser; - _operatorChars = ParserSettings.OperatorChars; - _customOperatorChars = _parserSettings.CustomOperatorChars; - _formatOptionsTerminatorChars = ParserSettings.FormatOptionsTerminatorChars; - - _disallowedSelectorChars = _parserSettings.DisallowedSelectorChars(); + _operatorChars = new CharSet(ParserSettings.OperatorChars.AsSpan()) ; + _customOperatorChars = new CharSet(_parserSettings.CustomOperatorChars); + _formatOptionsTerminatorChars = new CharSet(ParserSettings.FormatOptionsTerminatorChars.AsSpan()); + // Selector chars can be an allowlist or blocklist: + _selectorChars = _parserSettings.GetSelectorChars(); } #endregion @@ -78,6 +79,7 @@ public Parser(SmartSettings? smartSettings = null) /// Includes a-z and A-Z in the list of allowed selector chars. /// [Obsolete("Alphanumeric selectors are always enabled", true)] + [ExcludeFromCodeCoverage] public void AddAlphanumericSelectors() { // Do nothing - this is the standard behavior @@ -88,6 +90,7 @@ public void AddAlphanumericSelectors() /// /// [Obsolete("Use 'Settings.Parser.AddCustomSelectorChars' instead.", true)] + [ExcludeFromCodeCoverage] public void AddAdditionalSelectorChars(string chars) { _parserSettings.AddCustomSelectorChars(chars.ToCharArray()); @@ -100,6 +103,7 @@ public void AddAdditionalSelectorChars(string chars) /// /// [Obsolete("Use 'Settings.Parser.AddCustomOperatorChars' instead.", true)] + [ExcludeFromCodeCoverage] public void AddOperators(string chars) { _parserSettings.AddCustomOperatorChars(chars.ToCharArray()); @@ -112,6 +116,7 @@ public void AddOperators(string chars) /// /// Defaults to backslash [Obsolete("Use 'Settings.StringFormatCompatibility' instead.", true)] + [ExcludeFromCodeCoverage] public void UseAlternativeEscapeChar(char alternativeEscapeChar = '\\') { if (alternativeEscapeChar != _parserSettings.CharLiteralEscapeChar) @@ -129,6 +134,7 @@ public void UseAlternativeEscapeChar(char alternativeEscapeChar = '\\') /// 
backslash. /// [Obsolete("Use 'Settings.StringFormatCompatibility' instead.", true)] + [ExcludeFromCodeCoverage] public void UseBraceEscaping() { throw new NotSupportedException($"Init-only property {nameof(Settings)}.{nameof(Settings.StringFormatCompatibility)} can only be set in an object initializer"); @@ -140,6 +146,7 @@ public void UseBraceEscaping() /// /// [Obsolete("This feature has been removed", true)] + [ExcludeFromCodeCoverage] public void UseAlternativeBraces(char opening, char closing) { throw new NotSupportedException("This feature has been removed"); @@ -323,11 +330,28 @@ private void ProcessSelector(char inputChar, ParserState state, ParsingErrors pa else { // Ensure the selector characters are valid: - if (_disallowedSelectorChars.Contains(inputChar)) - parsingErrors.AddIssue(state.ResultFormat, - $"'0x{Convert.ToUInt32(inputChar):X}': " + - _parsingErrorText[ParsingError.InvalidCharactersInSelector], - state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1)); + if (_selectorChars.IsAllowList) + { + // Only allow specific characters + if (!_selectorChars.Contains(inputChar)) + { + parsingErrors.AddIssue(state.ResultFormat, + $"'0x{Convert.ToUInt32(inputChar):X}': " + + _parsingErrorText[ParsingError.InvalidCharactersInSelector], + state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1)); + } + } + else + { + // Blocklist: Disallow specific characters + if (_selectorChars.Contains(inputChar)) + { + parsingErrors.AddIssue(state.ResultFormat, + $"'0x{Convert.ToUInt32(inputChar):X}': " + + _parsingErrorText[ParsingError.InvalidCharactersInSelector], + state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1)); + } + } } } diff --git a/src/SmartFormat/Core/Settings/FilterType.cs b/src/SmartFormat/Core/Settings/FilterType.cs new file mode 100644 index 00000000..9d12c5b9 --- /dev/null +++ b/src/SmartFormat/Core/Settings/FilterType.cs @@ -0,0 +1,23 @@ +// +// Copyright SmartFormat Project maintainers and contributors. 
+// Licensed under the MIT license. + +namespace SmartFormat.Core.Settings; + +/// +/// Determines the filter type for allowed or disallowed characters. +/// +public enum FilterType +{ + /// + /// Use a list of characters that are allowed. The default characters are
+ /// alphanumeric characters (upper and lower case), plus '_' and '-'.
+ ///
+ Allowlist, + + /// + /// All Unicode characters are allowed, except those in the blocklist. + /// The default blocklist characters are all control characters (ASCII 0-31 and 127). + /// + Blocklist +} diff --git a/src/SmartFormat/Core/Settings/ParserSettings.cs b/src/SmartFormat/Core/Settings/ParserSettings.cs index 8fa41ca9..c0e3eab0 100644 --- a/src/SmartFormat/Core/Settings/ParserSettings.cs +++ b/src/SmartFormat/Core/Settings/ParserSettings.cs @@ -18,6 +18,9 @@ public class ParserSettings { private readonly List _customSelectorChars = []; private readonly List _customOperatorChars = []; + private FilterType _selectorCharFilter = FilterType.Allowlist; + + private const string StandardAllowlist = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"; /// /// Gets or sets the to use for the . @@ -31,85 +34,66 @@ public class ParserSettings internal List CustomSelectorChars => _customSelectorChars; /// - /// The list of characters which are delimiting a selector. + /// Gets a list of the custom operator characters, which were set with . + /// Contiguous operator characters are parsed as one operator (e.g. '?.'). /// - internal static readonly HashSet SelectorDelimitingChars = - [ - FormatterNameSeparator, - PlaceholderBeginChar, PlaceholderEndChar, - FormatterOptionsBeginChar, FormatterOptionsEndChar - ]; + internal List CustomOperatorChars => _customOperatorChars; /// - /// Gets the set of control characters (ASCII 0-31 and 127). + /// When (default) is set, an allowlist of selector characters is used. + /// The allowlist contains alphanumeric characters (upper and lower case), plus '_' and '-'. + /// On top, any custom selector characters added with are included. + /// + /// When , all Unicode characters are allowed in a selector, + /// except control characters (ASCII 0-31 and 127). Excluded control characters can be added back + /// using . + /// + /// Changing this setting clears any custom operator characters added with . 
/// - internal static IEnumerable ControlChars() + public FilterType SelectorCharFilter { - for (var i = 0; i <= 31; i++) yield return (char) i; - yield return (char) 127; // delete character + get + { + return _selectorCharFilter; + } + set + { + _selectorCharFilter = value; + _customOperatorChars.Clear(); + } } /// - /// The list of characters which are disallowed in a selector. + /// The list of characters for a selector. + /// This can be an allowlist, which contains explicitly allowed characters, + /// or a blocklist, when all Unicode characters are allowed, except those from the blocklist. /// - internal HashSet DisallowedSelectorChars() + internal CharSet GetSelectorChars() => SelectorCharFilter == FilterType.Allowlist ? CreateAllowlist() : CreateBlocklist(); + + private CharSet CreateBlocklist() { - var chars = new HashSet { + var chars = new CharSet { CharLiteralEscapeChar // avoid confusion with escape sequences }; - chars.UnionWith(SelectorDelimitingChars); - chars.UnionWith(OperatorChars); // no overlaps - chars.UnionWith(CustomOperatorChars); // no overlaps + chars.IsAllowList = false; + chars.AddRange(SelectorDelimitingChars.AsSpan()); + chars.AddRange(OperatorChars.AsSpan()); // no overlaps + chars.AddRange(_customOperatorChars); // no overlaps // Hard to visualize and debug, disallow by default - can be added back as custom selector chars - chars.UnionWith(ControlChars()); + chars.AddRange(ControlChars()); - // Remove characters used as custom selector chars. - // Note: Using chars.ExceptWith(_customOperatorChars) would not remove char 0. - foreach (var c in _customSelectorChars) chars.Remove(c); + // Remove characters used as custom selector chars from the blocklist + foreach (var c in _customSelectorChars) chars.Remove(c); return chars; } - /// - /// Gets a list of the custom operator characters, which were set with . - /// Contiguous operator characters are parsed as one operator (e.g. '?.'). 
- /// - internal List CustomOperatorChars => _customOperatorChars; - - /// - /// Add a list of allowable selector characters on top of the default selector characters. - /// This can be useful to add control characters (ASCII 0-31 and 127) that are excluded by default. - /// Operator chars and selector chars must be different. - /// - public void AddCustomSelectorChars(IList characters) - { - var controlChars = ControlChars().ToList(); - - foreach (var c in characters) - { - // Explicitly disallow certain characters - if (SelectorDelimitingChars.Contains(c) || c == CharLiteralEscapeChar - || OperatorChars.Contains(c) || CustomOperatorChars.Contains(c)) - throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character."); - - if (controlChars.Contains(c)) - _customSelectorChars.Add(c); - } - } - - /// - /// Add a list of allowable operator characters on top of the standard setting. - /// Operator chars and selector chars must be different. - /// - public void AddCustomOperatorChars(IList characters) + private CharSet CreateAllowlist() { - foreach (var c in characters) - { - if (SelectorDelimitingChars.Contains(c) || CustomSelectorChars.Contains(c)) - throw new ArgumentException($"Cannot add '{c}' as a custom operator character. It is disallowed or in use as a selector."); - - if (!OperatorChars.Contains(c) && !_customOperatorChars.Contains(c)) - _customOperatorChars.Add(c); - } + var chars = new CharSet {IsAllowList = true}; + chars.AddRange(StandardAllowlist.AsSpan()); + // Add characters used as custom selector chars to the allowlist + chars.AddRange(_customSelectorChars); + return chars; } /// @@ -148,15 +132,6 @@ public void AddCustomOperatorChars(IList characters) /// internal const char FormatterNameSeparator = ':'; - /// - /// The standard operator characters. - /// Contiguous operator characters are parsed as one operator (e.g. '?.'). 
- /// - internal static readonly List OperatorChars = - [ - SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar - ]; - /// /// The character which separates the selector for alignment. E.g.: Smart.Format("Name: {name,10}") /// @@ -208,9 +183,84 @@ public void AddCustomOperatorChars(IList characters) /// Characters which terminate parsing of format options. /// To use them as options, they must be escaped (preceded) by the . /// - internal static readonly List FormatOptionsTerminatorChars = + internal static readonly char[] FormatOptionsTerminatorChars = [ FormatterNameSeparator, FormatterOptionsBeginChar, FormatterOptionsEndChar, PlaceholderBeginChar, PlaceholderEndChar ]; + + /// + /// The standard operator characters. + /// Contiguous operator characters are parsed as one operator (e.g. '?.'). + /// + internal static readonly char[] OperatorChars = + [ + SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar + ]; + + /// + /// The list of characters which are delimiting a selector. + /// + internal static readonly char[] SelectorDelimitingChars = + [ + FormatterNameSeparator, + PlaceholderBeginChar, PlaceholderEndChar, + FormatterOptionsBeginChar, FormatterOptionsEndChar + ]; + + /// + /// Gets the set of control characters (ASCII 0-31 and 127). + /// + internal static IEnumerable ControlChars() + { + for (var i = 0; i <= 31; i++) yield return (char) i; + yield return (char) 127; // delete character + } + + /// + /// Add a list of allowable selector characters on top of the default selector characters. + /// + /// When is (default), an allowlist of selector characters is used. + /// The allowlist contains alphanumeric characters (upper and lower case), plus '_' and '-'. + /// On top, any custom selector characters added with are included. + /// + /// When is , all Unicode characters are allowed in a selector, + /// except control characters (ASCII 0-31 and 127). 
Excluded control characters can be added back + /// using . + /// + /// Operator chars and selector chars must be different. + /// + public void AddCustomSelectorChars(IList characters) + { + var controlChars = ControlChars().ToList(); + + foreach (var c in characters) + { + // Explicitly disallow certain characters + if (SelectorDelimitingChars.Contains(c) || c == CharLiteralEscapeChar + || OperatorChars.Contains(c) || CustomOperatorChars.Contains(c)) + throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character."); + + if (controlChars.Contains(c)) + _customSelectorChars.Add(c); + + if (SelectorCharFilter == FilterType.Allowlist && !(StandardAllowlist.Contains(c) || _customSelectorChars.Contains(c))) _customSelectorChars.Add(c); + } + } + + /// + /// Add a list of allowable operator characters on top of the standard setting. + /// Operator chars and selector chars must be different. + /// + public void AddCustomOperatorChars(IList characters) + { + foreach (var c in characters) + { + if (SelectorDelimitingChars.Contains(c) || CustomSelectorChars.Contains(c)) + throw new ArgumentException($"Cannot add '{c}' as a custom operator character. 
It is disallowed or in use as a selector."); + + if (!OperatorChars.Contains(c) && !_customOperatorChars.Contains(c)) + _customOperatorChars.Add(c); + } + } } From ab623c159f4c1746e2a2734945ed5e9b60514abf Mon Sep 17 00:00:00 2001 From: axunonb Date: Tue, 11 Nov 2025 10:56:39 +0100 Subject: [PATCH 4/5] Change enum `FilterType` to `SelectorFilterType` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement proposals from review: * SelectorFilterType.Alphanumeric: alphanumeric characters (upper and lower case), plus '_' and '-' * SelectorFilterType.VisualUnicodeChars: All Unicode characters are allowed in a selector, except 68 non-visual characters: Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf), Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn), Whitespace Characters (non-glyph spacing). --- src/SmartFormat.Tests/Core/ParserTests.cs | 10 +- src/SmartFormat.Tests/Core/SettingsTests.cs | 16 +-- src/SmartFormat/Core/Settings/FilterType.cs | 23 ---- .../Core/Settings/ParserSettings.cs | 119 ++++++++++++++---- .../Core/Settings/SelectorFilterType.cs | 27 ++++ 5 files changed, 138 insertions(+), 57 deletions(-) delete mode 100644 src/SmartFormat/Core/Settings/FilterType.cs create mode 100644 src/SmartFormat/Core/Settings/SelectorFilterType.cs diff --git a/src/SmartFormat.Tests/Core/ParserTests.cs b/src/SmartFormat.Tests/Core/ParserTests.cs index ab65df2c..dc6780bc 100644 --- a/src/SmartFormat.Tests/Core/ParserTests.cs +++ b/src/SmartFormat.Tests/Core/ParserTests.cs @@ -211,7 +211,7 @@ public void Parser_Error_Action_OutputErrorInResult() { Parser = new ParserSettings { - SelectorCharFilter = FilterType.Allowlist, // default + SelectorCharFilter = SelectorFilterType.Alphanumeric, // default ErrorAction = ParseErrorAction.OutputErrorInResult } }); @@ -471,7 +471,7 @@ public void Escaping_TheEscapingCharacter_ShouldWork() [Test] public void 
Parsing_Selector_With_CharFromBlocklist_ShouldThrow() { - var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = FilterType.Blocklist } }; + var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = SelectorFilterType.VisualUnicodeChars } }; var parser = GetRegularParser(settings); // The newline character is in the default blocklist of disallowed characters @@ -609,7 +609,7 @@ public void Selector_WorksWithAllUnicodeChars(string selector) // See https://github.com/axuno/SmartFormat/issues/454 // settings must be set before parser instantiation - var settings = new SmartSettings { Parser = { SelectorCharFilter = FilterType.Blocklist } }; + var settings = new SmartSettings { Parser = { SelectorCharFilter = SelectorFilterType.VisualUnicodeChars } }; const string expected = "The Value"; // The default formatter with default settings should be able to handle any // Unicode characters in selectors except the "magic" disallowed ones @@ -751,7 +751,7 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string sel StringFormatCompatibility = false, Parser = new ParserSettings { - SelectorCharFilter = FilterType.Blocklist, + SelectorCharFilter = SelectorFilterType.VisualUnicodeChars, ErrorAction = ParseErrorAction.ThrowError, ParseInputAsHtml = false } @@ -780,7 +780,7 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shoul StringFormatCompatibility = false, Parser = new ParserSettings { - SelectorCharFilter = FilterType.Allowlist, + SelectorCharFilter = SelectorFilterType.Alphanumeric, ErrorAction = ParseErrorAction.ThrowError, ParseInputAsHtml = false } diff --git a/src/SmartFormat.Tests/Core/SettingsTests.cs b/src/SmartFormat.Tests/Core/SettingsTests.cs index bd87c6b3..38140348 100644 --- a/src/SmartFormat.Tests/Core/SettingsTests.cs +++ b/src/SmartFormat.Tests/Core/SettingsTests.cs @@ -27,21 +27,21 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added() }); 
} - [TestCase(FilterType.Allowlist)] - [TestCase(FilterType.Blocklist)] - public void ControlCharacters_Should_Be_Added_As_SelectorChars(FilterType filterType) + [TestCase(SelectorFilterType.Alphanumeric)] + [TestCase(SelectorFilterType.VisualUnicodeChars)] + public void NonVisualCharacters_Should_Be_AddedBack_As_SelectorChars(SelectorFilterType filterType) { var settings = new SmartSettings { Parser = { SelectorCharFilter = filterType } }; - var controlChars = ParserSettings.ControlChars().ToList(); - settings.Parser.AddCustomSelectorChars(controlChars); + var nonVisualChars = ParserSettings.NonVisualUnicodeCharacters; + settings.Parser.AddCustomSelectorChars(nonVisualChars); Assert.Multiple(() => { - Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(controlChars.Count)); + Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(nonVisualChars.Length)); foreach (var c in settings.Parser.CustomSelectorChars) { - Assert.That(settings.Parser.GetSelectorChars(), filterType == FilterType.Allowlist ? Does.Contain(c) : Does.Not.Contain(c), - $"Control char U+{(int) c:X4} should be allowed as selector char."); + Assert.That(settings.Parser.GetSelectorChars(), filterType == SelectorFilterType.Alphanumeric ? Does.Contain(c) : Does.Not.Contain(c), + $"Character U+{(int) c:X4} should be allowed as selector char."); } }); } diff --git a/src/SmartFormat/Core/Settings/FilterType.cs b/src/SmartFormat/Core/Settings/FilterType.cs deleted file mode 100644 index 9d12c5b9..00000000 --- a/src/SmartFormat/Core/Settings/FilterType.cs +++ /dev/null @@ -1,23 +0,0 @@ -// -// Copyright SmartFormat Project maintainers and contributors. -// Licensed under the MIT license. - -namespace SmartFormat.Core.Settings; - -/// -/// Determines the filter type for allowed or disallowed characters. -/// -public enum FilterType -{ - /// - /// Use a list of characters that are allowed. The default characters are
- /// alphanumeric characters (upper and lower case), plus '_' and '-'.
- ///
- Allowlist, - - /// - /// All Unicode characters are allowed, except those in the blocklist. - /// The default blocklist characters are all control characters (ASCII 0-31 and 127). - /// - Blocklist -} diff --git a/src/SmartFormat/Core/Settings/ParserSettings.cs b/src/SmartFormat/Core/Settings/ParserSettings.cs index c0e3eab0..f14e6831 100644 --- a/src/SmartFormat/Core/Settings/ParserSettings.cs +++ b/src/SmartFormat/Core/Settings/ParserSettings.cs @@ -18,7 +18,7 @@ public class ParserSettings { private readonly List _customSelectorChars = []; private readonly List _customOperatorChars = []; - private FilterType _selectorCharFilter = FilterType.Allowlist; + private SelectorFilterType _selectorCharFilter = SelectorFilterType.Alphanumeric; private const string StandardAllowlist = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"; @@ -40,17 +40,21 @@ public class ParserSettings internal List CustomOperatorChars => _customOperatorChars; /// - /// When (default) is set, an allowlist of selector characters is used. + /// When (default) is set, an allowlist of selector characters is used. /// The allowlist contains alphanumeric characters (upper and lower case), plus '_' and '-'. /// On top, any custom selector characters added with are included. /// - /// When , all Unicode characters are allowed in a selector, - /// except control characters (ASCII 0-31 and 127). Excluded control characters can be added back - /// using . + /// When is set, all Unicode characters are allowed in a selector, + /// except 68 non-visual characters: Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf), + /// Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn), + /// Whitespace Characters (non-glyph spacing).
+ /// Excluded characters can be added back using . + /// + /// The characters {}[]()\.?,: have special functions and are never allowed as selector characters. /// /// Changing this setting clears any custom operator characters added with . ///
- public FilterType SelectorCharFilter + public SelectorFilterType SelectorCharFilter { get { @@ -68,7 +72,7 @@ public FilterType SelectorCharFilter /// This can be an allowlist, which contains explicitly allowed characters, /// or a blocklist, when all Unicode characters are allowed, except those from the blocklist. /// - internal CharSet GetSelectorChars() => SelectorCharFilter == FilterType.Allowlist ? CreateAllowlist() : CreateBlocklist(); + internal CharSet GetSelectorChars() => SelectorCharFilter == SelectorFilterType.Alphanumeric ? CreateAllowlist() : CreateBlocklist(); private CharSet CreateBlocklist() { @@ -79,8 +83,7 @@ private CharSet CreateBlocklist() chars.AddRange(SelectorDelimitingChars.AsSpan()); chars.AddRange(OperatorChars.AsSpan()); // no overlaps chars.AddRange(_customOperatorChars); // no overlaps - // Hard to visualize and debug, disallow by default - can be added back as custom selector chars - chars.AddRange(ControlChars()); + chars.AddRange(NonVisualUnicodeCharacters.AsSpan()); // Remove characters used as custom selector chars from the blocklist foreach (var c in _customSelectorChars) chars.Remove(c); @@ -209,13 +212,90 @@ private CharSet CreateAllowlist() ]; /// - /// Gets the set of control characters (ASCII 0-31 and 127). + /// All 68 non-visual Unicode characters that are typically not used in selectors. 
/// - internal static IEnumerable ControlChars() - { - for (var i = 0; i <= 31; i++) yield return (char) i; - yield return (char) 127; // delete character - } + internal static char[] NonVisualUnicodeCharacters = + [ + // Control Characters (U+0000–U+001F, U+007F) + '\u0000', // NULL – string terminator + '\u0001', // START OF HEADING – protocol control + '\u0002', // START OF TEXT – protocol control + '\u0003', // END OF TEXT – protocol control + '\u0004', // END OF TRANSMISSION – protocol control + '\u0005', // ENQUIRY – request for response + '\u0006', // ACKNOWLEDGE – positive response + '\u0007', // BELL – triggers alert + '\u0008', // BACKSPACE – moves cursor back + '\u0009', // CHARACTER TABULATION – horizontal tab + '\u000A', // LINE FEED – line break + '\u000B', // LINE TABULATION – vertical tab + '\u000C', // FORM FEED – page break + '\u000D', // CARRIAGE RETURN – return to line start + '\u000E', // SHIFT OUT – alternate character set + '\u000F', // SHIFT IN – return to standard set + '\u0010', // DATA LINK ESCAPE – protocol framing + '\u0011', // DEVICE CONTROL 1 – device-specific + '\u0012', // DEVICE CONTROL 2 – device-specific + '\u0013', // DEVICE CONTROL 3 – device-specific + '\u0014', // DEVICE CONTROL 4 – device-specific + '\u0015', // NEGATIVE ACKNOWLEDGE – error signal + '\u0016', // SYNCHRONOUS IDLE – timing control + '\u0017', // END OF TRANSMISSION BLOCK – block end + '\u0018', // CANCEL – cancel transmission + '\u0019', // END OF MEDIUM – physical medium end + '\u001A', // SUBSTITUTE – invalid character + '\u001B', // ESCAPE – escape sequence initiator + '\u001C', // FILE SEPARATOR – data structuring + '\u001D', // GROUP SEPARATOR – data structuring + '\u001E', // RECORD SEPARATOR – data structuring + '\u001F', // UNIT SEPARATOR – data structuring + '\u007F', // DELETE – erase character + + // Format Characters (Category: Cf) + '\u200B', // ZERO WIDTH SPACE – invisible space + '\u200C', // ZERO WIDTH NON-JOINER – prevents ligature + 
'\u200D', // ZERO WIDTH JOINER – forces ligature + '\u2060', // WORD JOINER – prevents line break + '\uFEFF', // ZERO WIDTH NO-BREAK SPACE – BOM or NBSP + + // Directional Formatting (Category: Cf) + '\u202A', // LEFT-TO-RIGHT EMBEDDING – sets LTR context + '\u202B', // RIGHT-TO-LEFT EMBEDDING – sets RTL context + '\u202C', // POP DIRECTIONAL FORMATTING – ends override + '\u202D', // LEFT-TO-RIGHT OVERRIDE – forces LTR rendering + '\u202E', // RIGHT-TO-LEFT OVERRIDE – forces RTL rendering + '\u2066', // LEFT-TO-RIGHT ISOLATE – isolates LTR segment + '\u2067', // RIGHT-TO-LEFT ISOLATE – isolates RTL segment + '\u2068', // FIRST STRONG ISOLATE – isolates with inferred direction + '\u2069', // POP DIRECTIONAL ISOLATE – ends isolate + + // Invisible Separator + '\u2063', // INVISIBLE SEPARATOR – semantic boundary marker + + // Common Combining Marks (Category: Mn) + '\u0300', // COMBINING GRAVE ACCENT – diacritic (invisible alone) + '\u0301', // COMBINING ACUTE ACCENT – diacritic (invisible alone) + '\u0302', // COMBINING CIRCUMFLEX ACCENT – diacritic (invisible alone) + '\u0308', // COMBINING DIAERESIS – diacritic (invisible alone) + + // Whitespace Characters (non-glyph spacing) + '\u00A0', // NO-BREAK SPACE – non-breaking space + '\u1680', // OGHAM SPACE MARK – special spacing + '\u2000', // EN QUAD – fixed-width space + '\u2001', // EM QUAD – fixed-width space + '\u2002', // EN SPACE – fixed-width space + '\u2003', // EM SPACE – fixed-width space + '\u2004', // THREE-PER-EM SPACE – narrow space + '\u2005', // FOUR-PER-EM SPACE – narrow space + '\u2006', // SIX-PER-EM SPACE – narrow space + '\u2007', // FIGURE SPACE – aligns digits + '\u2008', // PUNCTUATION SPACE – aligns punctuation + '\u2009', // THIN SPACE – narrow space + '\u200A', // HAIR SPACE – ultra-thin space + '\u202F', // NARROW NO-BREAK SPACE – narrow NBSP + '\u205F', // MEDIUM MATHEMATICAL SPACE – math spacing + '\u3000' // IDEOGRAPHIC SPACE – full-width CJK space + ]; /// /// Add a list of allowable 
selector characters on top of the default selector characters. @@ -225,15 +305,12 @@ internal static IEnumerable ControlChars() /// On top, any custom selector characters added with are included. /// /// When is , all Unicode characters are allowed in a selector, - /// except control characters (ASCII 0-31 and 127). Excluded control characters can be added back - /// using . + /// except 68 non-visual characters. Excluded characters can be added back using . /// /// Operator chars and selector chars must be different. /// public void AddCustomSelectorChars(IList characters) { - var controlChars = ControlChars().ToList(); - foreach (var c in characters) { // Explicitly disallow certain characters @@ -241,10 +318,10 @@ public void AddCustomSelectorChars(IList characters) || OperatorChars.Contains(c) || CustomOperatorChars.Contains(c)) throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character."); - if (controlChars.Contains(c)) + if (NonVisualUnicodeCharacters.Contains(c)) _customSelectorChars.Add(c); - if (SelectorCharFilter == FilterType.Allowlist && !(StandardAllowlist.Contains(c) || _customSelectorChars.Contains(c))) _customSelectorChars.Add(c); + if (SelectorCharFilter == SelectorFilterType.Alphanumeric && !(StandardAllowlist.Contains(c) || _customSelectorChars.Contains(c))) _customSelectorChars.Add(c); } } diff --git a/src/SmartFormat/Core/Settings/SelectorFilterType.cs b/src/SmartFormat/Core/Settings/SelectorFilterType.cs new file mode 100644 index 00000000..b975b987 --- /dev/null +++ b/src/SmartFormat/Core/Settings/SelectorFilterType.cs @@ -0,0 +1,27 @@ +// +// Copyright SmartFormat Project maintainers and contributors. +// Licensed under the MIT license. + +namespace SmartFormat.Core.Settings; + +/// +/// Determines the filter type for allowed or disallowed characters. +/// +public enum SelectorFilterType +{ + /// + /// Use a list of characters that are allowed. The default characters are
+ /// alphanumeric characters (upper and lower case), plus '_' and '-'.
+ ///
+ /// + Alphanumeric, + + /// + /// All Unicode characters are allowed in a selector, except 68 non-visual characters: + /// Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf), + /// Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn), + /// Whitespace Characters (non-glyph spacing). + /// + /// The characters {}[]()\.?,: have special functions and are never allowed as selector characters. + /// + VisualUnicodeChars +} From 8cb11ce4d3529c64282b70cfa4f1b590a147b1e5 Mon Sep 17 00:00:00 2001 From: axunonb Date: Tue, 11 Nov 2025 13:15:03 +0100 Subject: [PATCH 5/5] Make `NonVisualUnicodeCharacters` read-only --- src/SmartFormat/Core/Settings/ParserSettings.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SmartFormat/Core/Settings/ParserSettings.cs b/src/SmartFormat/Core/Settings/ParserSettings.cs index f14e6831..291325d7 100644 --- a/src/SmartFormat/Core/Settings/ParserSettings.cs +++ b/src/SmartFormat/Core/Settings/ParserSettings.cs @@ -214,7 +214,7 @@ private CharSet CreateAllowlist() /// /// All 68 non-visual Unicode characters that are typically not used in selectors. /// - internal static char[] NonVisualUnicodeCharacters = + internal static readonly char[] NonVisualUnicodeCharacters = [ // Control Characters (U+0000–U+001F, U+007F) '\u0000', // NULL – string terminator