diff --git a/Tests/Tests.cs b/Tests/Tests.cs index f1db93f..128c68f 100644 --- a/Tests/Tests.cs +++ b/Tests/Tests.cs @@ -258,4 +258,38 @@ public void TestDateToken() Check.That(dateTime).IsEqualTo("2024.04.23"); } + /* Compiles a grammar that sets IndentationAware(false) and IgnoreKeyWordCase(true), checks that both values reach the lexer model while the unset IgnoreWS / IgnoreEOL stay null, then parses the same sentence in two different casings. */ [Fact] + public void TestLexerOptions() + { + var grammar = @" +genericLexer MinimalLexer; +[IndentationAware(false)] +[IgnoreKeyWordCase(true)] + +[AlphaNumDashId] ID; +[KeyWord] HELLO : ""hello""; +[KeyWord] WORLD : ""world""; + +parser MinimalParser; + +-> root : HELLO WORLD ; +"; + var builder = new ParserBuilder(); + var model = builder.CompileModel(grammar, "MinimalParser"); + Check.That(model).IsOkModel(); + Check.That(model.Value.LexerModel.Options.IgnoreKeyWordCase.Value).IsTrue(); + Check.That(model.Value.LexerModel.Options.IndentationAware.Value).IsFalse(); + Check.That(model.Value.LexerModel.Options.IgnoreWS).IsNull(); + Check.That(model.Value.LexerModel.Options.IgnoreEOL).IsNull(); + + /* The generated lexer is not inspected; this call only has to complete without throwing. */ var generator = new LexerGenerator(); + var lexer = generator.GenerateLexer(model.Value.LexerModel, "namespace"); + /* NOTE(review): the model above is compiled as "MinimalParser" while Getz receives "MyParser" - confirm that argument does not have to match the grammar's parser name. */ var processors = new List<(string format, SyntaxTreeProcessor processor)>() {("JSON",ParserBuilder.SyntaxTreeToJson)}; + var json = builder.Getz(grammar, "hello world", "MyParser", processors); + Check.That(json.IsError).IsFalse(); + /* Same sentence with mixed-case keywords: expected to parse as well because the grammar sets IgnoreKeyWordCase(true). */ json = builder.Getz(grammar, "HELLO woRld", "MyParser", processors); + Check.That(json.IsError).IsFalse(); + + } + } \ No newline at end of file diff --git a/csly-cli-builder/LexerBuilder.cs b/csly-cli-builder/LexerBuilder.cs index 7996e6c..4966491 100644 --- a/csly-cli-builder/LexerBuilder.cs +++ b/csly-cli-builder/LexerBuilder.cs @@ -3,6 +3,7 @@ using System.Reflection.Emit; using System.Text.Json.Serialization; using clsy.cli.builder.parser.cli.model; +using clsy.cli.model.lexer; using csly.cli.model.lexer; using sly.lexer; using sly.lexer.fsm; @@ -35,6 +36,8 @@ public LexerBuilder(string name) EnumBuilder enumBuilder = 
moduleBuilder.DefineEnum(DynamicLexerName, TypeAttributes.Public, typeof(int)); + SetLexerOptions(enumBuilder, model.Options); + int i = 0; enumBuilder.DefineLiteral($"{model.Name.ToUpper()}_EOS", i); @@ -57,6 +60,32 @@ public LexerBuilder(string name) return (finished,extensionBuilder,dynamicAssembly,moduleBuilder); } + + /* Attaches a LexerAttribute to the enum being built, assigning its IgnoreEOL, IgnoreWS, KeyWordIgnoreCase and IndentationAWare properties from options. An option left null falls back to IgnoreEOL = true, IgnoreWS = true, KeyWordIgnoreCase = false, IndentationAWare = false. Throws InvalidOperationException when LexerAttribute exposes no public parameterless constructor. */ private void SetLexerOptions(EnumBuilder builder, LexerOptions options) + { + var attributeType = typeof(LexerAttribute); + ConstructorInfo defaultConstructor = attributeType.GetConstructor(Type.EmptyTypes) + ?? throw new InvalidOperationException($"{nameof(LexerAttribute)} has no public parameterless constructor."); + + CustomAttributeBuilder customAttributeBuilder = new CustomAttributeBuilder( + defaultConstructor, + new object[0], + new PropertyInfo?[] { // properties to assign to + attributeType.GetProperty(nameof(LexerAttribute.IgnoreEOL)), + attributeType.GetProperty(nameof(LexerAttribute.IgnoreWS)), + attributeType.GetProperty(nameof(LexerAttribute.KeyWordIgnoreCase)), + attributeType.GetProperty(nameof(LexerAttribute.IndentationAWare)), + }!, + new object[] { // values for property assignment + options.IgnoreEOL ?? true, + options.IgnoreWS ?? true, + options.IgnoreKeyWordCase ?? false, + options.IndentationAware ?? false + }); + + builder.SetCustomAttribute(customAttributeBuilder); + } + private Delegate BuildExtensionIfNeeded(LexerModel model, Type? 
enumType) { diff --git a/csly-cli-model/lexer/LexerModel.cs b/csly-cli-model/lexer/LexerModel.cs index 2b9e925..7b461ab 100644 --- a/csly-cli-model/lexer/LexerModel.cs +++ b/csly-cli-model/lexer/LexerModel.cs @@ -1,11 +1,15 @@ +using clsy.cli.model.lexer; using csly.cli.model; using csly.cli.model.lexer; +using csly.cli.model.parser; using sly.lexer; namespace clsy.cli.builder.parser.cli.model; public class LexerModel : ICLIModel { + + /* Lexer-wide option flags; assigned from the constructor's options argument. */ public LexerOptions Options { get; set; } public List Tokens { get; set; } public Dictionary> TokensByName { get; set; } @@ -14,10 +18,11 @@ public class LexerModel : ICLIModel public bool HasExtension => Tokens.Exists(x => x.Type == GenericToken.Extension); - public LexerModel(List tokens, string name) + /* Stores name, tokens and options, and indexes the tokens by their Name in TokensByName. */ public LexerModel(List tokens, LexerOptions options, string name) { Name = name; Tokens = tokens; + Options = options; var grouped = tokens.GroupBy(x => x.Name); TokensByName = grouped.ToDictionary(x => x.Key, x => x.ToList()); } diff --git a/csly-cli-model/lexer/LexerOptions.cs b/csly-cli-model/lexer/LexerOptions.cs new file mode 100644 index 0000000..7231ccc --- /dev/null +++ b/csly-cli-model/lexer/LexerOptions.cs @@ -0,0 +1,14 @@ +using csly.cli.model; + +namespace clsy.cli.model.lexer; + +/* Optional lexer switches read from a grammar. Every flag is nullable: null means the grammar did not set that option, which lets the consumer tell an explicit false apart from an unset option. */ public class LexerOptions : ICLIModel +{ + public bool? IgnoreWS { get; set; } + + public bool? IgnoreEOL { get; set; } + + public bool? IgnoreKeyWordCase { get; set; } + + public bool? 
IndentationAware { get; set; } +} \ No newline at end of file diff --git a/csly-cli-model/parser/Optimization.cs b/csly-cli-model/parser/ParserOptimization.cs similarity index 78% rename from csly-cli-model/parser/Optimization.cs rename to csly-cli-model/parser/ParserOptimization.cs index aa08562..0c718bd 100644 --- a/csly-cli-model/parser/Optimization.cs +++ b/csly-cli-model/parser/ParserOptimization.cs @@ -2,7 +2,7 @@ namespace csly.cli.model.parser; -public class Optimization : ICLIModel +/* Parser optimization flags (UseMemoization, BroadenTokenWindow) as a model node. */ public class ParserOptimization : ICLIModel { public bool UseMemoization { get; set; } public bool BroadenTokenWindow { get; set; } diff --git a/csly-cli-parser/CLIParser.cs b/csly-cli-parser/CLIParser.cs index a13845a..adb35d8 100644 --- a/csly-cli-parser/CLIParser.cs +++ b/csly-cli-parser/CLIParser.cs @@ -1,4 +1,5 @@ using clsy.cli.builder.parser.cli.model; +using clsy.cli.model.lexer; using csly.cli.model; using csly.cli.model.lexer; using csly.cli.model.parser; @@ -23,10 +24,10 @@ public ICLIModel Root(ICLIModel genericLex, ICLIModel parser, ParserContext cont return new Model(genericLex as LexerModel, parser as ParserModel) ; } - [Production("parserRoot : PARSER[d] ID SEMICOLON[d] optimization* rule*")] + [Production("parserRoot : PARSER[d] ID SEMICOLON[d] parser_optimization* rule*")] public ICLIModel Parser(Token name, List optimizations, List rules, ParserContext context) { - var optims = optimizations.Cast().ToList(); + var optims = optimizations.Cast().ToList(); var model = new ParserModel() { UseMemoization = optims.Exists(x => x.UseMemoization), @@ -40,10 +41,10 @@ public ICLIModel Parser(Token name, List optimizations, Lis } - [Production("optimization : LEFTBRACKET[d] [USEMEMOIZATION|BROADENTOKENWINDOW] RIGHTBRACKET[d]")] + [Production("parser_optimization : LEFTBRACKET[d] [USEMEMOIZATION|BROADENTOKENWINDOW] RIGHTBRACKET[d]")] public ICLIModel Optimization(Token optimizationToken, ParserContext context) { - return new Optimization() + return new 
ParserOptimization() { UseMemoization = optimizationToken.TokenID == CLIToken.USEMEMOIZATION, BroadenTokenWindow = optimizationToken.TokenID == CLIToken.BROADENTOKENWINDOW @@ -56,10 +57,19 @@ public ICLIModel Optimization(Token optimizationToken, ParserContext c #region generic lexer - [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] modedToken*")] - public ICLIModel Lexer(Token name, List tokens, ParserContext context) + /* Builds the LexerModel for a genericLexer section. The parsed lexer_option nodes are merged into one LexerOptions: for each flag the first node that carries a value wins, and a flag no node sets stays null. */ [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] lexer_option* modedToken*")] + public ICLIModel Lexer(Token name, List optionList, List tokens, ParserContext context) { - return new LexerModel(tokens.Cast().ToList(), name.Value); + /* Materialised once because the sequence is scanned four times below. */ var opts = optionList.Cast().ToList(); + var options = new LexerOptions() + { + + IgnoreWS = opts.Select(x => x.IgnoreWS).FirstOrDefault(x => x.HasValue), + IndentationAware = opts.Select(x => x.IndentationAware).FirstOrDefault(x => x.HasValue), + IgnoreEOL = opts.Select(x => x.IgnoreEOL).FirstOrDefault(x => x.HasValue), + IgnoreKeyWordCase = opts.Select(x => x.IgnoreKeyWordCase).FirstOrDefault(x => x.HasValue) + }; + return new LexerModel(tokens.Cast().ToList(),options, name.Value); } [Production("modedToken : mode* token")] @@ -304,7 +314,29 @@ public ICLIModel RangeDefinition(Token start, Token end, Par #endregion + #region lexer options + + // Parses one lexer option written as [OptionName(true)] or [OptionName(false)]. + // OptionName is one of the keywords IgnoreKeyWordCase, IndentationAware, + // IgnoreWhiteSpaces or IgnoreEndOfLines. + // Only the property matching the option is set on the result; the others stay null. + + [Production( + "lexer_option : LEFTBRACKET[d] [IGNOREKEYWORDCASING|INDENTATIONAWARE|IGNOREWHITESPACES|IGNOREEOL] LEFTPAREN[d][TRUE|FALSE]RIGHTPAREN[d] RIGHTBRACKET[d]")] + public ICLIModel lexerOption(Token option, Token enabledFlag, ParserContext context) + { + bool enabled = enabledFlag.TokenID == CLIToken.TRUE; + return new LexerOptions() + { + IgnoreWS = option.TokenID == CLIToken.IGNOREWHITESPACES ? 
enabled : null, + IgnoreEOL = option.TokenID == CLIToken.IGNOREEOL ? enabled : null, + IgnoreKeyWordCase = option.TokenID == CLIToken.IGNOREKEYWORDCASING ? enabled : null, + IndentationAware = option.TokenID == CLIToken.INDENTATIONAWARE ? enabled : null + }; + } + + #endregion #region parser diff --git a/csly-cli-parser/CLIToken.cs b/csly-cli-parser/CLIToken.cs index e8e2e19..4a3a9dd 100644 --- a/csly-cli-parser/CLIToken.cs +++ b/csly-cli-parser/CLIToken.cs @@ -61,9 +61,20 @@ public enum CLIToken [Keyword("Push")] PUSH, [Keyword("Mode")] MODE, [Keyword("Pop")] POP, + [Keyword("true")] TRUE, + [Keyword("false")] FALSE, + + + // parser optimizations [Keyword("UseMemoization")] USEMEMOIZATION, [Keyword("BroadenTokenWindow")] BROADENTOKENWINDOW, + // lexer options + [Keyword("IndentationAware")] INDENTATIONAWARE, + [Keyword("IgnoreWhiteSpaces")] IGNOREWHITESPACES, + [Keyword("IgnoreEndOfLines")] IGNOREEOL, + [Keyword("IgnoreKeyWordCase")] IGNOREKEYWORDCASING, + [Keyword("YYYYMMDD")] YYYYMMDD, [Keyword("DDMMYYYY")] DDMMYYYY,