From a9a355f61a82567d412dca6c4422b7fe91801878 Mon Sep 17 00:00:00 2001 From: b3b00 Date: Fri, 26 Apr 2024 08:58:26 +0200 Subject: [PATCH 1/3] lexer options : initial --- csly-cli-model/lexer/LexerModel.cs | 7 ++- csly-cli-model/lexer/LexerOptions.cs | 14 ++++++ ...{Optimization.cs => ParserOptimization.cs} | 2 +- csly-cli-parser/CLIParser.cs | 45 ++++++++++++++++--- csly-cli-parser/CLIToken.cs | 11 +++++ 5 files changed, 70 insertions(+), 9 deletions(-) create mode 100644 csly-cli-model/lexer/LexerOptions.cs rename csly-cli-model/parser/{Optimization.cs => ParserOptimization.cs} (78%) diff --git a/csly-cli-model/lexer/LexerModel.cs b/csly-cli-model/lexer/LexerModel.cs index 2b9e925..7b461ab 100644 --- a/csly-cli-model/lexer/LexerModel.cs +++ b/csly-cli-model/lexer/LexerModel.cs @@ -1,11 +1,15 @@ +using clsy.cli.model.lexer; using csly.cli.model; using csly.cli.model.lexer; +using csly.cli.model.parser; using sly.lexer; namespace clsy.cli.builder.parser.cli.model; public class LexerModel : ICLIModel { + + public LexerOptions Options { get; set; } public List Tokens { get; set; } public Dictionary> TokensByName { get; set; } @@ -14,10 +18,11 @@ public class LexerModel : ICLIModel public bool HasExtension => Tokens.Exists(x => x.Type == GenericToken.Extension); - public LexerModel(List tokens, string name) + public LexerModel(List tokens, LexerOptions options, string name) { Name = name; Tokens = tokens; + Options = options; var grouped = tokens.GroupBy(x => x.Name); TokensByName = grouped.ToDictionary(x => x.Key, x => x.ToList()); } diff --git a/csly-cli-model/lexer/LexerOptions.cs b/csly-cli-model/lexer/LexerOptions.cs new file mode 100644 index 0000000..ffbc3f9 --- /dev/null +++ b/csly-cli-model/lexer/LexerOptions.cs @@ -0,0 +1,14 @@ +using csly.cli.model; + +namespace clsy.cli.model.lexer; + +public class LexerOptions : ICLIModel +{ + public bool IgnoreWS { get; set; } + + public bool IgnoreEOL { get; set; } + + public bool IgnoreKeyWordCase { get; set; } + + public bool IndentationAware { get; set; } +} \ No newline at end of file diff --git a/csly-cli-model/parser/Optimization.cs b/csly-cli-model/parser/ParserOptimization.cs similarity index 78% rename from csly-cli-model/parser/Optimization.cs rename to csly-cli-model/parser/ParserOptimization.cs index aa08562..0c718bd 100644 --- a/csly-cli-model/parser/Optimization.cs +++ b/csly-cli-model/parser/ParserOptimization.cs @@ -2,7 +2,7 @@ namespace csly.cli.model.parser; -public class Optimization : ICLIModel +public class ParserOptimization : ICLIModel { public bool UseMemoization { get; set; } public bool BroadenTokenWindow { get; set; } diff --git a/csly-cli-parser/CLIParser.cs b/csly-cli-parser/CLIParser.cs index a13845a..ab807d6 100644 --- a/csly-cli-parser/CLIParser.cs +++ b/csly-cli-parser/CLIParser.cs @@ -1,4 +1,5 @@ using clsy.cli.builder.parser.cli.model; +using clsy.cli.model.lexer; using csly.cli.model; using csly.cli.model.lexer; using csly.cli.model.parser; @@ -23,10 +24,10 @@ public ICLIModel Root(ICLIModel genericLex, ICLIModel parser, ParserContext cont return new Model(genericLex as LexerModel, parser as ParserModel) ; } - [Production("parserRoot : PARSER[d] ID SEMICOLON[d] optimization* rule*")] + [Production("parserRoot : PARSER[d] ID SEMICOLON[d] parser_optimization* rule*")] public ICLIModel Parser(Token name, List optimizations, List rules, ParserContext context) { - var optims = optimizations.Cast().ToList(); + var optims = optimizations.Cast().ToList(); var model = new ParserModel() { UseMemoization = optims.Exists(x => x.UseMemoization), @@ -40,10 +41,10 @@ public ICLIModel Parser(Token name, List optimizations, Lis } - [Production("optimization : LEFTBRACKET[d] [USEMEMOIZATION|BROADENTOKENWINDOW] RIGHTBRACKET[d]")] + [Production("parser_optimization : LEFTBRACKET[d] [USEMEMOIZATION|BROADENTOKENWINDOW] RIGHTBRACKET[d]")] public ICLIModel Optimization(Token optimizationToken, ParserContext context) { - return new Optimization() + return new ParserOptimization() { UseMemoization = optimizationToken.TokenID == CLIToken.USEMEMOIZATION, BroadenTokenWindow = optimizationToken.TokenID == CLIToken.BROADENTOKENWINDOW @@ -56,10 +57,18 @@ public ICLIModel Optimization(Token optimizationToken, ParserContext c #region generic lexer - [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] modedToken*")] - public ICLIModel Lexer(Token name, List tokens, ParserContext context) + [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] (lexeroption)* modedToken*")] + public ICLIModel Lexer(Token name, List optionList, List tokens, ParserContext context) { - return new LexerModel(tokens.Cast().ToList(), name.Value); + var opts = optionList.Cast(); + var options = new LexerOptions() + { + IgnoreWS = opts.Any(x => x.IgnoreWS), + IndentationAware = opts.Any(x => x.IndentationAware), + IgnoreEOL = opts.Any(x => x.IgnoreEOL), + IgnoreKeyWordCase = opts.Any(x => x.IgnoreKeyWordCase) + }; + return new LexerModel(tokens.Cast().ToList(),options, name.Value); } [Production("modedToken : mode* token")] @@ -304,7 +313,29 @@ public ICLIModel RangeDefinition(Token start, Token end, Par #endregion + #region lexer options + + // [Keyword("IndentationAware")] INDENTATIONAWARE, + // [Keyword("IgnoreWhiteSpaces")] IGNOREWHITESPACES, + // [Keyword("IgnoreEndOfLines")] IGNOREEOL, + // [Keyword("IgnoreKeyWordCase")] IGNOREKEYWORDCASING, + + [Production( + "lexer_option : LEFTBRACKET[d] [IGNOREKEYWORDCASING|INDENTATIONAWARE|IGNOREWHITESPACES|IGNOREEOL] LEFTPAREN[d][TRUE|FALSE]RIGHTPAREN[d] RIGHTBRACKET[d]")] + public ICLIModel lexerOption(Token option, Token enabledFlag) + { + bool enabled = enabledFlag.Value == "true"; + return new LexerOptions() + { + IgnoreWS = option.TokenID == CLIToken.IGNOREWHITESPACES && enabled, + IgnoreEOL = option.TokenID == CLIToken.IGNOREEOL && enabled, + IgnoreKeyWordCase = option.TokenID == CLIToken.IGNOREKEYWORDCASING && enabled, + IndentationAware = option.TokenID == CLIToken.INDENTATIONAWARE && enabled + }; + } + + #endregion #region parser diff --git a/csly-cli-parser/CLIToken.cs b/csly-cli-parser/CLIToken.cs index e8e2e19..4a3a9dd 100644 --- a/csly-cli-parser/CLIToken.cs +++ b/csly-cli-parser/CLIToken.cs @@ -61,9 +61,20 @@ public enum CLIToken [Keyword("Push")] PUSH, [Keyword("Mode")] MODE, [Keyword("Pop")] POP, + [Keyword("true")] TRUE, + [Keyword("false")] FALSE, + + + // parser optimizations [Keyword("UseMemoization")] USEMEMOIZATION, [Keyword("BroadenTokenWindow")] BROADENTOKENWINDOW, + // lexer options + [Keyword("IndentationAware")] INDENTATIONAWARE, + [Keyword("IgnoreWhiteSpaces")] IGNOREWHITESPACES, + [Keyword("IgnoreEndOfLines")] IGNOREEOL, + [Keyword("IgnoreKeyWordCase")] IGNOREKEYWORDCASING, + [Keyword("YYYYMMDD")] YYYYMMDD, [Keyword("DDMMYYYY")] DDMMYYYY, From 88f0d16957157d57c4505a54997994b9c031f299 Mon Sep 17 00:00:00 2001 From: b3b00 Date: Fri, 26 Apr 2024 14:09:50 +0200 Subject: [PATCH 2/3] . --- Tests/Tests.cs | 34 ++++++++++++++++++++++++++++++++ csly-cli-builder/LexerBuilder.cs | 29 ++++++++++++++++++++++++++- csly-cli-parser/CLIParser.cs | 4 ++-- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/Tests/Tests.cs b/Tests/Tests.cs index f1db93f..9999e48 100644 --- a/Tests/Tests.cs +++ b/Tests/Tests.cs @@ -258,4 +258,38 @@ public void TestDateToken() Check.That(dateTime).IsEqualTo("2024.04.23"); } + [Fact] + public void TestLexerOptions() + { + var grammar = @" +genericLexer MinimalLexer; +[IndentationAware(false)] +[IgnoreKeyWordCase(true)] + +[AlphaNumDashId] ID; +[KeyWord] HELLO : ""hello""; +[KeyWord] WORLD : ""world""; + +parser MinimalParser; + +-> root : HELLO WORLD ; +"; + var builder = new ParserBuilder(); + var model = builder.CompileModel(grammar, "MinimalParser"); + Check.That(model).IsOkModel(); + Check.That(model.Value.LexerModel.Options.IgnoreKeyWordCase).IsTrue(); + Check.That(model.Value.LexerModel.Options.IndentationAware).IsFalse(); + Check.That(model.Value.LexerModel.Options.IgnoreWS).IsFalse(); + Check.That(model.Value.LexerModel.Options.IgnoreEOL).IsFalse(); + + var generator = new LexerGenerator(); + var lexer = generator.GenerateLexer(model.Value.LexerModel, "namespace"); + ; + var json = builder.Getz(grammar, "hello world", "MyParser", new List<(string format, SyntaxTreeProcessor processor)>() {("JSON",ParserBuilder.SyntaxTreeToJson)}); + Check.That(json.IsError).IsFalse(); + json = builder.Getz(grammar, "HELLO woRld", "MyParser", new List<(string format, SyntaxTreeProcessor processor)>() {("JSON",ParserBuilder.SyntaxTreeToJson)}); + Check.That(json.IsError).IsFalse(); + + } + } \ No newline at end of file diff --git a/csly-cli-builder/LexerBuilder.cs b/csly-cli-builder/LexerBuilder.cs index 7996e6c..085d883 100644 --- a/csly-cli-builder/LexerBuilder.cs +++ b/csly-cli-builder/LexerBuilder.cs @@ -3,6 +3,7 @@ using System.Reflection.Emit; using System.Text.Json.Serialization; using clsy.cli.builder.parser.cli.model; +using clsy.cli.model.lexer; using csly.cli.model.lexer; using sly.lexer; using sly.lexer.fsm; @@ -34,7 +35,7 @@ public LexerBuilder(string name) ModuleBuilder moduleBuilder = dynamicAssembly.DefineDynamicModule(aName.Name); EnumBuilder enumBuilder = moduleBuilder.DefineEnum(DynamicLexerName, TypeAttributes.Public, typeof(int)); - + int i = 0; enumBuilder.DefineLiteral($"{model.Name.ToUpper()}_EOS", i); @@ -57,6 +58,32 @@ public LexerBuilder(string name) return (finished,extensionBuilder,dynamicAssembly,moduleBuilder); } + + private void SetLexerOptions(EnumBuilder builder, LexerOptions options) + { + var attributeType = typeof(LexerAttribute); + ConstructorInfo constructorInfo = attributeType.GetConstructor( + new Type[3] { typeof(string), typeof(bool), typeof(int) }); + + CustomAttributeBuilder customAttributeBuilder = new CustomAttributeBuilder( + attributeType.GetConstructor(Type.EmptyTypes), + new object[0], + new PropertyInfo?[] { // properties to assign to + attributeType.GetProperty(nameof(LexerAttribute.IgnoreEOL)), + attributeType.GetProperty(nameof(LexerAttribute.IgnoreWS)), + attributeType.GetProperty(nameof(LexerAttribute.KeyWordIgnoreCase)), + attributeType.GetProperty(nameof(LexerAttribute.IndentationAWare)), + }!, + new object[] { // values for property assignment + options.IgnoreEOL, + options.IgnoreWS, + options.IgnoreKeyWordCase, + options.IndentationAware + }); + + builder.SetCustomAttribute(customAttributeBuilder); + } + private Delegate BuildExtensionIfNeeded(LexerModel model, Type? enumType) { diff --git a/csly-cli-parser/CLIParser.cs b/csly-cli-parser/CLIParser.cs index ab807d6..ae7e8a5 100644 --- a/csly-cli-parser/CLIParser.cs +++ b/csly-cli-parser/CLIParser.cs @@ -57,7 +57,7 @@ public ICLIModel Optimization(Token optimizationToken, ParserContext c #region generic lexer - [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] (lexeroption)* modedToken*")] + [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] lexer_option* modedToken*")] public ICLIModel Lexer(Token name, List optionList, List tokens, ParserContext context) { var opts = optionList.Cast(); @@ -322,7 +322,7 @@ public ICLIModel RangeDefinition(Token start, Token end, Par [Production( "lexer_option : LEFTBRACKET[d] [IGNOREKEYWORDCASING|INDENTATIONAWARE|IGNOREWHITESPACES|IGNOREEOL] LEFTPAREN[d][TRUE|FALSE]RIGHTPAREN[d] RIGHTBRACKET[d]")] - public ICLIModel lexerOption(Token option, Token enabledFlag) + public ICLIModel lexerOption(Token option, Token enabledFlag, ParserContext context) { bool enabled = enabledFlag.Value == "true"; return new LexerOptions() From 38cf13951dff4ee1001e0aed22d2cafa517f4dad Mon Sep 17 00:00:00 2001 From: b3b00 Date: Fri, 26 Apr 2024 14:40:57 +0200 Subject: [PATCH 3/3] . --- Tests/Tests.cs | 8 ++++---- csly-cli-builder/LexerBuilder.cs | 10 ++++++---- csly-cli-model/lexer/LexerOptions.cs | 8 ++++---- csly-cli-parser/CLIParser.cs | 17 +++++++++-------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/Tests/Tests.cs b/Tests/Tests.cs index 9999e48..128c68f 100644 --- a/Tests/Tests.cs +++ b/Tests/Tests.cs @@ -277,10 +277,10 @@ public void TestLexerOptions() var builder = new ParserBuilder(); var model = builder.CompileModel(grammar, "MinimalParser"); Check.That(model).IsOkModel(); - Check.That(model.Value.LexerModel.Options.IgnoreKeyWordCase).IsTrue(); - Check.That(model.Value.LexerModel.Options.IndentationAware).IsFalse(); - Check.That(model.Value.LexerModel.Options.IgnoreWS).IsFalse(); - Check.That(model.Value.LexerModel.Options.IgnoreEOL).IsFalse(); + Check.That(model.Value.LexerModel.Options.IgnoreKeyWordCase.Value).IsTrue(); + Check.That(model.Value.LexerModel.Options.IndentationAware.Value).IsFalse(); + Check.That(model.Value.LexerModel.Options.IgnoreWS).IsNull(); + Check.That(model.Value.LexerModel.Options.IgnoreEOL).IsNull(); var generator = new LexerGenerator(); var lexer = generator.GenerateLexer(model.Value.LexerModel, "namespace"); diff --git a/csly-cli-builder/LexerBuilder.cs b/csly-cli-builder/LexerBuilder.cs index 085d883..4966491 100644 --- a/csly-cli-builder/LexerBuilder.cs +++ b/csly-cli-builder/LexerBuilder.cs @@ -35,6 +35,8 @@ public LexerBuilder(string name) ModuleBuilder moduleBuilder = dynamicAssembly.DefineDynamicModule(aName.Name); EnumBuilder enumBuilder = moduleBuilder.DefineEnum(DynamicLexerName, TypeAttributes.Public, typeof(int)); + + SetLexerOptions(enumBuilder, model.Options); int i = 0; @@ -75,10 +77,10 @@ private void SetLexerOptions(EnumBuilder builder, LexerOptions options) attributeType.GetProperty(nameof(LexerAttribute.IndentationAWare)), }!, new object[] { // values for property assignment - options.IgnoreEOL, - options.IgnoreWS, - options.IgnoreKeyWordCase, - options.IndentationAware + options.IgnoreEOL ?? true, + options.IgnoreWS ?? true, + options.IgnoreKeyWordCase ?? false, + options.IndentationAware ?? false }); builder.SetCustomAttribute(customAttributeBuilder); diff --git a/csly-cli-model/lexer/LexerOptions.cs b/csly-cli-model/lexer/LexerOptions.cs index ffbc3f9..7231ccc 100644 --- a/csly-cli-model/lexer/LexerOptions.cs +++ b/csly-cli-model/lexer/LexerOptions.cs @@ -4,11 +4,11 @@ namespace clsy.cli.model.lexer; public class LexerOptions : ICLIModel { - public bool IgnoreWS { get; set; } + public bool? IgnoreWS { get; set; } - public bool IgnoreEOL { get; set; } + public bool? IgnoreEOL { get; set; } - public bool IgnoreKeyWordCase { get; set; } + public bool? IgnoreKeyWordCase { get; set; } - public bool IndentationAware { get; set; } + public bool? IndentationAware { get; set; } } \ No newline at end of file diff --git a/csly-cli-parser/CLIParser.cs b/csly-cli-parser/CLIParser.cs index ae7e8a5..adb35d8 100644 --- a/csly-cli-parser/CLIParser.cs +++ b/csly-cli-parser/CLIParser.cs @@ -63,10 +63,11 @@ public ICLIModel Lexer(Token name, List optionList, List(); var options = new LexerOptions() { - IgnoreWS = opts.Any(x => x.IgnoreWS), - IndentationAware = opts.Any(x => x.IndentationAware), - IgnoreEOL = opts.Any(x => x.IgnoreEOL), - IgnoreKeyWordCase = opts.Any(x => x.IgnoreKeyWordCase) + + IgnoreWS = opts.Select(x => x.IgnoreWS).FirstOrDefault(x => x.HasValue), + IndentationAware = opts.Select(x => x.IndentationAware).FirstOrDefault(x => x.HasValue), + IgnoreEOL = opts.Select(x => x.IgnoreEOL).FirstOrDefault(x => x.HasValue), + IgnoreKeyWordCase = opts.Select(x => x.IgnoreKeyWordCase).FirstOrDefault(x => x.HasValue) }; return new LexerModel(tokens.Cast().ToList(),options, name.Value); } @@ -327,10 +328,10 @@ public ICLIModel lexerOption(Token option, Token enabledFlag bool enabled = enabledFlag.Value == "true"; return new LexerOptions() { - IgnoreWS = option.TokenID == CLIToken.IGNOREWHITESPACES && enabled, - IgnoreEOL = option.TokenID == CLIToken.IGNOREEOL && enabled, - IgnoreKeyWordCase = option.TokenID == CLIToken.IGNOREKEYWORDCASING && enabled, - IndentationAware = option.TokenID == CLIToken.INDENTATIONAWARE && enabled + IgnoreWS = option.TokenID == CLIToken.IGNOREWHITESPACES ? enabled : null, + IgnoreEOL = option.TokenID == CLIToken.IGNOREEOL ? enabled : null, + IgnoreKeyWordCase = option.TokenID == CLIToken.IGNOREKEYWORDCASING ? enabled : null, + IndentationAware = option.TokenID == CLIToken.INDENTATIONAWARE ? enabled : null }; }