Merge 38cf139 into 545a022

b3b00 · May 7, 2024 · 38d4fac · 38d4fac
2 parents 545a022 + 38cf139
commit 38d4fac
Show file tree

Hide file tree

Showing 7 changed files with 134 additions and 9 deletions.
diff --git a/Tests/Tests.cs b/Tests/Tests.cs
@@ -258,4 +258,38 @@ public void TestDateToken()
         Check.That(dateTime).IsEqualTo("2024.04.23");
     }
 
+    /// <summary>
+    /// Compiles a minimal grammar that sets two lexer options and checks that they reach
+    /// the lexer model, then parses two inputs that differ only by keyword casing.
+    /// </summary>
+    [Fact]
+    public void TestLexerOptions()
+    {
+        var grammar = @"
+genericLexer MinimalLexer;
+[IndentationAware(false)]
+[IgnoreKeyWordCase(true)]
+
+[AlphaNumDashId] ID;
+[KeyWord] HELLO : ""hello"";
+[KeyWord] WORLD : ""world"";
+
+parser MinimalParser;
+
+-> root : HELLO WORLD ;
+";
+        var builder = new ParserBuilder();
+        var model = builder.CompileModel(grammar, "MinimalParser");
+        Check.That(model).IsOkModel();
+
+        // Options written in the grammar carry their value; the ones left out stay null.
+        var options = model.Value.LexerModel.Options;
+        Check.That(options.IgnoreKeyWordCase.Value).IsTrue();
+        Check.That(options.IndentationAware.Value).IsFalse();
+        Check.That(options.IgnoreWS).IsNull();
+        Check.That(options.IgnoreEOL).IsNull();
+
+        // The generated source is not inspected; the call is kept so that an exception
+        // thrown during generation still fails the test.
+        var generator = new LexerGenerator();
+        generator.GenerateLexer(model.Value.LexerModel, "namespace");
+
+        // NOTE(review): "MyParser" differs from the grammar's "MinimalParser" — confirm Getz does not rely on the name.
+        var processors = new List<(string format, SyntaxTreeProcessor processor)>() {("JSON",ParserBuilder.SyntaxTreeToJson)};
+
+        var json = builder.Getz(grammar, "hello world", "MyParser", processors);
+        Check.That(json.IsError).IsFalse();
+
+        // Same input with mixed keyword casing must also be accepted.
+        json = builder.Getz(grammar, "HELLO woRld", "MyParser", processors);
+        Check.That(json.IsError).IsFalse();
+    }
+
 }
diff --git a/csly-cli-builder/LexerBuilder.cs b/csly-cli-builder/LexerBuilder.cs
@@ -3,6 +3,7 @@
 using System.Reflection.Emit;
 using System.Text.Json.Serialization;
 using clsy.cli.builder.parser.cli.model;
+using clsy.cli.model.lexer;
 using csly.cli.model.lexer;
 using sly.lexer;
 using sly.lexer.fsm;
@@ -35,6 +36,8 @@ public LexerBuilder(string name)
 
             EnumBuilder enumBuilder = moduleBuilder.DefineEnum(DynamicLexerName, TypeAttributes.Public, typeof(int));
 
+            SetLexerOptions(enumBuilder, model.Options);
+
             int i = 0;
 
             enumBuilder.DefineLiteral($"{model.Name.ToUpper()}_EOS", i);
@@ -57,6 +60,32 @@ public LexerBuilder(string name)
             return (finished,extensionBuilder,dynamicAssembly,moduleBuilder);
         }
 
+
+        /// <summary>
+        /// Attaches a <see cref="LexerAttribute"/> carrying the grammar's lexer options
+        /// to the enum being built.
+        /// </summary>
+        /// <param name="builder">Builder of the enum that receives the attribute.</param>
+        /// <param name="options">
+        /// Options from the lexer model. An unset (null) option falls back to:
+        /// IgnoreEOL = true, IgnoreWS = true, KeyWordIgnoreCase = false, IndentationAWare = false.
+        /// </param>
+        private void SetLexerOptions(EnumBuilder builder, LexerOptions options)
+        {
+            var attributeType = typeof(LexerAttribute);
+
+            // Properties assigned on the attribute; order must match optionValues below.
+            var optionProperties = new PropertyInfo?[]
+            {
+                attributeType.GetProperty(nameof(LexerAttribute.IgnoreEOL)),
+                attributeType.GetProperty(nameof(LexerAttribute.IgnoreWS)),
+                attributeType.GetProperty(nameof(LexerAttribute.KeyWordIgnoreCase)),
+                attributeType.GetProperty(nameof(LexerAttribute.IndentationAWare)),
+            };
+
+            // Values for the properties above, with the fallback used when an option is unset.
+            var optionValues = new object[]
+            {
+                options.IgnoreEOL ?? true,
+                options.IgnoreWS ?? true,
+                options.IgnoreKeyWordCase ?? false,
+                options.IndentationAware ?? false
+            };
+
+            // The attribute is created through its parameterless constructor and
+            // configured only through property assignments.
+            var attribute = new CustomAttributeBuilder(
+                attributeType.GetConstructor(Type.EmptyTypes),
+                Array.Empty<object>(),
+                optionProperties!,
+                optionValues);
+
+            builder.SetCustomAttribute(attribute);
+        }
+
         private Delegate BuildExtensionIfNeeded(LexerModel model, Type? enumType)
         {
 

diff --git a/csly-cli-model/lexer/LexerModel.cs b/csly-cli-model/lexer/LexerModel.cs
@@ -1,11 +1,15 @@
+using clsy.cli.model.lexer;
 using csly.cli.model;
 using csly.cli.model.lexer;
+using csly.cli.model.parser;
 using sly.lexer;
 
 namespace clsy.cli.builder.parser.cli.model;
 
 public class LexerModel : ICLIModel
 {
+
+    public LexerOptions Options { get; set; }
     public  List<TokenModel> Tokens { get; set; } 
 
     public Dictionary<string, List<TokenModel>> TokensByName { get; set; }
@@ -14,10 +18,11 @@ public class LexerModel : ICLIModel
 
     public bool HasExtension => Tokens.Exists(x => x.Type == GenericToken.Extension);
 
-    public LexerModel(List<TokenModel> tokens, string name)
+    public LexerModel(List<TokenModel> tokens, LexerOptions options, string name)
     {
         Name = name;
         Tokens = tokens;
+        Options = options;
         var grouped = tokens.GroupBy(x => x.Name);
         TokensByName = grouped.ToDictionary(x => x.Key, x => x.ToList());
     }

diff --git a/csly-cli-model/lexer/LexerOptions.cs b/csly-cli-model/lexer/LexerOptions.cs
@@ -0,0 +1,14 @@
+using csly.cli.model;
+
+namespace clsy.cli.model.lexer;
+
+/// <summary>
+/// Lexer-level options read from the option lines of a generic lexer definition.
+/// A property is null when the grammar does not set the corresponding option.
+/// </summary>
+public class LexerOptions : ICLIModel
+{
+    /// <summary>Value given for the <c>IgnoreWhiteSpaces</c> option, or null when absent.</summary>
+    public bool? IgnoreWS { get; set; }
+
+    /// <summary>Value given for the <c>IgnoreEndOfLines</c> option, or null when absent.</summary>
+    public bool? IgnoreEOL { get; set; }
+
+    /// <summary>Value given for the <c>IgnoreKeyWordCase</c> option, or null when absent.</summary>
+    public bool? IgnoreKeyWordCase { get; set; }
+
+    /// <summary>Value given for the <c>IndentationAware</c> option, or null when absent.</summary>
+    public bool? IndentationAware { get; set; }
+}
diff --git a/csly-cli-model/parser/Optimization.cs → csly-cli-model/parser/ParserOptimization.cs b/csly-cli-model/parser/Optimization.cs → csly-cli-model/parser/ParserOptimization.cs
@@ -2,7 +2,7 @@
 
 namespace csly.cli.model.parser;
 
-public class Optimization : ICLIModel
+public class ParserOptimization : ICLIModel
 {
     public bool UseMemoization { get; set; }
     public bool BroadenTokenWindow { get; set; }

diff --git a/csly-cli-parser/CLIParser.cs b/csly-cli-parser/CLIParser.cs
@@ -1,4 +1,5 @@
 using clsy.cli.builder.parser.cli.model;
+using clsy.cli.model.lexer;
 using csly.cli.model;
 using csly.cli.model.lexer;
 using csly.cli.model.parser;
@@ -23,10 +24,10 @@ public ICLIModel Root(ICLIModel genericLex, ICLIModel parser, ParserContext cont
         return new Model(genericLex as LexerModel, parser as ParserModel) ;
     }
 
-    [Production("parserRoot : PARSER[d] ID SEMICOLON[d] optimization* rule*")]
+    [Production("parserRoot : PARSER[d] ID SEMICOLON[d] parser_optimization* rule*")]
     public ICLIModel Parser(Token<CLIToken> name, List<ICLIModel> optimizations, List<ICLIModel> rules, ParserContext context)
     {
-        var optims = optimizations.Cast<Optimization>().ToList();
+        var optims = optimizations.Cast<ParserOptimization>().ToList();
         var model = new ParserModel()
         {
             UseMemoization = optims.Exists(x => x.UseMemoization),
@@ -40,10 +41,10 @@ public ICLIModel Parser(Token<CLIToken> name, List<ICLIModel> optimizations, Lis
 
     }
 
-    [Production("optimization : LEFTBRACKET[d] [USEMEMOIZATION|BROADENTOKENWINDOW] RIGHTBRACKET[d]")]
+    [Production("parser_optimization : LEFTBRACKET[d] [USEMEMOIZATION|BROADENTOKENWINDOW] RIGHTBRACKET[d]")]
     public ICLIModel Optimization(Token<CLIToken> optimizationToken, ParserContext context)
     {
-        return new Optimization()
+        return new ParserOptimization()
         {
             UseMemoization = optimizationToken.TokenID == CLIToken.USEMEMOIZATION,
             BroadenTokenWindow = optimizationToken.TokenID == CLIToken.BROADENTOKENWINDOW
@@ -56,10 +57,19 @@ public ICLIModel Optimization(Token<CLIToken> optimizationToken, ParserContext c
 
    #region generic lexer
 
-    [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d]  modedToken*")]
-    public ICLIModel Lexer(Token<CLIToken> name, List<ICLIModel> tokens, ParserContext context)
+    /// <summary>
+    /// Builds the lexer model from the lexer name, its option lines and its token definitions.
+    /// </summary>
+    /// <param name="name">Identifier token holding the lexer name.</param>
+    /// <param name="optionList">One <see cref="LexerOptions"/> per option line, in declaration order.</param>
+    /// <param name="tokens">Token definitions, each a <see cref="TokenModel"/>.</param>
+    /// <param name="context">Parser context (not used here).</param>
+    /// <returns>
+    /// A <see cref="LexerModel"/> whose options hold, for each option, the first value
+    /// declared for it, or null when the option is never declared.
+    /// </returns>
+    [Production("genericRoot : GENERICLEXER[d] ID SEMICOLON[d] lexer_option* modedToken*")]
+    public ICLIModel Lexer(Token<CLIToken> name, List<ICLIModel> optionList, List<ICLIModel> tokens, ParserContext context)
     {
-        return new LexerModel(tokens.Cast<TokenModel>().ToList(), name.Value);
+        // Materialize once: the merge below scans the declared options four times.
+        var declared = optionList.Cast<LexerOptions>().ToList();
+        var options = new LexerOptions()
+        {
+            IgnoreWS = declared.Select(x => x.IgnoreWS).FirstOrDefault(x => x.HasValue),
+            IndentationAware = declared.Select(x => x.IndentationAware).FirstOrDefault(x => x.HasValue),
+            IgnoreEOL = declared.Select(x => x.IgnoreEOL).FirstOrDefault(x => x.HasValue),
+            IgnoreKeyWordCase = declared.Select(x => x.IgnoreKeyWordCase).FirstOrDefault(x => x.HasValue)
+        };
+        return new LexerModel(tokens.Cast<TokenModel>().ToList(), options, name.Value);
     }
 
     [Production("modedToken : mode* token")]
@@ -304,7 +314,29 @@ public ICLIModel RangeDefinition(Token<CLIToken> start, Token<CLIToken> end, Par
 
     #endregion
 
+  #region lexer options
+
+  // [Keyword("IndentationAware")] INDENTATIONAWARE,
+  // [Keyword("IgnoreWhiteSpaces")] IGNOREWHITESPACES,
+  // [Keyword("IgnoreEndOfLines")] IGNOREEOL,
+  // [Keyword("IgnoreKeyWordCase")] IGNOREKEYWORDCASING,
+
+  /// <summary>
+  /// Builds the model for a single lexer option line, e.g. <c>[IgnoreKeyWordCase(true)]</c>.
+  /// </summary>
+  /// <param name="option">Keyword token naming the option.</param>
+  /// <param name="enabledFlag">The <c>true</c> or <c>false</c> token given between parentheses.</param>
+  /// <param name="context">Parser context (not used here).</param>
+  /// <returns>
+  /// A <see cref="LexerOptions"/> in which only the property named by
+  /// <paramref name="option"/> is set; every other property is null.
+  /// </returns>
+  [Production(
+      "lexer_option : LEFTBRACKET[d] [IGNOREKEYWORDCASING|INDENTATIONAWARE|IGNOREWHITESPACES|IGNOREEOL] LEFTPAREN[d][TRUE|FALSE]RIGHTPAREN[d] RIGHTBRACKET[d]")]
+  public ICLIModel lexerOption(Token<CLIToken> option, Token<CLIToken> enabledFlag, ParserContext context)
+  {
+      // Decide on the token kind, as for the option token, rather than on the raw text:
+      // the production only admits TRUE or FALSE here.
+      bool enabled = enabledFlag.TokenID == CLIToken.TRUE;
+      return new LexerOptions()
+      {
+          IgnoreWS = option.TokenID == CLIToken.IGNOREWHITESPACES ? enabled : null,
+          IgnoreEOL = option.TokenID == CLIToken.IGNOREEOL ? enabled : null,
+          IgnoreKeyWordCase = option.TokenID == CLIToken.IGNOREKEYWORDCASING ? enabled : null,
+          IndentationAware = option.TokenID == CLIToken.INDENTATIONAWARE ? enabled : null
+      };
+  }
+
 
+  #endregion
 
   #region  parser
 

diff --git a/csly-cli-parser/CLIToken.cs b/csly-cli-parser/CLIToken.cs
@@ -61,9 +61,20 @@ public enum CLIToken
     [Keyword("Push")] PUSH,
     [Keyword("Mode")] MODE,
     [Keyword("Pop")] POP,
+    [Keyword("true")] TRUE,
+    [Keyword("false")] FALSE,
+
+
+    // parser optimizations
     [Keyword("UseMemoization")] USEMEMOIZATION,
     [Keyword("BroadenTokenWindow")] BROADENTOKENWINDOW,
 
+    // lexer options
+    [Keyword("IndentationAware")] INDENTATIONAWARE,
+    [Keyword("IgnoreWhiteSpaces")] IGNOREWHITESPACES,
+    [Keyword("IgnoreEndOfLines")] IGNOREEOL,
+    [Keyword("IgnoreKeyWordCase")] IGNOREKEYWORDCASING,
+
     [Keyword("YYYYMMDD")] YYYYMMDD,
     [Keyword("DDMMYYYY")] DDMMYYYY,