Merged
examples/arithmetics/syntaxes/arithmetics.tmLanguage.json (2 changes: 1 addition & 1 deletion)
@@ -10,7 +10,7 @@
},
{
"name": "keyword.control.arithmetics",
"match": "\\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\\b"
"match": "(?i)\\b(def|module)\\b"
}
],
"repository": {
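The TextMate change above swaps the per-letter character classes for Oniguruma's inline `(?i)` flag, which TextMate grammars support but JavaScript's RegExp does not. The quick check below is not part of the PR; it uses the equivalent JavaScript `i` flag to show that both forms accept exactly the same inputs:

```ts
// Sanity check (illustrative only): the old expanded pattern and a
// case-insensitive pattern classify every input the same way. The `i` flag
// stands in for Oniguruma's `(?i)`, which plain JavaScript RegExp rejects.
const expanded = /\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\b/;
const flagged = /\b(def|module)\b/i;

for (const word of ['def', 'DEF', 'Module', 'mODULE', 'defx']) {
    console.log(word, expanded.test(word) === flagged.test(word)); // true for every input
}
```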
@@ -128,14 +128,14 @@ function getRepository(grammar: Grammar, config: LangiumLanguageConfig): Reposit
function getControlKeywords(grammar: Grammar, pack: LangiumLanguageConfig): Pattern {
const regex = /[A-Za-z]/;
const controlKeywords = collectKeywords(grammar).filter(kw => regex.test(kw));
- const groups = groupKeywords(controlKeywords, pack.caseInsensitive);
+ const groups = groupKeywords(controlKeywords);
return {
'name': `keyword.control.${pack.id}`,
- 'match': groups.join('|')
+ 'match': `${pack.caseInsensitive ? '(?i)' : ''}${groups.join('|')}`
};
}

- function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined): string[] {
+ function groupKeywords(keywords: string[]): string[] {
const groups: {
letter: string[],
leftSpecial: string[],
@@ -144,7 +144,7 @@ function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined)
} = { letter: [], leftSpecial: [], rightSpecial: [], special: [] };

keywords.forEach(keyword => {
- const keywordPattern = caseInsensitive ? RegExpUtils.getCaseInsensitivePattern(keyword) : RegExpUtils.escapeRegExp(keyword);
+ const keywordPattern = RegExpUtils.escapeRegExp(keyword);
if (/\w/.test(keyword[0])) {
if (/\w/.test(keyword[keyword.length - 1])) {
groups.letter.push(keywordPattern);
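A simplified sketch of what the generator change means for the emitted TextMate pattern: keywords are now escaped verbatim and the whole alternation gets a `(?i)` prefix when the language is case-insensitive. The single `\b(...)\b` group below is an illustrative stand-in; the real `groupKeywords` still splits keywords into groups depending on whether they start or end with word characters.

```ts
// Illustrative only: where the `(?i)` prefix ends up in the generated match.
const caseInsensitive = true;
const keywords = ['def', 'module'];

const esc = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const group = `\\b(${keywords.map(esc).join('|')})\\b`;

const match = `${caseInsensitive ? '(?i)' : ''}${group}`;
console.log(match); // (?i)\b(def|module)\b
```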
packages/langium/src/parser/token-builder.ts (16 changes: 6 additions & 10 deletions)
@@ -11,7 +11,7 @@ import { Lexer } from 'chevrotain';
import { isKeyword, isParserRule, isTerminalRule } from '../languages/generated/ast.js';
import { streamAllContents } from '../utils/ast-utils.js';
import { getAllReachableRules, terminalRegex } from '../utils/grammar-utils.js';
- import { getCaseInsensitivePattern, isWhitespace, partialMatches } from '../utils/regexp-utils.js';
+ import { escapeRegExp, isWhitespace, partialMatches } from '../utils/regexp-utils.js';
import { stream } from '../utils/stream.js';

export interface TokenBuilderOptions {
@@ -53,14 +53,10 @@ export class DefaultTokenBuilder implements TokenBuilder {
const terminalTokens: TokenType[] = this.buildTerminalTokens(reachableRules);
const tokens: TokenType[] = this.buildKeywordTokens(reachableRules, terminalTokens, options);

- terminalTokens.forEach(terminalToken => {
-     const pattern = terminalToken.PATTERN;
-     if (typeof pattern === 'object' && pattern && 'test' in pattern && isWhitespace(pattern)) {
-         tokens.unshift(terminalToken);
-     } else {
-         tokens.push(terminalToken);
-     }
- });
+ // Add all terminal tokens to the end in the order they were defined.
+ // The Chevrotain documentation recommends adding whitespace-like tokens at the start,
+ // but assuming the lexer is able to optimize the tokens, it should not matter.
+ tokens.push(...terminalTokens);
// We don't need to add the EOF token explicitly.
// It is automatically available at the end of the token stream.
return tokens;
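The new comment argues that appending the terminal tokens at the end is safe: for a skipped whitespace terminal, its position in the token list does not change the lexing result, and placing it first is mainly a performance recommendation. A minimal sketch, assuming Chevrotain's standard `createToken`/`Lexer` API, with the whitespace token deliberately listed last:

```ts
import { createToken, Lexer } from 'chevrotain';

const Def = createToken({ name: 'Def', pattern: /def/ });
const Id = createToken({ name: 'Id', pattern: /[a-zA-Z_]\w*/ });
// Whitespace comes last here, mirroring the new push(...) behaviour.
const WS = createToken({ name: 'WS', pattern: /\s+/, group: Lexer.SKIPPED });

const lexer = new Lexer([Def, Id, WS]);
const result = lexer.tokenize('def foo');
console.log(result.tokens.map(t => t.tokenType.name)); // [ 'Def', 'Id' ]
```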
@@ -148,7 +144,7 @@

protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern {
return caseInsensitive ?
- new RegExp(getCaseInsensitivePattern(keyword.value)) :
+ new RegExp(escapeRegExp(keyword.value), 'i') :
keyword.value;
}

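The `buildKeywordPattern` change above is equivalent to the following standalone sketch (hypothetical helper names, not Langium's API surface): a case-insensitive keyword becomes an escaped RegExp with the `i` flag, while a case-sensitive keyword stays a plain string pattern.

```ts
// Mirrors the new logic; `esc` reimplements the escapeRegExp shown further down.
const esc = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

function keywordPattern(value: string, caseInsensitive: boolean): RegExp | string {
    return caseInsensitive ? new RegExp(esc(value), 'i') : value;
}

console.log(keywordPattern('\\strange\\', true)); // /\\strange\\/i, as in the updated tests
console.log(keywordPattern('module', false));     // 'module'
```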
packages/langium/src/utils/regexp-utils.ts (6 changes: 0 additions & 6 deletions)
@@ -155,12 +155,6 @@ export function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

- export function getCaseInsensitivePattern(keyword: string): string {
-     return Array.prototype.map.call(keyword, letter =>
-         /\w/.test(letter) ? `[${letter.toLowerCase()}${letter.toUpperCase()}]` : escapeRegExp(letter)
-     ).join('');
- }

/**
* Determines whether the given input has a partial match with the specified regex.
* @param regex The regex to partially match against
packages/langium/test/parser/token-builder.test.ts (10 changes: 5 additions & 5 deletions)
@@ -140,23 +140,23 @@ describe('tokenBuilder#caseInsensitivePattern', () => {
});

test('should create from keyword with special symbols', () => {
- expect(implementPattern).toEqual(/@[iI][mM][pP][lL][eE][mM][eE][nN][tT]/);
+ expect(implementPattern).toEqual(/@implement/i);
});

test('should create from keyword with special escape symbols', () => {
- expect(strangePattern).toEqual(/\\[sS][tT][rR][aA][nN][gG][eE]\\/);
+ expect(strangePattern).toEqual(/\\strange\\/i);
});

test('should create from mixed-case word', () => {
- expect(abcPattern).toEqual(/[aA][bB][cC]/);
+ expect(abcPattern).toEqual(/AbC/i);
});

test('should create from lower-case word', () => {
- expect(abPattern).toEqual(/[aA][bB]/);
+ expect(abPattern).toEqual(/ab/i);
});

test('should create from upper-case word', () => {
- expect(aPattern).toEqual(/[aA]/);
+ expect(aPattern).toEqual(/A/i);
});

test('should ignore terminals', () => {