Merged
examples/arithmetics/syntaxes/arithmetics.tmLanguage.json (2 changes: 1 addition & 1 deletion)
@@ -10,7 +10,7 @@
},
{
"name": "keyword.control.arithmetics",
"match": "\\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\\b"
"match": "(?i)\\b(def|module)\\b"
}
],
"repository": {
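The TextMate change above swaps the per-letter character classes for Oniguruma's inline `(?i)` flag, which TextMate grammars support but JavaScript's RegExp does not. The quick check below is not part of the PR; it uses the equivalent JavaScript `i` flag to show that both forms accept exactly the same inputs:

```ts
// Sanity check (illustrative only): the old expanded pattern and a
// case-insensitive pattern classify every input the same way. The `i` flag
// stands in for Oniguruma's `(?i)`, which plain JavaScript RegExp rejects.
const expanded = /\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\b/;
const flagged = /\b(def|module)\b/i;

for (const word of ['def', 'DEF', 'Module', 'mODULE', 'defx']) {
    console.log(word, expanded.test(word) === flagged.test(word)); // true for every input
}
```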
@@ -128,14 +128,14 @@ function getRepository(grammar: Grammar, config: LangiumLanguageConfig): Reposit
function getControlKeywords(grammar: Grammar, pack: LangiumLanguageConfig): Pattern {
const regex = /[A-Za-z]/;
const controlKeywords = collectKeywords(grammar).filter(kw => regex.test(kw));
- const groups = groupKeywords(controlKeywords, pack.caseInsensitive);
+ const groups = groupKeywords(controlKeywords);
return {
'name': `keyword.control.${pack.id}`,
- 'match': groups.join('|')
+ 'match': `${pack.caseInsensitive ? '(?i)' : ''}${groups.join('|')}`
};
}

- function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined): string[] {
+ function groupKeywords(keywords: string[]): string[] {
const groups: {
letter: string[],
leftSpecial: string[],
@@ -144,7 +144,7 @@ function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined)
} = { letter: [], leftSpecial: [], rightSpecial: [], special: [] };

keywords.forEach(keyword => {
- const keywordPattern = caseInsensitive ? RegExpUtils.getCaseInsensitivePattern(keyword) : RegExpUtils.escapeRegExp(keyword);
+ const keywordPattern = RegExpUtils.escapeRegExp(keyword);
if (/\w/.test(keyword[0])) {
if (/\w/.test(keyword[keyword.length - 1])) {
groups.letter.push(keywordPattern);
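A simplified sketch of what the generator change means for the emitted TextMate pattern: keywords are now escaped verbatim and the whole alternation gets a `(?i)` prefix when the language is case-insensitive. The single `\b(...)\b` group below is an illustrative stand-in; the real `groupKeywords` still splits keywords into groups depending on whether they start or end with word characters.

```ts
// Illustrative only: where the `(?i)` prefix ends up in the generated match.
const caseInsensitive = true;
const keywords = ['def', 'module'];

const esc = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const group = `\\b(${keywords.map(esc).join('|')})\\b`;

const match = `${caseInsensitive ? '(?i)' : ''}${group}`;
console.log(match); // (?i)\b(def|module)\b
```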
packages/langium/src/parser/token-builder.ts (16 changes: 6 additions & 10 deletions)
@@ -11,7 +11,7 @@ import { Lexer } from 'chevrotain';
import { isKeyword, isParserRule, isTerminalRule } from '../languages/generated/ast.js';
import { streamAllContents } from '../utils/ast-utils.js';
import { getAllReachableRules, terminalRegex } from '../utils/grammar-utils.js';
- import { getCaseInsensitivePattern, isWhitespace, partialMatches } from '../utils/regexp-utils.js';
+ import { escapeRegExp, isWhitespace, partialMatches } from '../utils/regexp-utils.js';
import { stream } from '../utils/stream.js';

export interface TokenBuilderOptions {
@@ -53,14 +53,10 @@ export class DefaultTokenBuilder implements TokenBuilder {
const terminalTokens: TokenType[] = this.buildTerminalTokens(reachableRules);
const tokens: TokenType[] = this.buildKeywordTokens(reachableRules, terminalTokens, options);

- terminalTokens.forEach(terminalToken => {
-     const pattern = terminalToken.PATTERN;
-     if (typeof pattern === 'object' && pattern && 'test' in pattern && isWhitespace(pattern)) {
-         tokens.unshift(terminalToken);
-     } else {
-         tokens.push(terminalToken);
-     }
- });
+ // Add all terminal tokens to the end in the order they were defined.
+ // The Chevrotain documentation recommends adding whitespace-like tokens at the start,
+ // but assuming the lexer is able to optimize the tokens, it should not matter.
+ tokens.push(...terminalTokens);
// We don't need to add the EOF token explicitly.
// It is automatically available at the end of the token stream.
return tokens;
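The new comment argues that appending the terminal tokens at the end is safe: for a skipped whitespace terminal, its position in the token list does not change the lexing result, and placing it first is mainly a performance recommendation. A minimal sketch, assuming Chevrotain's standard `createToken`/`Lexer` API, with the whitespace token deliberately listed last:

```ts
import { createToken, Lexer } from 'chevrotain';

const Def = createToken({ name: 'Def', pattern: /def/ });
const Id = createToken({ name: 'Id', pattern: /[a-zA-Z_]\w*/ });
// Whitespace comes last here, mirroring the new push(...) behaviour.
const WS = createToken({ name: 'WS', pattern: /\s+/, group: Lexer.SKIPPED });

const lexer = new Lexer([Def, Id, WS]);
const result = lexer.tokenize('def foo');
console.log(result.tokens.map(t => t.tokenType.name)); // [ 'Def', 'Id' ]
```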
@@ -148,7 +144,7 @@

protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern {
return caseInsensitive ?
- new RegExp(getCaseInsensitivePattern(keyword.value)) :
+ new RegExp(escapeRegExp(keyword.value), 'i') :
keyword.value;
}

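The `buildKeywordPattern` change above is equivalent to the following standalone sketch (hypothetical helper names, not Langium's API surface): a case-insensitive keyword becomes an escaped RegExp with the `i` flag, while a case-sensitive keyword stays a plain string pattern.

```ts
// Mirrors the new logic; `esc` reimplements the escapeRegExp shown further down.
const esc = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

function keywordPattern(value: string, caseInsensitive: boolean): RegExp | string {
    return caseInsensitive ? new RegExp(esc(value), 'i') : value;
}

console.log(keywordPattern('\\strange\\', true)); // /\\strange\\/i, as in the updated tests
console.log(keywordPattern('module', false));     // 'module'
```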
packages/langium/src/utils/regexp-utils.ts (6 changes: 0 additions & 6 deletions)
@@ -155,12 +155,6 @@ export function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

- export function getCaseInsensitivePattern(keyword: string): string {
-     return Array.prototype.map.call(keyword, letter =>
-         /\w/.test(letter) ? `[${letter.toLowerCase()}${letter.toUpperCase()}]` : escapeRegExp(letter)
-     ).join('');
- }

/**
* Determines whether the given input has a partial match with the specified regex.
* @param regex The regex to partially match against
packages/langium/test/parser/token-builder.test.ts (10 changes: 5 additions & 5 deletions)
@@ -140,23 +140,23 @@ describe('tokenBuilder#caseInsensitivePattern', () => {
});

test('should create from keyword with special symbols', () => {
- expect(implementPattern).toEqual(/@[iI][mM][pP][lL][eE][mM][eE][nN][tT]/);
+ expect(implementPattern).toEqual(/@implement/i);
});

test('should create from keyword with special escape symbols', () => {
- expect(strangePattern).toEqual(/\\[sS][tT][rR][aA][nN][gG][eE]\\/);
+ expect(strangePattern).toEqual(/\\strange\\/i);
});

test('should create from mixed-case word', () => {
- expect(abcPattern).toEqual(/[aA][bB][cC]/);
+ expect(abcPattern).toEqual(/AbC/i);
});

test('should create from lower-case word', () => {
- expect(abPattern).toEqual(/[aA][bB]/);
+ expect(abPattern).toEqual(/ab/i);
});

test('should create from upper-case word', () => {
- expect(aPattern).toEqual(/[aA]/);
+ expect(aPattern).toEqual(/A/i);
});

test('should ignore terminals', () => {