From c48e7dee50cd074fd5ccf0f8afbdad313a3124de Mon Sep 17 00:00:00 2001 From: inferrinizzard Date: Tue, 16 Nov 2021 22:14:48 -0800 Subject: [PATCH] fix zeroturnaround#162, add handling for prefix/suffix only specialWordChars --- src/core/Tokenizer.ts | 12 ++++++------ src/core/regexFactory.ts | 23 ++++++++++++++--------- src/languages/Db2Formatter.ts | 2 +- src/languages/MariaDbFormatter.ts | 2 +- src/languages/MySqlFormatter.ts | 2 +- src/languages/PlSqlFormatter.ts | 2 +- src/languages/TSqlFormatter.ts | 2 +- 7 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/core/Tokenizer.ts b/src/core/Tokenizer.ts index e06bec82f..ab917b155 100644 --- a/src/core/Tokenizer.ts +++ b/src/core/Tokenizer.ts @@ -16,7 +16,7 @@ interface TokenizerOptions { indexedPlaceholderTypes?: string[]; namedPlaceholderTypes: string[]; lineCommentTypes: string[]; - specialWordChars?: string[]; + specialWordChars?: { prefix?: string; suffix?: string; any: string }; operators?: string[]; } @@ -52,23 +52,23 @@ export default class Tokenizer { [TokenType.STRING]: regexFactory.createStringRegex(cfg.stringTypes), [TokenType.RESERVED_KEYWORD]: regexFactory.createReservedWordRegex( cfg.reservedKeywords, - cfg.specialWordChars + (cfg.specialWordChars?.any ?? '') + (cfg.specialWordChars?.suffix ?? '') ), [TokenType.RESERVED_DEPENDENT_CLAUSE]: regexFactory.createReservedWordRegex( cfg.reservedDependentClauses ?? [], - cfg.specialWordChars + (cfg.specialWordChars?.any ?? '') + (cfg.specialWordChars?.suffix ?? '') ), [TokenType.RESERVED_LOGICAL_OPERATOR]: regexFactory.createReservedWordRegex( cfg.reservedLogicalOperators, - cfg.specialWordChars + (cfg.specialWordChars?.any ?? '') + (cfg.specialWordChars?.suffix ?? '') ), [TokenType.RESERVED_COMMAND]: regexFactory.createReservedWordRegex( cfg.reservedCommands, - cfg.specialWordChars + (cfg.specialWordChars?.any ?? '') + (cfg.specialWordChars?.suffix ?? '') ), [TokenType.RESERVED_BINARY_COMMAND]: regexFactory.createReservedWordRegex( cfg.reservedBinaryCommands, - cfg.specialWordChars + (cfg.specialWordChars?.any ?? '') + (cfg.specialWordChars?.suffix ?? '') ), [TokenType.OPERATOR]: regexFactory.createOperatorRegex('+-/*%&|^><=.,;[]{}`:$', [ '<>', diff --git a/src/core/regexFactory.ts b/src/core/regexFactory.ts index 75fdcd5f0..0d17ce624 100644 --- a/src/core/regexFactory.ts +++ b/src/core/regexFactory.ts @@ -15,24 +15,29 @@ export function createLineCommentRegex(lineCommentTypes: string[]) { ); } -export function createReservedWordRegex( - reservedKeywords: string[], - specialWordChars: string[] = [] -) { +export function createReservedWordRegex(reservedKeywords: string[], specialWordChars = '') { if (reservedKeywords.length === 0) { return new RegExp(`^\b$`, 'u'); } const reservedKeywordsPattern = sortByLengthDesc(reservedKeywords) .join('|') .replace(/ /gu, '\\s+'); - return new RegExp(`^(${reservedKeywordsPattern})(?![${specialWordChars.join('')}]+)\\b`, 'iu'); + return new RegExp( + `^(${reservedKeywordsPattern})(?![${escapeRegExp(specialWordChars)}]+)\\b`, + 'iu' + ); } -export function createWordRegex(specialChars: string[] = []) { +export function createWordRegex( + specialChars: { any?: string; suffix?: string; prefix?: string } = {} +) { + const prefixLookBehind = `(?<=[${escapeRegExp(specialChars.prefix ?? '')}]?)`; + const suffixLookAhead = `(?=[${escapeRegExp(specialChars.suffix ?? '')}]?)`; + const unicodeWordChar = + '\\p{Alphabetic}\\p{Mark}\\p{Decimal_Number}\\p{Connector_Punctuation}\\p{Join_Control}'; + const specialWordChars = `${escapeRegExp(specialChars.any ?? '')}`; return new RegExp( - `^([\\p{Alphabetic}\\p{Mark}\\p{Decimal_Number}\\p{Connector_Punctuation}\\p{Join_Control}${specialChars.join( - '' - )}]+)`, + `^(${prefixLookBehind}([${unicodeWordChar}${specialWordChars}]+)${suffixLookAhead})`, 'u' ); } diff --git a/src/languages/Db2Formatter.ts b/src/languages/Db2Formatter.ts index 2e5c6f22c..6192ddeef 100644 --- a/src/languages/Db2Formatter.ts +++ b/src/languages/Db2Formatter.ts @@ -872,7 +872,7 @@ export default class Db2Formatter extends Formatter { static indexedPlaceholderTypes = ['?']; static namedPlaceholderTypes = [':']; static lineCommentTypes = ['--']; - static specialWordChars = ['#', '@']; + static specialWordChars = { any: '#@' }; static operators = ['**', '!>', '!<', '||']; tokenizer() { diff --git a/src/languages/MariaDbFormatter.ts b/src/languages/MariaDbFormatter.ts index d6b1b3b90..13ee89e97 100644 --- a/src/languages/MariaDbFormatter.ts +++ b/src/languages/MariaDbFormatter.ts @@ -1161,7 +1161,7 @@ export default class MariaDbFormatter extends Formatter { static indexedPlaceholderTypes = ['?']; static namedPlaceholderTypes = []; static lineCommentTypes = ['--', '#']; - static specialWordChars = ['@']; + static specialWordChars = { any: '@' }; static operators = [':=', '<<', '>>', '<=>', '&&', '||']; tokenizer() { diff --git a/src/languages/MySqlFormatter.ts b/src/languages/MySqlFormatter.ts index 9932f8853..b8e0acfb9 100644 --- a/src/languages/MySqlFormatter.ts +++ b/src/languages/MySqlFormatter.ts @@ -1321,7 +1321,7 @@ export default class MySqlFormatter extends Formatter { static indexedPlaceholderTypes = ['?']; static namedPlaceholderTypes = []; static lineCommentTypes = ['--', '#']; - static specialWordChars = ['@']; + static specialWordChars = { any: '@', prefix: ':' }; static operators = [':=', '<<', '>>', '<=>', '&&', '||', '->', '->>']; tokenizer() { diff --git a/src/languages/PlSqlFormatter.ts b/src/languages/PlSqlFormatter.ts index 71923d04c..bc1846458 100644 --- a/src/languages/PlSqlFormatter.ts +++ b/src/languages/PlSqlFormatter.ts @@ -451,7 +451,7 @@ export default class PlSqlFormatter extends Formatter { static indexedPlaceholderTypes = ['?']; static namedPlaceholderTypes = [':']; static lineCommentTypes = ['--']; - static specialWordChars = ['_', '$', '#', '.', '@']; + static specialWordChars = { any: '_$#.@' }; static operators = [ '||', '**', diff --git a/src/languages/TSqlFormatter.ts b/src/languages/TSqlFormatter.ts index 145ac67c3..e866c9aba 100644 --- a/src/languages/TSqlFormatter.ts +++ b/src/languages/TSqlFormatter.ts @@ -1243,7 +1243,7 @@ export default class TSqlFormatter extends Formatter { static indexedPlaceholderTypes = []; static namedPlaceholderTypes = ['@']; static lineCommentTypes = ['--']; - static specialWordChars = ['#', '@']; + static specialWordChars = { any: '#@' }; static operators = ['!<', '!>', '+=', '-=', '*=', '/=', '%=', '|=', '&=', '^=', '::']; tokenizer() {