From c5e0bce137f1e8d6b3ed9d483e0b9c18283d0c39 Mon Sep 17 00:00:00 2001 From: Erik de Castro Lopo Date: Sat, 19 Mar 2016 10:25:20 +1100 Subject: [PATCH] Lexer.x: Fix regex lexing bug The simplest example of what was failing to compile was `/[/]/` where the forward slash in the middle was being incorrectly detected as the end of the regex. Closes: https://github.com/erikd/language-javascript/issues/44 --- src/Language/JavaScript/Parser/Lexer.x | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index 567d02a..4015074 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -90,8 +90,13 @@ $short_str_char = [^ \n \r ' \" \\] -- $RegExpChars = [$printable] # [\\] -- {Non Terminator} = {String Chars1} - {CR} - {LF} -- $NonTerminator = $StringCharsDoubleQuote # [$cr $lf] -$NonTerminator = [$printable] # [$cr $lf] --- {Non Zero Digits}={Digit}-[0] +$regNonTerminator = [$printable] # [$cr $lf \[] + + +$reg_char_class_chars = [^ $cr $lf \[ \] ] +@reg_char_class_escapes = \\ ( \[ | \] ) +@regCharClass = \[ ($reg_char_class_chars | @reg_char_class_escapes)* \] + -- ~ (LineTerminator | MUL | BSLASH | DIV) $RegExpFirstChar = [$printable] # [ $cr $lf \* \\ \/] @@ -219,7 +224,7 @@ tokens :- -- StringLiteral = '"' ( {String Chars1} | '\' {Printable} )* '"' -- | '' ( {String Chars2} | '\' {Printable} )* '' $dq (@stringCharsDoubleQuote *) $dq - | $sq (@stringCharsSingleQuote *) $sq { adapt (mkString stringToken) } + | $sq (@stringCharsSingleQuote *) $sq { adapt (mkString stringToken) } -- HexIntegerLiteral = '0x' {Hex Digit}+ ("0x"|"0X") @HexDigit+ { adapt (mkString hexIntegerToken) } @@ -230,8 +235,10 @@ tokens :- -- RegExp = '/' ({RegExp Chars} | '\' {Non Terminator})+ '/' ( 'g' | 'i' | 'm' )* -- "/" ($RegExpChars | "\" $NonTerminator)+ "/" ("g"|"i"|"m")* { mkString regExToken } --- Based on the Jint version - "/" ($RegExpFirstChar | "\" $NonTerminator) ($RegExpChars | "\" $NonTerminator)* "/" ("g"|"i"|"m")* { adapt (mkString regExToken) } + "/" + ("\" $regNonTerminator | @regCharClass | $RegExpFirstChar) + ("\" $regNonTerminator | @regCharClass | $RegExpChars)* "/" ("g"|"i"|"m")* { adapt (mkString regExToken) } +