Skip to content

Commit

Permalink
Preliminary work to bring in modal lexer, as required to disambiguate…
Browse files Browse the repository at this point in the history
… '/' and regexp.

See Section 7 of the ECMAScript Language Specification, version 3 (24-Mar-2000), for details.
  • Loading branch information
alanz committed Dec 13, 2010
1 parent b0b1e32 commit 39d77ff
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 38 deletions.
61 changes: 23 additions & 38 deletions src/Language/JavaScript/Parser/Lexer.x
Expand Up @@ -50,10 +50,11 @@ $short_str_char = [^ \n \r ' \" \\]
-- {Hex Digit} = {Digit} + [ABCDEF] + [abcdef]
@HexDigit = $digit | [a-fA-F]
-- {RegExp Chars} = {Letter}+{Digit}+['^']+['$']+['*']+['+']+['?']+['{']+['}']+['|']+['-']+['.']+[',']+['#']+['[']+[']']+['_']+['<']+['>']
--$RegExpChars = [$alpha $digit \^\$\*\+\?\{\}\|\-\.\,\#\[\]\_\<\>]
$RegExpChars = [$printable] # [\\]
$RegExpChars = [$alpha $digit \^\$\*\+\?\{\}\|\-\.\,\#\[\]\_\<\>]
--$RegExpChars = [$printable] # [\\]
-- {Non Terminator} = {String Chars1} - {CR} - {LF}
$NonTerminator = $StringChars1 # [$cr $lf]
--$NonTerminator = $StringChars1 # [$cr $lf]
$NonTerminator = [$printable] # [$cr $lf]
-- {Non Zero Digits}={Digit}-[0]
Expand All @@ -70,29 +71,32 @@ $white_char = [\ \f\v\t\r\n]
-- ! ------------------------------------------------- Terminals
tokens :-
-- State: 0 is regex allowed, 1 is / or /= allowed
-- Skip Whitespace
<0> $white_char+ ;
<0,div> $white_char+ ;
-- Skip one line comment
<0> "//"($not_eol_char)* ;
<0,div> "//"($not_eol_char)* ;
-- Skip multi-line comments. Note: may not nest
<0> "/*"($any_char)*"*/" ;
<0,div> "/*"($any_char)*"*/" ;
-- Identifier = {ID Head}{ID Tail}*
<0> @IDHead(@IDTail)* { \loc len str -> keywordOrIdent (take len str) loc }
<0,div> @IDHead(@IDTail)* { \loc len str -> keywordOrIdent (take len str) loc }
-- StringLiteral = '"' ( {String Chars1} | '\' {Printable} )* '"'
-- | '' ( {String Chars2} | '\' {Printable} )* ''
<0> $dq ( $StringChars1 | \\ $printable )* $dq
| $sq ( $StringChars2 | \\ $printable )* $sq { mkString stringToken }
<0,div> $dq ( $StringChars1 | \\ $printable )* $dq
| $sq ( $StringChars2 | \\ $printable )* $sq { mkString stringToken }
-- HexIntegerLiteral = '0x' {Hex Digit}+
<0> "0x" @HexDigit+ { mkString hexIntegerToken }
<0,div> "0x" @HexDigit+ { mkString hexIntegerToken }
-- RegExp = '/' ({RegExp Chars} | '\' {Non Terminator})+ '/' ( 'g' | 'i' | 'm' )*
--<0> "/" ($RegExpChars | "\" $NonTerminator)+ "/" ("g"|"i"|"m")* { mkString regExToken }
--<0,div> "/" ($RegExpChars | "\" $NonTerminator)+ "/" ("g"|"i"|"m")* { mkString regExToken }
-- Note: state 0 only
<0> "/" ($RegExpChars | "\" $NonTerminator)+ "/" ("g"|"i"|"m")* { mkString regExToken }
-- DecimalLiteral= {Non Zero Digits}+ '.' {Digit}* ('e' | 'E' ) {Non Zero Digits}+ {Digit}*
Expand All @@ -101,21 +105,13 @@ tokens :-
-- | {Non Zero Digits}+ {Digit}*
-- | '0'
-- | '0' '.' {Digit}+
<0> $non_zero_digit+ "." $digit* ("e"|"E") $non_zero_digit+ $digit*
<0,div> $non_zero_digit+ "." $digit* ("e"|"E") $non_zero_digit+ $digit*
| $non_zero_digit+ "." $digit*
| "0." $digit+ ("e"|"E") $non_zero_digit+ $digit*
| $non_zero_digit+ $digit*
| "0"
| "0." $digit+ { mkString decimalToken }
-- Comment Start = '/*'
-- Comment End = '*/'
-- Comment Line = '//'
-- <0> {
-- @eol_pattern { bolEndOfLine lexToken bol }
-- }
-- beginning of file
<bof> {
Expand All @@ -124,8 +120,13 @@ tokens :-
-- () { indentation lexToken dedent BOF }
}
-- Division operators: "/" and "/=" are only matched in the `div` start code
-- (the state the comments in this file call "1"). In state 0 the regular
-- expression rule is active instead, so a leading "/" there begins a regexp
-- literal rather than a division token.
<div> {
"/=" { mkString assignToken}
"/" { symbolToken DivToken}
}
<0> {
<0,div> {
\; { symbolToken SemiColonToken}
"," { symbolToken CommaToken}
"?" { symbolToken HookToken}
Expand All @@ -137,7 +138,7 @@ tokens :-
"&" { symbolToken BitwiseAndToken}
"===" { symbolToken StrictEqToken}
"==" { symbolToken EqToken}
"*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | ">>>=" | "&=" | "^=" | "|="
"*=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | ">>>=" | "&=" | "^=" | "|="
{ mkString assignToken}
"=" { symbolToken SimpleAssignToken}
"!==" { symbolToken StrictNeToken}
Expand All @@ -154,7 +155,6 @@ tokens :-
"+" { symbolToken PlusToken}
"-" { symbolToken MinusToken}
"*" { symbolToken MulToken}
"/" { symbolToken DivToken}
"%" { symbolToken ModToken}
"!" { symbolToken NotToken}
"~" { symbolToken BitwiseNotToken}
Expand All @@ -170,21 +170,6 @@ tokens :-
-- <0> {
-- "let" { symbolToken TokenLet }
-- "in" { symbolToken TokenIn }
-- -- "9" { symbolToken TokenInt } --TODO: use real value\
-- $non_zero_digit $digit* { token TokenInt read }
-- "var" { symbolToken TokenVar } --TODO: use real value
-- "=" {symbolToken TokenEq }
-- "+" {symbolToken TokenPlus }
-- "-" {symbolToken TokenMinus }
-- "*" {symbolToken TokenTimes }
-- "/" {symbolToken TokenDiv }
-- "(" {symbolToken TokenOB }
-- ")" {symbolToken TokenCB }
-- }
{
Expand Down
7 changes: 7 additions & 0 deletions src/Language/JavaScript/Parser/Parser.hs
Expand Up @@ -2,6 +2,7 @@ module Language.JavaScript.Parser.Parser (
-- * Parsing
parse
, readJs
, parseFile
-- * Parsing expressions
-- parseExpr
, parseUsing
Expand Down Expand Up @@ -48,6 +49,12 @@ readJs input = do
Left msg -> error (show msg)
Right p -> p

-- | Read the named file with 'readFile' and hand its contents to 'readJs'.
-- The parsed result is returned inside 'IO'.
-- parseFile :: FilePath -> IO AST.JSNode  -- presumed signature, TODO confirm before enabling
parseFile filename = fmap readJs (readFile filename)


-- | Parse one compound statement, or a sequence of simple statements.
-- Generally used for interactive input, such as from the command line of an interpreter.
Expand Down
6 changes: 6 additions & 0 deletions src/Language/JavaScript/Parser/ParserMonad.hs
Expand Up @@ -32,6 +32,7 @@ module Language.JavaScript.Parser.ParserMonad
, pushStartCode
, popStartCode
, getStartCode
, setStartCode
, getIndent
, pushIndent
, popIndent
Expand Down Expand Up @@ -161,6 +162,11 @@ getStartCode = do
[] -> internalError "fatal error in lexer: start code stack empty on getStartCode"
code:_ -> return code

-- | Replace the current lexer start code with @code@.
-- Implemented as 'popStartCode' followed by 'pushStartCode', so the
-- start-code stack keeps the same depth it had before the call.
setStartCode :: Int -> P ()
setStartCode code = popStartCode >> pushStartCode code

pushIndent :: Int -> P ()
pushIndent indent = do
oldStack <- gets indentStack
Expand Down

0 comments on commit 39d77ff

Please sign in to comment.