Permalink
Browse files

Initial import.

darcs-hash:20090314073228-639e1-aec18678459d95783a92e1068cc0360489196ee9.gz
  • Loading branch information...
0 parents commit 6a4f0829906b9849e22a4b50b89c7c4ee8e8a332 bjpop committed Mar 14, 2009
27 LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2008-2009 Bernard James Pope
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. Neither the name of the author nor the names of his contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3 Setup.lhs
@@ -0,0 +1,3 @@
+#!/usr/bin/env runhaskell
+> import Distribution.Simple
+> main = defaultMain
37 language-python.cabal
@@ -0,0 +1,37 @@
+name: language-python
+version: 0.1.0
+cabal-version: >= 1.2
+synopsis: Parsing and pretty printing of Python code.
+description: language-python is a Haskell library for parsing and pretty printing
+ Python code. Currently it only supports Python version 3.0.
+category: Language
+license: BSD3
+license-file: LICENSE
+copyright: (c) 2008-2009 Bernard James Pope
+author: Bernard James Pope
+maintainer: bjpop@csse.unimelb.edu.au
+homepage: http://projects.haskell.org/language-python/
+build-depends: base
+build-type: Simple
+stability: experimental
+ghc-options:
+extra-source-files: src/Language/Python/Version3/Parser/Parser.y
+ src/Language/Python/Version3/Parser/Lexer.x
+
+Library
+ hs-source-dirs: src
+ build-depends: base, containers, pretty, bytestring
+ build-tools: happy, alex
+ exposed-modules:
+ Language.Python.Data.SrcLocation
+ Language.Python.Version3.Parser
+ Language.Python.Version3.Lexer
+ Language.Python.Version3.Syntax.AST
+ Language.Python.Version3.Syntax.Pretty
+ other-modules:
+ Language.Python.Version3.Parser.Parser
+ Language.Python.Version3.Parser.Lexer
+ Language.Python.Version3.Parser.ParserMonad
+ Language.Python.Version3.Parser.ParserUtils
+ Language.Python.Version3.Parser.Token
+
43 src/Language/Python/Data/SrcLocation.hs
@@ -0,0 +1,43 @@
-- | Source locations for Python code, used to attach position
-- information to tokens and AST nodes.
module Language.Python.Data.SrcLocation (
   SrcLocation (..),
   Location (..),
   initialSrcLocation,
   incColumn,
   incLine,
   incTab
) where

-- | A point in a source file: the file name plus a 1-based row and
-- column. 'NoLocation' marks values with no position information
-- (for example, synthesised tokens).
data SrcLocation =
   Sloc { sloc_filename :: String
        , sloc_row :: !Int
        , sloc_column :: !Int
        }
   | NoLocation
   deriving (Eq, Ord, Show)

-- | Things which have (or may have) a source location.
class Location a where
   location :: a -> SrcLocation
   -- default declaration: no location known
   location _ = NoLocation

-- | The location of the first character of a file: row 1, column 1.
initialSrcLocation :: String -> SrcLocation
initialSrcLocation filename
   = Sloc
     { sloc_filename = filename
     , sloc_row = 1
     , sloc_column = 1
     }

-- | Advance the column by @n@ characters. 'NoLocation' is left unchanged.
incColumn :: Int -> SrcLocation -> SrcLocation
incColumn n loc@(Sloc { sloc_column = col })
   = loc { sloc_column = col + n }
incColumn _ NoLocation = NoLocation

-- | Advance the column to the next tab stop (tab stops every 8 columns).
-- 'NoLocation' is left unchanged.
incTab :: SrcLocation -> SrcLocation
incTab loc@(Sloc { sloc_column = col })
   = loc { sloc_column = newCol }
   where
   newCol = col + 8 - (col - 1) `mod` 8
incTab NoLocation = NoLocation

-- | Advance the row by @n@ lines and reset the column to 1.
-- 'NoLocation' is left unchanged.
incLine :: Int -> SrcLocation -> SrcLocation
incLine n loc@(Sloc { sloc_row = row })
   = loc { sloc_column = 1, sloc_row = row + n }
incLine _ NoLocation = NoLocation
4 src/Language/Python/Version3/Parser.hs
@@ -0,0 +1,4 @@
-- | Public interface to the Python version 3 parser.
module Language.Python.Version3.Parser where

import Language.Python.Version3.Parser.Parser
-- the location module is Language.Python.Data.SrcLocation
-- (there is no Language.Python.Data.SourceLocation)
import Language.Python.Data.SrcLocation
472 src/Language/Python/Version3/Parser/Lexer.x
@@ -0,0 +1,472 @@
{
module Language.Python.Version3.Parser.Lexer
   (initStartCodeStack, lexToken, endOfFileToken, lexCont) where

import Language.Python.Version3.Parser.Token hiding (True, False)
-- alias needed: the lexer actions refer to Token.Float, Token.Integer, etc.
import qualified Language.Python.Version3.Parser.Token as Token
import Language.Python.Version3.Parser.ParserMonad
-- the location module lives under Language.Python.Data
import Language.Python.Data.SrcLocation
import qualified Data.Map as Map
import Monad (liftM)
import Data.List (foldl')
import Numeric (readHex, readOct)
import qualified Data.ByteString.Char8 as BS (pack)
}
+
-- character sets
$lf = \n -- line feed
$cr = \r -- carriage return
$eol_char = [$lf $cr] -- any end of line character
$not_eol_char = ~$eol_char -- anything but an end of line character
$white_char = [\ \n\r\f\v\t]
$white_no_nl = $white_char # $eol_char
$ident_letter = [a-zA-Z_]
$digit = 0-9
$non_zero_digit = 1-9
$oct_digit = 0-7
$hex_digit = [$digit a-fA-F]
$bin_digit = 0-1
$short_str_char = [^ \n \r ' \" \\]
$long_str_char = [. \n] # [' \"]
$short_byte_str_char = \0-\127 # [\n \r ' \" \\]
$long_byte_str_char = \0-\127 # [' \"]
$not_single_quote = [. \n] # '
$not_double_quote = [. \n] # \"

-- macro definitions
@exponent = (e | E) (\+ | \-)? $digit+
@fraction = \. $digit+
@int_part = $digit+
@point_float = (@int_part? @fraction) | @int_part \.
@exponent_float = (@int_part | @point_float) @exponent
@float_number = @point_float | @exponent_float
-- a line may end with \n, \r\n, or a lone \r (old Mac style);
-- the third alternative was previously a duplicate of "$cr $lf",
-- so lone carriage returns were never recognised as line ends
@eol_pattern = $lf | $cr $lf | $cr
@one_single_quote = ' $not_single_quote
@two_single_quotes = '' $not_single_quote
@one_double_quote = \" $not_double_quote
@two_double_quotes = \"\" $not_double_quote
@byte_str_prefix = b | B
@raw_str_prefix = r | R
@raw_byte_str_prefix = @byte_str_prefix @raw_str_prefix
@backslash_pair = \\ (\\|'|\"|@eol_pattern|$short_str_char)
@backslash_pair_bs = \\ (\\|'|\"|@eol_pattern|$short_byte_str_char)
@short_str_item_single = $short_str_char|@backslash_pair|\"
@short_str_item_double = $short_str_char|@backslash_pair|'
@short_byte_str_item_single = $short_byte_str_char|@backslash_pair_bs|\"
@short_byte_str_item_double = $short_byte_str_char|@backslash_pair_bs|'
@long_str_item_single = $long_str_char|@backslash_pair|@one_single_quote|@two_single_quotes|\"
@long_str_item_double = $long_str_char|@backslash_pair|@one_double_quote|@two_double_quotes|'
@long_byte_str_item_single = $long_byte_str_char|@backslash_pair_bs|@one_single_quote|@two_single_quotes|\"
@long_byte_str_item_double = $long_byte_str_char|@backslash_pair_bs|@one_double_quote|@two_double_quotes|'
+
tokens :-

-- these rules below could match inside a string literal, but they
-- will not be applied because the rule for the literal will always
-- match a longer sequence of characters.

\# ($not_eol_char)* ; -- skip comments
$white_no_nl+ ; -- skip whitespace

\\ @eol_pattern ; -- line join

-- numeric literals
<0> {
   @float_number { token Token.Float readFloat }
   $non_zero_digit $digit* { token Token.Integer read }
   (@float_number | @int_part) (j | J) { token Token.Imaginary (readFloat.init) }
   0+ { token Token.Integer read }
   0 (o | O) $oct_digit+ { token Token.Integer read }
   0 (x | X) $hex_digit+ { token Token.Integer read }
   0 (b | B) $bin_digit+ { token Token.Integer readBinary }
}

-- String literals

<0> {
   ' @short_str_item_single* ' { mkString 1 1 stringToken }
   @raw_str_prefix ' @short_str_item_single* ' { mkString 2 1 rawStringToken }
   @byte_str_prefix ' @short_byte_str_item_single* ' { mkString 2 1 byteStringToken }
   @raw_byte_str_prefix ' @short_byte_str_item_single* ' { mkString 3 1 rawByteStringToken }

   \" @short_str_item_double* \" { mkString 1 1 stringToken }
   @raw_str_prefix \" @short_str_item_double* \" { mkString 2 1 rawStringToken }
   @byte_str_prefix \" @short_byte_str_item_double* \" { mkString 2 1 byteStringToken }
   @raw_byte_str_prefix \" @short_byte_str_item_double* \" { mkString 3 1 rawByteStringToken }

   ''' @long_str_item_single* ''' { mkString 3 3 stringToken }
   @raw_str_prefix ''' @long_str_item_single* ''' { mkString 4 3 rawStringToken }
   @byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString 4 3 byteStringToken }
   @raw_byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString 5 3 rawByteStringToken }

   \"\"\" @long_str_item_double* \"\"\" { mkString 3 3 stringToken }
   @raw_str_prefix \"\"\" @long_str_item_double* \"\"\" { mkString 4 3 rawStringToken }
   @byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString 4 3 byteStringToken }
   @raw_byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString 5 3 rawByteStringToken }
}

-- at a line end, switch to the beginning-of-line state to handle indentation
<0> {
   @eol_pattern { begin bol }
}

<dedent> () { dedentation }

-- beginning of line
<bol> {
   @eol_pattern ;
   () { indentation BOL }
}

-- beginning of file
<bof> {
   @eol_pattern ;
   () { indentation BOF }
}


<0> $ident_letter($ident_letter|$digit)* { \loc len str -> keywordOrIdent (take len str) loc }

-- operators and separators
--
<0> {
   "(" { openParen Token.LeftRoundBracket }
   ")" { closeParen Token.RightRoundBracket }
   "[" { openParen Token.LeftSquareBracket }
   "]" { closeParen Token.RightSquareBracket }
   "{" { openParen Token.LeftBrace }
   "}" { closeParen Token.RightBrace }
   "->" { symbolToken Token.RightArrow }
   "." { symbolToken Token.Dot }
   "..." { symbolToken Token.Ellipsis }
   "~" { symbolToken Token.Tilde }
   "+" { symbolToken Token.Plus }
   "-" { symbolToken Token.Minus }
   "**" { symbolToken Token.Exponent }
   "*" { symbolToken Token.Mult }
   "/" { symbolToken Token.Div }
   "//" { symbolToken Token.FloorDiv }
   "%" { symbolToken Token.Modulo }
   "<<" { symbolToken Token.ShiftLeft }
   ">>" { symbolToken Token.ShiftRight }
   "<" { symbolToken Token.LessThan }
   "<=" { symbolToken Token.LessThanEquals }
   ">" { symbolToken Token.GreaterThan }
   ">=" { symbolToken Token.GreaterThanEquals }
   "==" { symbolToken Token.Equality }
   "!=" { symbolToken Token.NotEquals }
   "^" { symbolToken Token.Xor }
   "|" { symbolToken Token.BinaryOr }
   -- NOTE(review): "&&" and "||" are not Python 3 operators (Python uses
   -- the keywords "and"/"or", which are already handled by keywordOrIdent).
   -- These two rules make the lexer silently accept non-Python input and
   -- produce the same tokens as the keywords — confirm whether intended.
   "&&" { symbolToken Token.And }
   "&" { symbolToken Token.BinaryAnd }
   "||" { symbolToken Token.Or }
   ":" { symbolToken Token.Colon }
   "=" { symbolToken Token.Assign }
   "+=" { symbolToken Token.PlusAssign }
   "-=" { symbolToken Token.MinusAssign }
   "*=" { symbolToken Token.MultAssign }
   "/=" { symbolToken Token.DivAssign }
   "%=" { symbolToken Token.ModAssign }
   "**=" { symbolToken Token.PowAssign }
   "&=" { symbolToken Token.BinAndAssign }
   "|=" { symbolToken Token.BinOrAssign }
   "^=" { symbolToken Token.BinXorAssign }
   "<<=" { symbolToken Token.LeftShiftAssign }
   ">>=" { symbolToken Token.RightShiftAssign }
   "//=" { symbolToken Token.FloorDivAssign }
   "," { symbolToken Token.Comma }
   "@" { symbolToken Token.At }
   \; { symbolToken Token.SemiColon }
}
+
{
-- Functions for building tokens

type StartCode = Int
type Action = SrcLocation -> Int -> String -> P Token

-- | Handle the <dedent> start code: compare the current column against
-- the indentation level on top of the indent stack, popping one level
-- (and emitting one dedent token) per call until the levels agree.
dedentation :: Action
dedentation loc _len _str = do
   stackTop <- getIndent
   let curCol = sloc_column loc
   case compare curCol stackTop of
      EQ -> popStartCode >> lexToken
      LT -> popIndent >> return dedentToken
      GT -> failP loc ["indentation error"]
+
-- Beginning of. BOF = beginning of file, BOL = beginning of line
data BO = BOF | BOL

-- | Handle the <bol>/<bof> start codes: decide whether the current line
-- opens a new indentation level, closes one, or stays level.
indentation :: BO -> Action
-- At EOF we may need to generate a newline, in case we came here
-- from the beginning of a line (but not from the beginning of the file).
indentation bo _loc _len [] = do
   popStartCode
   case bo of
      BOF -> lexToken
      BOL -> return (Newline NoLocation)   -- newline location unknown here
indentation bo loc _len _str = do
   popStartCode
   parenDepth <- getParenStackDepth
   if parenDepth > 0
      -- inside brackets, indentation is not significant
      then lexToken
      else do
         stackTop <- getIndent
         let curCol = sloc_column loc
         case compare curCol stackTop of
            EQ -> case bo of
                     BOF -> lexToken
                     BOL -> return (Newline NoLocation)
            LT -> do pushStartCode dedent
                     return (Newline NoLocation)
            GT -> do pushIndent curCol
                     return (Indent loc)
+
-- | Enter a new start code and continue lexing; produces no token itself.
-- (Unused arguments are now explicitly discarded, silencing -Wall.)
begin :: StartCode -> Action
begin code _loc _len _inp = do
   pushStartCode code
   lexToken
+
-- | Build a token from its source location alone (symbols, punctuation).
symbolToken :: (SrcLocation -> Token) -> Action
symbolToken mkToken loc _ _ = return $ mkToken loc

-- | An action which always fails with the given message.
token_fail :: String -> Action
token_fail message loc _ _
   = failP loc [ "Lexical Error !", message]

-- | Build a token from its location and the parsed matched text.
token :: (SrcLocation -> a -> Token) -> (String -> a) -> Action
token mkToken parse loc len str =
   return (mkToken loc (parse (take len str)))
+
-- a keyword or an identifier (the syntax overlaps)
keywordOrIdent :: String -> SrcLocation -> P Token
keywordOrIdent str loc
   = return $ maybe (Identifier loc str) ($ loc) (Map.lookup str keywords)

-- mapping from strings to keywords
keywords :: Map.Map String (SrcLocation -> Token)
keywords = Map.fromList keywordNames

-- all Python 3 keywords, alphabetically
keywordNames :: [(String, SrcLocation -> Token)]
keywordNames =
   [ ("False", Token.False), ("None", None), ("True", Token.True)
   , ("and", And), ("as", As), ("assert", Assert), ("break", Break)
   , ("class", Class), ("continue", Continue), ("def", Def), ("del", Delete)
   , ("elif", Elif), ("else", Else), ("except", Except), ("finally", Finally)
   , ("for", For), ("from", From), ("global", Global), ("if", If)
   , ("import", Import), ("in", In), ("is", Is), ("lambda", Lambda)
   , ("nonlocal", NonLocal), ("not", Not), ("or", Or), ("pass", Pass)
   , ("raise", Raise), ("return", Return), ("try", Try), ("while", While)
   , ("with", With), ("yield", Yield)
   ]
+
-- The lexer starts off in the beginning-of-file state (bof),
-- with the default state (0) underneath it.
initStartCodeStack :: [Int]
initStartCodeStack = [bof, 0]

-- special tokens for the end of file, end of line and dedents
endOfFileToken :: Token
endOfFileToken = EOF

newlineToken, dedentToken :: Token
newlineToken = Newline NoLocation
dedentToken = Dedent NoLocation
+
-- Test if we are at the end of the line or file
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user _inputBeforeToken _tokenLength (_loc, rest) =
   case rest of
      []      -> True
      (c : _) -> c == '\n' || c == '\r'

-- Test if there is any input left
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user _inputBeforeToken _tokenLength (_loc, rest) = not (null rest)
+
-- | Read a binary integer literal of the form 0b.../0B...
-- The lexer guarantees the input matches that shape; if it somehow does
-- not, we now fail with a descriptive error instead of a bare
-- pattern-match failure in the (previously non-exhaustive) fold function.
readBinary :: String -> Integer
readBinary str
   = foldl' acc 0 (drop 2 str)   -- drop the "0b"/"0B" prefix
   where
   acc b '0' = 2 * b
   acc b '1' = 2 * b + 1
   acc _ c   = error ("readBinary: unexpected character " ++ show c)
+
{-
floatnumber ::= pointfloat | exponentfloat
pointfloat ::= [intpart] fraction | intpart "."
exponentfloat ::= (intpart | pointfloat) exponent
intpart ::= digit+
fraction ::= "." digit+
exponent ::= ("e" | "E") ["+" | "-"] digit+
-}

-- | Read a Python float literal. Haskell's 'read' is stricter than the
-- grammar above: it needs digits on both sides of the decimal point, so
-- literals like ".5", "3." and "3.e2" must be patched before reading.
-- (Previously "3.e2" — valid per @point_float followed by @exponent —
-- crashed 'read'.)
readFloat :: String -> Double
readFloat str@('.':_) = read ('0' : readFloatRest str)
readFloat str = read (readFloatRest str)

-- Insert a "0" after any decimal point which is not followed by a digit,
-- i.e. at the end of the literal or directly before the exponent.
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest ('.':rest@(c:_))
   | c == 'e' || c == 'E' = '.' : '0' : readFloatRest rest
readFloatRest (c:cs) = c : readFloatRest cs
+
-- | Build a string-literal token, stripping the quote characters (and
-- any r/b prefix) from each end of the matched text before handing the
-- contents to the given token builder.
mkString :: Int -> Int -> (SrcLocation -> String -> Token) -> Action
mkString leftSkip rightSkip toToken loc len str =
   return $ toToken loc contents
   where
   contents = take (len - (leftSkip + rightSkip)) (drop leftSkip str)
+
-- token builders for the four flavours of string literal

stringToken :: SrcLocation -> String -> Token
stringToken loc = String loc . unescapeString

rawStringToken :: SrcLocation -> String -> Token
rawStringToken loc = String loc . unescapeRawString

byteStringToken :: SrcLocation -> String -> Token
byteStringToken loc = ByteString loc . BS.pack . unescapeString

rawByteStringToken :: SrcLocation -> String -> Token
rawByteStringToken loc = ByteString loc . BS.pack . unescapeRawString
+
-- | Push an opening bracket token onto the paren stack and return it.
openParen :: (SrcLocation -> Token) -> Action
openParen mkToken loc _len _str = do
   let token = mkToken loc
   pushParen token
   return token

-- | Check that a closing bracket matches the most recent opening
-- bracket, popping it from the paren stack on success.
closeParen :: (SrcLocation -> Token) -> Action
closeParen mkToken loc _len _str = do
   let token = mkToken loc
   topParen <- getParen
   case topParen of
      Nothing -> failP loc err1
      Just open -> if matchParen open token
                      then popParen >> return token
                      else failP loc err2
   where
   -- the two messages were previously identical (flagged "XXX fix")
   err1 = ["Lexical error ! closing paren with no matching opening paren"]
   err2 = ["Lexical error ! closing paren does not match opening paren"]
+
-- | True when an opening bracket token pairs with a closing bracket token.
matchParen :: Token -> Token -> Bool
matchParen open close =
   case (open, close) of
      (LeftRoundBracket {},  RightRoundBracket {})  -> True
      (LeftBrace {},         RightBrace {})         -> True
      (LeftSquareBracket {}, RightSquareBracket {}) -> True
      _                                             -> False
+
-- | Expand the escape sequences of an ordinary (non-raw) string literal,
-- following Python 3's lexical rules.
unescapeString :: String -> String
unescapeString ('\\':'\\':cs) = '\\' : unescapeString cs -- Backslash (\)
unescapeString ('\\':'\'':cs) = '\'' : unescapeString cs -- Single quote (')
unescapeString ('\\':'"':cs) = '"' : unescapeString cs -- Double quote (")
unescapeString ('\\':'a':cs) = '\a' : unescapeString cs -- ASCII Bell (BEL)
unescapeString ('\\':'b':cs) = '\b' : unescapeString cs -- ASCII Backspace (BS)
unescapeString ('\\':'f':cs) = '\f' : unescapeString cs -- ASCII Formfeed (FF)
unescapeString ('\\':'n':cs) = '\n' : unescapeString cs -- ASCII Linefeed (LF)
unescapeString ('\\':'r':cs) = '\r' : unescapeString cs -- ASCII Carriage Return (CR)
unescapeString ('\\':'t':cs) = '\t' : unescapeString cs -- ASCII Horizontal Tab (TAB)
unescapeString ('\\':'v':cs) = '\v' : unescapeString cs -- ASCII Vertical Tab (VT)
unescapeString ('\\':'\n':cs) = unescapeString cs -- line continuation
unescapeString ('\\':rest@(o:_))
   | o `elem` octalDigits = unescapeNumeric 3 octalDigits (fst . head . readOct) rest
unescapeString ('\\':'x':rest@(h:_))
   | h `elem` hexDigits = unescapeNumeric 2 hexDigits (fst . head . readHex) rest
unescapeString (c:cs) = c : unescapeString cs
unescapeString [] = []

-- | A raw string literal: escape sequences are NOT expanded. In Python 3
-- a backslash may still escape a quote (so the literal can contain it),
-- but the backslash itself remains in the resulting value, and
-- backslash-newline is likewise kept verbatim. The previous version
-- incorrectly removed the backslash before quotes and swallowed line
-- continuations.
unescapeRawString :: String -> String
unescapeRawString str = str

{-
 This is a bit complicated because Python allows between 1 and 3 octal
 characters after the \, and 1 and 2 hex characters after a \x.
-}
unescapeNumeric :: Int -> String -> (String -> Int) -> String -> String
unescapeNumeric n numericDigits readNumeric str
   = loop n [] str
   where
   loop _ acc [] = [numericToChar acc]
   loop 0 acc rest
      = numericToChar acc : unescapeString rest
   loop n acc (c:cs)
      | c `elem` numericDigits = loop (n-1) (c:acc) cs
      | otherwise = numericToChar acc : unescapeString (c:cs)
   numericToChar :: String -> Char
   numericToChar = toEnum . readNumeric . reverse

octalDigits, hexDigits :: String
octalDigits = "01234567"
hexDigits = "0123456789abcdef"
+
+
-- -----------------------------------------------------------------------------
-- Functionality required by Alex

-- | The lexer input: the current location plus the remaining text.
type AlexInput = (SrcLocation, String)

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "alexInputPrevChar not used"

-- | Fetch the next character, advancing the source location accordingly.
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc, input) =
   case input of
      []       -> Nothing
      (c : cs) -> Just (c, (moveChar c loc, cs))

-- | How a single character moves the source location.
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar '\n' = incLine 1
moveChar '\t' = incTab
moveChar '\r' = id
moveChar _    = incColumn 1
+
-- | Fail with a lexical error at the current location.
-- Previously used @liftM head getInput@, which crashed if the input was
-- empty; empty input is now reported explicitly.
lexicalError :: P a
lexicalError = do
   loc <- getLocation
   inp <- getInput
   case inp of
      [] -> failP loc
               ["Lexical error !",
                "Unexpected end of input."]
      (c:_) -> failP loc
                  ["Lexical error !",
                   "The character " ++ show c ++ " does not fit here."]

-- | Fail with a syntax error at the location of the last token.
parseError :: P a
parseError = do
   token <- getLastToken
   failP (location token)
      ["Syntax error !",
       "The symbol `" ++ show token ++ "' does not fit here."]
+
-- | The main entry point of the lexer: scan and return the next token,
-- updating the location/input state as a side effect.
lexToken :: P Token
lexToken = do
   loc <- getLocation
   input <- getInput
   startCode <- getStartCode
   case alexScan (loc, input) startCode of
      AlexEOF -> do
         -- at EOF, close any indentation levels still open
         depth <- getIndentStackDepth
         if depth <= 1
            then return endOfFileToken
            else popIndent >> return dedentToken
      AlexError _ -> lexicalError
      AlexSkip (loc', rest) _len -> do
         setLocation loc'
         setInput rest
         lexToken
      AlexToken (loc', rest) len action -> do
         setLocation loc'
         setInput rest
         tok <- action loc len input
         setLastToken tok
         return tok
+
-- | Continuation-style interface used by the Happy-generated parser:
-- fetch one token and pass it to the given continuation.
lexCont :: (Token -> P a) -> P a
lexCont cont = lexToken >>= cont
}
1,226 src/Language/Python/Version3/Parser/Parser.y
@@ -0,0 +1,1226 @@
{
module Language.Python.Version3.Parser.Parser where

import Data.Char (isSpace, isAlpha, isDigit)
import Language.Python.Version3.Parser.Lexer
import Language.Python.Version3.Parser.Token hiding (True, False)
-- alias needed: the grammar refers to Token.Import, Token.Assign, etc.
import qualified Language.Python.Version3.Parser.Token as Token
-- alias needed: the actions use both unqualified names and AST.Pass etc.
import Language.Python.Version3.Syntax.AST as AST
import Language.Python.Version3.Parser.ParserUtils
import Language.Python.Version3.Parser.ParserMonad
-- the location module lives under Language.Python.Data
import Language.Python.Data.SrcLocation
import Data.List (foldl')
import qualified Data.ByteString.Char8 as BS (ByteString)
}
+
+%name parseFileInput FileInput
+%name parseSingleInput SingleInput
+%name parseEval EvalInput
+
+%tokentype { Token }
+%error { parseError }
+%monad { P } { thenP } { returnP }
+%lexer { lexCont } { Token.EOF }
+
+%token
+ import { Token.Import _ }
+ ident { Token.Identifier _ $$ }
+ string { Token.String _ $$ }
+ bytestring { Token.ByteString _ $$ }
+ integer { Token.Integer _ $$ }
+ float { Token.Float _ $$ }
+ imaginary { Token.Imaginary _ $$ }
+ '=' { Token.Assign _ }
+ '(' { Token.LeftRoundBracket _ }
+ ')' { Token.RightRoundBracket _ }
+ '[' { Token.LeftSquareBracket _ }
+ ']' { Token.RightSquareBracket _ }
+ '{' { Token.LeftBrace _ }
+ '}' { Token.RightBrace _ }
+ ',' { Token.Comma _ }
+ ';' { Token.SemiColon _ }
+ ':' { Token.Colon _ }
+ def { Token.Def _ }
+ class { Token.Class _ }
+ while { Token.While _ }
+ for { Token.For _ }
+ if { Token.If _ }
+ with { Token.With _ }
+ as { Token.As _ }
+ true { Token.True _ }
+ false { Token.False _ }
+ none { Token.None _ }
+ return { Token.Return _ }
+ yield { Token.Yield _ }
+ indent { Token.Indent _ }
+ dedent { Token.Dedent _ }
+ newline { Token.Newline _ }
+ try { Token.Try _ }
+ except { Token.Except _ }
+ finally { Token.Finally _ }
+ raise { Token.Raise _ }
+ '+' { Token.Plus _ }
+ '-' { Token.Minus _ }
+ '*' { Token.Mult _ }
+ '/' { Token.Div _ }
+ '>' { Token.GreaterThan _ }
+ '<' { Token.LessThan _ }
+ '==' { Token.Equality _ }
+ '>=' { Token.GreaterThanEquals _ }
+ '<=' { Token.LessThanEquals _ }
+ and { Token.And _ }
+ or { Token.Or _ }
+ '**' { Token.Exponent _ }
+ pass { Token.Pass _ }
+ break { Token.Break _ }
+ continue { Token.Continue _ }
+ del { Token.Delete _ }
+ else { Token.Else _ }
+ elif { Token.Elif _ }
+ not { Token.Not _ }
+ '|' { Token.BinaryOr _ }
+ '^' { Token.Xor _ }
+ '&' { Token.BinaryAnd _ }
+ '>>' { Token.ShiftLeft _ }
+ '<<' { Token.ShiftRight _ }
+ '%' { Token.Modulo _ }
+ floordiv { Token.FloorDiv _ }
+ '~' { Token.Tilde _ }
+ '!=' { Token.NotEquals _ }
+ in { Token.In _ }
+ is { Token.Is _ }
+ lambda { Token.Lambda _ }
+ '.' { Token.Dot _ }
+ '...' { Token.Ellipsis _ }
+ '+=' { Token.PlusAssign _ }
+ '-=' { Token.MinusAssign _ }
+ '*=' { Token.MultAssign _ }
+ '/=' { Token.DivAssign _ }
+ '%=' { Token.ModAssign _ }
+ '**=' { Token.PowAssign _ }
+ '&=' { Token.BinAndAssign _ }
+ '|=' { Token.BinOrAssign _ }
+ '^=' { Token.BinXorAssign _ }
+ '<<=' { Token.LeftShiftAssign _ }
+ '>>=' { Token.RightShiftAssign _ }
+ '//=' { Token.FloorDivAssign _ }
+ '@' { Token.At _ }
+ '->' { Token.RightArrow _ }
+ from { Token.From _ }
+ global { Token.Global _ }
+ nonlocal { Token.NonLocal _ }
+ assert { Token.Assert _ }
+ eof { Token.EOF }
+
+%%
+
+{-
+ Note: newline tokens in the grammar:
+ It seems there are some dubious uses of NEWLINE in the grammar.
+ This is corroborated by this posting:
+ http://mail.python.org/pipermail/python-dev/2005-October/057014.html
+ The general idea is that the lexer does not generate NEWLINE tokens for
+ lines which contain only spaces or comments. However, the grammar sometimes
+ suggests that such tokens may exist.
+-}
+
+
+-- single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+
+{-
+ Complete: but we don't support the newline at the end of a compound stmt
+ because the lexer would not produce a newline there. It seems like a weirdness
+ in the way the interactive input works.
+-}
+
+SingleInput :: { [Statement] }
+SingleInput
+ : newline { [] }
+ | SimpleStmt { $1 }
+ | CompoundStmt {- No newline here! -} { [$1] }
+
+-- file_input: (NEWLINE | stmt)* ENDMARKER
+
+-- Complete: there is no need to mention the ENDMARKER, happy takes care of that.
+
+FileInput :: { Program }
+FileInput : ManyStmtOrNewline {- No ENDMARKER here! -} { Program $1 }
+
+ManyStmtOrNewline :: { [Statement] }
+ManyStmtOrNewline : ManyStmtOrNewlineRec { concat (reverse $1) }
+
+ManyStmtOrNewlineRec :: { [[Statement]] }
+ManyStmtOrNewlineRec
+ : {- empty -} { [] }
+ | ManyStmtOrNewlineRec NewLineOrStmt { $2 : $1 }
+
+NewLineOrStmt :: { [Statement] }
+NewLineOrStmt
+ : newline { [] }
+ | Stmt { $1 }
+
+-- eval_input: testlist NEWLINE* ENDMARKER
+
+-- Complete.
+
+EvalInput :: { Expr }
+EvalInput : TestList ZeroOrMoreNewline { $1 }
+
+ZeroOrMoreNewline :: { () }
+ZeroOrMoreNewline
+ : {- empty -} { () }
+ | ZeroOrMoreNewline newline { () }
+
+{-
+ decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+ decorators: decorator+
+ decorated: decorators (classdef | funcdef)
+-}
+
+-- Complete
+
+Decorator :: { Decorator }
+Decorator
+ : '@' DottedName OptionalArgList newline { Decorator { decorator_name = $2, decorator_args = $3 } }
+
+Decorators :: { [Decorator] }
+Decorators : DecoratorsRev { reverse $1 }
+
+DecoratorsRev :: { [Decorator] }
+DecoratorsRev
+ : Decorator { [$1] }
+ | DecoratorsRev Decorator { $2 : $1 }
+
+Decorated :: { Statement }
+Decorated
+ : Decorators ClassOrFunction { Decorated { decorated_decorators = $1, decorated_def = $2 } }
+
+ClassOrFunction :: { Statement }
+ClassOrFunction
+ : ClassDef { $1 }
+ | FuncDef { $1 }
+
+-- funcdef: 'def' NAME parameters ['->' test] ':' suite
+
+-- Complete
+
+FuncDef :: { Statement }
+FuncDef
+ : def Name Parameters OptionalResultAnnotation ':' Suite
+ { Fun { fun_name = $2 , fun_args = $3, fun_result_annotation = $4, fun_body = $6 } }
+
+OptionalResultAnnotation :: { Maybe Expr }
+OptionalResultAnnotation
+ : {- empty -} { Nothing }
+ | '->' Test { Just $2 }
+
+-- parameters: '(' [typedargslist] ')'
+
+Parameters :: { [Parameter] }
+Parameters : '(' TypedArgsList ')' { $2 }
+
+{-
+ typedargslist: ((tfpdef ['=' test] ',')*
+ ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
+ | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+-}
+
+-- Complete.
+
+{- Note the grammar allows an optional trailing comma, but only after the
+ positional arguments. If varargs are used (the star forms) then the
+ optional comma is not allowed. Why is this so? I don't know.
+
+ The code below uses right recursion extensively. The Happy docs say that
+ there can be problems with this:
+
+ http://www.haskell.org/happy/doc/html/sec-sequences.html#sec-separators
+
+ "right-recursive rules require stack space proportional to the length
+ of the list being parsed. This can be extremely important where long sequences
+ are involved, for instance in automatically generated output."
+
+ At the moment it seems easier to write using right recursion, but
+ we may want to re-visit the use of right recursion at some point.
+-}
+
+TypedArgsList :: { [Parameter] }
+TypedArgsList : Params { $1 }
+
+Params :: { [Parameter] }
+Params
+ : {- empty -} { [] }
+ | Star { [$1] }
+ | StarStar { [$1] }
+ | Param { [$1] }
+ | Param ',' Params { $1 : $3 }
+ | Star ',' StarParams { $1 : $3 }
+
+StarParams :: { [Parameter] }
+StarParams
+ : Param { [$1] }
+ | Param ',' StarParams { $1 : $3 }
+ | StarStar { [$1] }
+
+-- tfpdef: NAME [':' test]
+-- Complete
+
+TfpDef :: { (Ident, Maybe Expr) }
+TfpDef : Name OptionalColonTest { ($1, $2) }
+
+OptionalColonTest :: { Maybe Expr }
+OptionalColonTest
+ : {- empty -} { Nothing }
+ | ':' Test { Just $2 }
+
+OptionalDefault :: { Maybe Expr }
+OptionalDefault
+ : {- empty -} { Nothing }
+ | '=' Test { Just $2 }
+
+Param :: { Parameter }
+Param
+ : TfpDef OptionalDefault { makeParam $1 $2 }
+
+Star :: { Parameter }
+Star : '*' OptionalTfpDef { makeStarParam $2 }
+
+OptionalTfpDef :: { Maybe (Ident, Maybe Expr) }
+OptionalTfpDef
+ : {- empty -} { Nothing }
+ | TfpDef { Just $1 }
+
+StarStar :: { Parameter }
+StarStar : '**' TfpDef { makeStarStarParam $2 }
+
+{-
+ varargslist: ((vfpdef ['=' test] ',')* ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+
+ vfpdef: NAME
+-}
+
+-- Complete
+
+{-
+ There is some tedious similarity in these rules to the ones for
+ TypedArgsList. VarArgsList is used for lambda functions, and they
+ do not have parentheses around them (unlike function definitions).
+ Therefore lambda parameters cannot have the optional annotations
+ that normal functions can, because the annotations are introduced
+ using a colon. This would cause ambibguity with the colon
+ that marks the end of the lambda parameter list!
+
+ See the remarks about right recursion in the comments for
+ TypedArgsList.
+-}
+
+VarArgsList :: { [Parameter] }
+VarArgsList : VParams { $1 }
+
+VParams :: { [Parameter] }
+VParams
+ : {- empty -} { [] }
+ | VStar { [$1] }
+ | VStarStar { [$1] }
+ | VParam { [$1] }
+ | VParam ',' VParams { $1 : $3 }
+ | VStar ',' VStarParams { $1 : $3 }
+
+VStarParams :: { [Parameter] }
+VStarParams
+ : VParam { [$1] }
+ | VParam ',' VStarParams { $1 : $3 }
+ | VStarStar { [$1] }
+
+VParam :: { Parameter }
+VParam : VfpDef OptionalDefault { makeParam ($1, Nothing) $2 }
+
+VStar :: { Parameter }
+VStar : '*' OptionalVfpDef { makeStarParam $2 }
+
+OptionalVfpDef :: { Maybe (Ident, Maybe Expr) }
+OptionalVfpDef
+ : {- empty -} { Nothing }
+ | VfpDef { Just ($1, Nothing) }
+
+VStarStar :: { Parameter }
+VStarStar : '**' VfpDef { makeStarStarParam ($2, Nothing) }
+
+VfpDef :: { Ident }
+VfpDef : ident { Ident $1 }
+
+Name :: { Ident }
+Name : ident { Ident $1 }
+
+-- stmt: simple_stmt | compound_stmt
+
+-- Complete
+
+Stmt :: { [Statement] }
+Stmt
+ : SimpleStmt { $1 }
+ | CompoundStmt { [$1] }
+
+-- simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+
+-- Complete
+
+SimpleStmt :: { [Statement] }
+SimpleStmt : SmallStmts OptionalSemiColon newline { reverse $1 }
+
+OptionalSemiColon :: { () }
+OptionalSemiColon
+ : {- empty -} { () }
+ | ';' { () }
+
+SmallStmts :: { [Statement] }
+SmallStmts : SmallStmt { [$1] }
+ | SmallStmts ';' SmallStmt { $3 : $1 }
+
+{-
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+ import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+-}
+
+-- Complete
+
+SmallStmt :: { Statement }
+SmallStmt
+ : ExprStmt { $1 }
+ | DelStmt { $1 }
+ | PassStmt { $1 }
+ | FlowStmt { $1 }
+ | ImportStmt { $1 }
+ | GlobalStmt { $1 }
+ | NonLocalStmt { $1 }
+ | AssertStmt { $1 }
+
+-- expr_stmt: testlist (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist))*)
+
+-- Complete
+
+ExprStmt :: { Statement }
+ExprStmt : TestList Assignment { makeAssignmentOrExpr $1 $2 }
+
+Assignment :: { Either [Expr] (AssignOp, Expr) }
+Assignment
+ : NormalAssign { Left $1 }
+ | AugAssign { Right $1 }
+
+NormalAssign :: { [Expr] }
+NormalAssign : ZeroOrMoreAssignRev { reverse $1 }
+
+ZeroOrMoreAssignRev :: { [Expr] }
+ZeroOrMoreAssignRev
+ : {- empty -} { [] }
+ | ZeroOrMoreAssignRev '=' YieldOrTestList { $3 : $1 }
+
+YieldOrTestList :: { Expr }
+YieldOrTestList
+ : YieldExpr { $1 }
+ | TestList { $1 }
+
+{-
+ augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+ '<<=' | '>>=' | '**=' | '//=')
+-}
+
+-- Complete
+
+AugAssign :: { (AssignOp, Expr) }
+AugAssign : AugAssignOp YieldOrTestList { ($1, $2) }
+
+AugAssignOp :: { AssignOp }
+AugAssignOp
+ : '+=' { AST.PlusAssign }
+ | '-=' { AST.MinusAssign }
+ | '*=' { AST.MultAssign }
+ | '/=' { AST.DivAssign }
+ | '%=' { AST.ModAssign }
+ | '**=' { AST.PowAssign }
+ | '&=' { AST.BinAndAssign }
+ | '|=' { AST.BinOrAssign }
+ | '^=' { AST.BinXorAssign }
+ | '<<=' { AST.LeftShiftAssign }
+ | '>>=' { AST.RightShiftAssign }
+ | '//=' { AST.FloorDivAssign }
+
+-- del_stmt: 'del' exprlist
+-- Complete
+
+DelStmt :: { Statement }
+DelStmt : del ExprList { AST.Delete { del_exprs = $2 } }
+
+-- pass_stmt: 'pass'
+-- Complete
+
+PassStmt :: { Statement }
+PassStmt : pass { AST.Pass }
+
+-- flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+-- Complete
+
+FlowStmt :: { Statement }
+FlowStmt
+ : BreakStmt { $1 }
+ | ContinueStmt { $1 }
+ | ReturnStmt { $1 }
+ | RaiseStmt { $1 }
+ | YieldStmt { $1 }
+
+-- break_stmt: 'break'
+-- Complete
+
+BreakStmt :: { Statement }
+BreakStmt : break { AST.Break }
+
+-- continue_stmt: 'continue'
+-- Complete
+
+ContinueStmt :: { Statement }
+ContinueStmt : continue { AST.Continue }
+
+-- return_stmt: 'return' [testlist]
+-- Complete
+
+ReturnStmt :: { Statement }
+ReturnStmt : return OptionalTestList { AST.Return { return_expr = $2 }}
+
+-- yield_stmt: yield_expr
+-- Complete
+
+YieldStmt :: { Statement }
+YieldStmt : YieldExpr { StmtExpr { stmt_expr = $1 } }
+
+-- raise_stmt: 'raise' [test ['from' test]]
+-- Complete
+
+RaiseStmt :: { Statement }
+RaiseStmt : raise OptionalTestFrom { AST.Raise { raise_expr = $2 }}
+
+OptionalTestFrom :: { Maybe (Expr, Maybe Expr) }
+OptionalTestFrom
+ : {- empty -} { Nothing }
+ | Test OptionalFrom { Just ($1, $2) }
+
+OptionalFrom :: { Maybe Expr }
+OptionalFrom
+ : {- empty -} { Nothing }
+ | from Test { Just $2 }
+
+-- import_stmt: import_name | import_from
+-- Complete
+
+ImportStmt :: { Statement }
+ImportStmt
+ : ImportName { $1 }
+ | ImportFrom { $1 }
+
+{-
+ # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+ import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+ 'import' ('*' | '(' import_as_names ')' | import_as_names))
+-}
+
+ImportFrom :: { Statement }
+ImportFrom : from ImportModule import StarOrAsNames { FromImport { from_module = $2, from_items = $4 }}
+
+StarOrAsNames :: { FromItems }
+StarOrAsNames
+ : '*' { ImportEverything }
+ | '(' ImportAsNames ')' { $2 }
+ | ImportAsNames { $1 }
+
+ImportModule :: { ImportModule }
+ImportModule
+ : '.' { ImportDot }
+ | '...' { ImportRelative (ImportRelative ImportDot) }
+ | DottedName { ImportName $1 }
+ | '.' ImportModule { ImportRelative $2 }
+ | '...' ImportModule { ImportRelative (ImportRelative (ImportRelative $2)) }
+
+-- import_as_name: NAME ['as' NAME]
+ImportAsName :: { FromItem }
+ImportAsName
+ : Name OptionalAsName { FromItem { from_item_name = $1, from_as_name = $2 }}
+
+-- import_as_names: import_as_name (',' import_as_name)* [',']
+ImportAsNames :: { FromItems }
+ImportAsNames : ImportAsNamesRev OptionalComma { FromItems (reverse $1) }
+
+ImportAsNamesRev :: { [FromItem] }
+ImportAsNamesRev
+ : ImportAsName { [$1] }
+ | ImportAsNamesRev ',' ImportAsName { $3 : $1 }
+
+-- import_name: 'import' dotted_as_names
+
+ImportName :: { Statement }
+ImportName : import DottedAsNames { AST.Import { import_items = $2 }}
+
+-- dotted_as_names: dotted_as_name (',' dotted_as_name)*
+
+DottedAsNames :: { [ImportItem] }
+DottedAsNames : OneOrMoreDottedAsNamesRev { reverse $1 }
+
+OneOrMoreDottedAsNamesRev :: { [ImportItem] }
+OneOrMoreDottedAsNamesRev
+ : DottedAsName { [$1] }
+ | OneOrMoreDottedAsNamesRev ',' DottedAsName { $3 : $1 }
+
+-- dotted_as_name: dotted_name ['as' NAME]
+
+DottedAsName :: { ImportItem }
+DottedAsName
+ : DottedName OptionalAsName
+ { ImportItem { import_item_name = $1, import_as_name = $2 }}
+
+-- dotted_name: NAME ('.' NAME)*
+-- Complete
+
+DottedName :: { DottedName }
+DottedName : Name DotNames { $1 : reverse $2 }
+
+DotNames :: { DottedName }
+DotNames
+ : {- empty -} { [] }
+ | DotNames '.' Name { $3 : $1 }
+
+-- global_stmt: 'global' NAME (',' NAME)*
+-- Complete
+
+GlobalStmt :: { Statement }
+GlobalStmt : global OneOrMoreNames { AST.Global { global_vars = $2 }}
+
+OneOrMoreNames :: { [Ident] }
+OneOrMoreNames : OneOrMoreNamesRev { reverse $1 }
+
+OneOrMoreNamesRev :: { [Ident] }
+OneOrMoreNamesRev
+ : Name { [$1] }
+ | OneOrMoreNamesRev ',' Name { $3 : $1 }
+
+-- nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+
+NonLocalStmt :: { Statement }
+NonLocalStmt : nonlocal OneOrMoreNames { AST.NonLocal { nonLocal_vars = $2 }}
+
+-- assert_stmt: 'assert' test [',' test]
+
+AssertStmt :: { Statement }
+-- NOTE(review): TestListRev accepts any number of comma-separated tests, so
+-- this rule is more liberal than the stated grammar
+-- "assert_stmt: 'assert' test [',' test]" (at most two expressions).
+-- Confirm whether accepting longer lists here is intentional.
+AssertStmt : assert TestListRev { AST.Assert { assert_exprs = reverse $2 }}
+
+-- compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+-- Complete
+
+CompoundStmt :: { Statement }
+CompoundStmt
+ : IfStmt { $1 }
+ | WhileStmt { $1 }
+ | ForStmt { $1 }
+ | TryStmt { $1 }
+ | WithStmt { $1 }
+ | FuncDef { $1 }
+ | ClassDef { $1 }
+ | Decorated { $1 }
+
+-- if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+-- Complete
+
+IfStmt :: { Statement }
+IfStmt : IfConditionals OptionalElse { Conditional { cond_guards = $1, cond_else = $2 } }
+
+IfConditionals :: { [(Expr,[Statement])] }
+IfConditionals : If ZeroOrMoreElifs { $1 : $2 }
+
+If :: { (Expr, [Statement]) }
+If : if Test ':' Suite { ($2, $4) }
+
+ZeroOrMoreElifs :: { [(Expr, [Statement])]}
+ZeroOrMoreElifs : ZeroOrMoreElifsRev { reverse $1 }
+
+ZeroOrMoreElifsRev :: { [(Expr, [Statement])]}
+ZeroOrMoreElifsRev
+ : {- empty -} { [] }
+ | ZeroOrMoreElifsRev elif Test ':' Suite { ($3, $5) : $1 }
+
+OptionalElse :: { [Statement] }
+OptionalElse
+ : {- empty -} { [] }
+ | else ':' Suite { $3 }
+
+-- while_stmt: 'while' test ':' suite ['else' ':' suite]
+-- Complete
+
+WhileStmt :: { Statement }
+WhileStmt : while Test ':' Suite OptionalElse { AST.While { while_cond = $2 , while_body = $4, while_else = $5 } }
+
+-- for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+-- Complete
+
+ForStmt :: { Statement }
+ForStmt
+ : for ExprList in TestList ':' Suite OptionalElse
+ { AST.For { for_targets = $2, for_generator = $4, for_body = $6, for_else = $7 } }
+
+{-
+ try_stmt: ('try' ':' suite
+ ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite))
+-}
+-- Complete
+
+TryStmt :: { Statement }
+TryStmt : try ':' Suite Handlers { makeTry $3 $4 }
+
+Handlers :: { ([Handler], [Statement], [Statement]) }
+Handlers
+ : OneOrMoreExceptClauses OptionalElse OptionalFinally { ($1, $2, $3) }
+ | finally ':' Suite { ([], [], $3) }
+
+OptionalFinally :: { [Statement] }
+OptionalFinally
+ : {- empty -} { [] }
+ | finally ':' Suite { $3 }
+
+OneOrMoreExceptClauses :: { [Handler] }
+OneOrMoreExceptClauses : OneOrMoreExceptClausesRev { reverse $1 }
+
+OneOrMoreExceptClausesRev :: { [Handler] }
+OneOrMoreExceptClausesRev
+ : Handler { [$1] }
+ | OneOrMoreExceptClausesRev Handler { $2 : $1 }
+
+Handler :: { Handler }
+Handler : ExceptClause ':' Suite { ($1, $3) }
+
+{-
+ with_stmt: 'with' test [ with_var ] ':' suite
+ with_var: 'as' expr
+-}
+-- Complete
+
+WithStmt :: { Statement }
+WithStmt : with Test OptionalAs ':' Suite
+ { AST.With { with_context = $2, with_as = $3, with_body = $5 } }
+
+OptionalAs :: { Maybe Expr }
+OptionalAs
+ : {- empty -} { Nothing }
+ | as Expr { Just $2 }
+
+-- except_clause: 'except' [test ['as' NAME]]
+-- Complete
+
+ExceptClause :: { ExceptClause }
+ExceptClause : except ExceptExpr { $2 }
+
+ExceptExpr :: { ExceptClause }
+ExceptExpr
+ : {- empty -} { Nothing }
+ | Test OptionalAsName { Just ($1, $2) }
+
+OptionalAsName :: { Maybe Ident }
+OptionalAsName
+ : {- empty -} { Nothing }
+ | as Name { Just $2 }
+
+-- suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+-- Complete, but we don't have a newline before indent because it is redundant
+
+Suite :: { [Statement] }
+Suite
+ : SimpleStmt { $1 }
+ | {- no newline here! -} indent OneOrMoreStmts dedent { $2 }
+
+OneOrMoreStmts :: { [Statement] }
+-- OneOrMoreStmtsRec builds a *reversed* list of statement groups, where each
+-- group (produced by SimpleStmt/Stmt) is already in source order. We must
+-- therefore reverse the outer list before concatenating. The previous
+-- 'reverse (concat $1)' also reversed the statements inside each group,
+-- misordering e.g. "a = 1; b = 2" when parsed as one simple statement.
+OneOrMoreStmts : OneOrMoreStmtsRec { concat (reverse $1) }
+
+OneOrMoreStmtsRec :: { [[Statement]] }
+OneOrMoreStmtsRec
+ : Stmt { [$1] }
+ | OneOrMoreStmtsRec Stmt { $2 : $1 }
+
+-- test: or_test ['if' or_test 'else' test] | lambdef
+-- Complete
+
+Test :: { Expr }
+Test
+ : OrTest TestCond { makeConditionalExpr $1 $2 }
+ | LambDef { $1 }
+
+TestCond :: { Maybe (Expr, Expr) }
+TestCond
+ : {- empty -} { Nothing }
+ | if OrTest else Test { Just ($2, $4) }
+
+-- test_nocond: or_test | lambdef_nocond
+-- Complete
+
+TestNoCond :: { Expr }
+TestNoCond
+ : OrTest { $1 }
+ | LambDefNoCond { $1 }
+
+-- lambdef: 'lambda' [varargslist] ':' test
+-- Complete
+
+LambDef :: { Expr }
+LambDef : lambda VarArgsList ':' Test { AST.Lambda $2 $4 }
+
+-- lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+-- Complete
+
+LambDefNoCond :: { Expr }
+LambDefNoCond : lambda VarArgsList ':' TestNoCond { AST.Lambda $2 $4 }
+
+-- or_test: and_test ('or' and_test)*
+-- Complete
+
+OrTest :: { Expr }
+OrTest : AndTest OrSequence { makeBinOp $1 $2 }
+
+OrSequence :: { [(Op, Expr)] }
+OrSequence
+ : {- empty -} { [] }
+ | OrSequence or AndTest { (AST.Or, $3) : $1 }
+
+-- and_test: not_test ('and' not_test)*
+-- Complete
+
+AndTest :: { Expr }
+AndTest : NotTest AndSequence { makeBinOp $1 $2 }
+
+AndSequence :: { [(Op, Expr)] }
+AndSequence
+ : {- empty -} { [] }
+ | AndSequence and NotTest { (AST.And, $3) : $1 }
+
+-- not_test: 'not' not_test | comparison
+-- Complete
+
+NotTest :: { Expr }
+NotTest
+ : not NotTest { UnaryOp {operator = AST.Not, op_arg = $2} }
+ | Comparison { $1 }
+
+-- comparison: star_expr (comp_op star_expr)*
+-- Complete
+
+Comparison :: { Expr }
+Comparison : StarExpr CompSequence { makeBinOp $1 $2 }
+
+CompSequence :: { [(Op, Expr)] }
+CompSequence
+ : {- empty -} { [] }
+ | CompSequence CompOp StarExpr { ($2, $3) : $1 }
+
+-- comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+-- Complete
+
+CompOp :: { Op }
+CompOp
+ : '<' { AST.LessThan }
+ | '>' { AST.GreaterThan }
+ | '==' { AST.Equality }
+ | '>=' { AST.GreaterThanEquals }
+ | '<=' { AST.LessThanEquals }
+ | '!=' { AST.NotEquals }
+ | in { AST.In }
+ | not in { AST.NotIn }
+ | IsOp { $1 }
+
+IsOp :: { Op }
+IsOp : is NotPart { $2 }
+
+NotPart :: { Op }
+NotPart
+ : {- empty -} { AST.Is }
+ | not { AST.IsNot }
+
+-- star_expr: ['*'] expr
+-- Incomplete
+{-
+ XXX The grammar grossly over-states the places where a starred expression can occur.
+ It leads to an ambiguity because of the starred argument lists.
+
+ I think this is a bug in the grammar, and it will need more investigation to see if
+ it can be fixed.
+-}
+
+StarExpr :: { Expr }
+StarExpr : Expr { $1 }
+{-
+ : '*' Expr { Starred { starred_expr = $2 }}
+ | Expr { $1 }
+-}
+
+-- expr: xor_expr ('|' xor_expr)*
+-- Complete
+
+Expr :: { Expr }
+Expr : XorExpr BinaryOrSequence { makeBinOp $1 $2 }
+
+BinaryOrSequence :: { [(Op, Expr)] }
+BinaryOrSequence
+ : {- empty -} { [] }
+ | BinaryOrSequence '|' XorExpr { (AST.BinaryOr, $3) : $1 }
+
+-- xor_expr: and_expr ('^' and_expr)*
+-- Complete
+
+XorExpr :: { Expr }
+XorExpr : AndExpr XorSequence { makeBinOp $1 $2 }
+
+XorSequence :: { [(Op, Expr)] }
+XorSequence
+ : {- empty -} { [] }
+ | XorSequence '^' AndExpr { (AST.Xor, $3) : $1 }
+
+-- and_expr: shift_expr ('&' shift_expr)*
+-- Complete
+
+AndExpr :: { Expr }
+AndExpr : ShiftExpr BinaryAndSequence { makeBinOp $1 $2 }
+
+BinaryAndSequence :: { [(Op, Expr)] }
+BinaryAndSequence
+ : {- empty -} { [] }
+ | BinaryAndSequence '&' ShiftExpr { (AST.BinaryAnd, $3) : $1 }
+
+-- shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+-- Complete
+
+ShiftExpr :: { Expr }
+ShiftExpr : ArithExpr ShiftSequence { makeBinOp $1 $2 }
+
+ShiftSequence :: { [(Op, Expr)] }
+ShiftSequence
+ : {- empty -} { [] }
+ | ShiftSequence ShiftOp ArithExpr { ($2, $3) : $1 }
+
+ShiftOp :: { Op }
+ShiftOp
+ : '<<' { AST.ShiftLeft }
+ | '>>' { AST.ShiftRight }
+
+-- arith_expr: term (('+'|'-') term)*
+-- Complete
+
+ArithExpr :: { Expr }
+ArithExpr : Term TermSequence { makeBinOp $1 $2 }
+
+TermSequence :: { [(Op, Expr)] }
+TermSequence
+ : {- empty -} { [] }
+ | TermSequence ArithOp Term { ($2, $3) : $1 }
+
+ArithOp :: { Op }
+ArithOp
+ : '+' { AST.Plus }
+ | '-' { AST.Minus }
+
+-- term: factor (('*'|'/'|'%'|'//') factor)*
+-- Complete
+
+Term :: { Expr }
+Term : Factor FactorSequence { makeBinOp $1 $2 }
+
+FactorSequence :: { [(Op, Expr)] }
+FactorSequence
+ : {- empty -} { [] }
+ | FactorSequence MultDivOp Factor { ($2, $3) : $1 }
+
+MultDivOp :: { Op }
+-- Fix: the rule head must match the nonterminal named in the type signature
+-- above. It was misspelled "MultDivOP", which leaves 'MultDivOp' (referenced
+-- by FactorSequence) undefined as far as Happy is concerned.
+MultDivOp
+ : '*' { AST.Multiply }
+ | '/' { AST.Divide }
+ | '%' { AST.Modulo }
+ | floordiv { AST.FloorDivide }
+
+-- factor: ('+'|'-'|'~') factor | power
+-- Complete
+
+Factor :: { Expr }
+Factor
+ : '+' Factor { UnaryOp { operator = AST.Plus, op_arg = $2 } }
+ | '-' Factor { UnaryOp { operator = AST.Minus, op_arg = $2 } }
+ | '~' Factor { UnaryOp { operator = AST.Invert, op_arg = $2 } }
+ | Power { $1 }
+
+-- power: atom trailer* ['**' factor]
+-- Complete, but maybe we should factor out the common prefix?
+
+Power :: { Expr }
+Power : Atom ZeroOrMoreTrailer { addTrailer $1 $2 }
+ | Atom ZeroOrMoreTrailer '**' Factor
+ { makeBinOp (addTrailer $1 $2) [(AST.Exponent, $4)] }
+
+ZeroOrMoreTrailer :: { [Trailer] }
+ZeroOrMoreTrailer : ZeroOrMoreTrailerRev { reverse $1 }
+
+ZeroOrMoreTrailerRev :: { [Trailer] }
+ZeroOrMoreTrailerRev
+ : {- empty -} { [] }
+ | ZeroOrMoreTrailerRev Trailer { $2 : $1 }
+
+{-
+ atom: ('(' [yield_expr|testlist_comp] ')' |
+ '[' [testlist_comp] ']' |
+ '{' [dictorsetmaker] '}' |
+ NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+-}
+-- Incomplete
+
+Atom :: { Expr }
+Atom : ParenForm { $1 }
+ | ListForm { $1 }
+ | DictOrSetForm { $1 }
+ | Name { AST.Var $1 }
+ | integer { AST.Int $1 }
+ | float { AST.Float $1 }
+ | imaginary { AST.Imaginary { imaginary_value = $1 }}
+ | OneOrMoreStrings { AST.Strings (reverse $1) }
+ | OneOrMoreByteStrings { AST.ByteStrings (reverse $1) }
+ | '...' { AST.Ellipsis }
+ | none { AST.None }
+ | true { AST.Bool Prelude.True }
+ | false { AST.Bool Prelude.False }
+
+ParenForm :: { Expr }
+ParenForm : '(' YieldOrTestListComp ')' { $2 }
+
+ListForm :: { Expr }
+ListForm
+ : '[' ']' { List { list_exprs = [] } }
+ | '[' TestListComp ']' { makeListForm $2 }
+
+DictOrSetForm :: { Expr }
+DictOrSetForm
+ : '{' '}' { Dictionary { dict_mappings = [] }}
+ | '{' DictOrSetMaker '}' { $2 }
+
+YieldOrTestListComp :: { Expr }
+YieldOrTestListComp
+ : {- empty -} { Tuple { tuple_exprs = [] } }
+ | YieldExpr { $1 }
+ | TestListComp { either id (\c -> Generator { gen_comprehension = c }) $1 }
+
+OneOrMoreStrings :: { [String] }
+OneOrMoreStrings
+ : string { [$1] }
+ | OneOrMoreStrings string { $2 : $1 }
+
+OneOrMoreByteStrings :: { [BS.ByteString] }
+OneOrMoreByteStrings
+ : bytestring { [$1] }
+ | OneOrMoreByteStrings bytestring { $2 : $1 }
+
+-- testlist_comp: test ( comp_for | (',' test)* [','] )
+-- Complete
+
+TestListComp :: { Either Expr (Comprehension Expr) }
+TestListComp
+ : TestList { Left $1 }
+ | Test CompFor { Right (makeComprehension $1 $2) }
+
+-- trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+-- Complete
+
+Trailer :: { Trailer }
+Trailer
+ : '(' ArgList ')' { TrailerCall $2 }
+ | '[' SubscriptList ']' { TrailerSubscript $2 }
+ | '.' Name { TrailerDot $2 }
+
+-- subscriptlist: subscript (',' subscript)* [',']
+
+SubscriptList :: { [Subscript] }
+SubscriptList : OneOrMoreSubsRev OptionalComma { reverse $1 }
+
+OneOrMoreSubsRev :: { [Subscript] }
+OneOrMoreSubsRev
+ : Subscript { [$1] }
+ | OneOrMoreSubsRev ',' Subscript { $3 : $1 }
+
+-- subscript: test | [test] ':' [test] [sliceop]
+
+Subscript :: { Subscript }
+Subscript
+ : Test { SubscriptExpr $1 }
+ | OptionalTest ':' OptionalTest OptionalSliceOp { SubscriptSlice $1 $3 $4 }
+
+OptionalTest :: { Maybe Expr }
+OptionalTest
+ : {- empty -} { Nothing }
+ | Test { Just $1 }
+
+OptionalSliceOp :: { Maybe (Maybe Expr) }
+OptionalSliceOp
+ : {- empty -} { Nothing }
+ | SliceOp { Just $1 }
+
+-- sliceop: ':' [test]
+
+SliceOp :: { Maybe Expr }
+SliceOp : ':' OptionalTest { $2 }
+
+-- exprlist: star_expr (',' star_expr)* [',']
+-- Complete
+
+ExprList :: { [Expr] }
+ExprList : ExprListRev OptionalComma { reverse $1 }
+
+OptionalComma :: { Bool }
+OptionalComma
+ : {- empty -} { False }
+ | ',' { True }
+
+ExprListRev :: { [Expr] }
+ExprListRev
+ : StarExpr { [$1] }
+ | ExprListRev ',' StarExpr { $3 : $1 }
+
+-- testlist: test (',' test)* [',']
+-- Complete
+
+-- Some trickery here because the of the optional trailing comma, which
+-- could turn a normal expression into a tuple.
+-- Very occasionally, TestList is used to generate something which is not
+-- a tuple (such as the square bracket notation in list literals). Therefore
+-- it would seem like a good idea to not return a tuple in this case, but
+-- a list of expressions. However this would complicate a lot of code
+-- since we would have to carry around the optional comma information.
+-- I've decided to leave it as a tuple, and in special cases, unpack the
+-- tuple and pull out the list of expressions.
+
+TestList :: { Expr }
+TestList : TestListRev OptionalComma { makeTupleOrExpr (reverse $1) $2 }
+
+TestListRev :: { [Expr] }
+TestListRev
+ : Test { [$1] }
+ | TestListRev ',' Test { $3 : $1 }
+
+{-
+ dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+ (test (comp_for | (',' test)* [','])) )
+-}
+
+DictOrSetMaker :: { Expr }
+DictOrSetMaker
+ : Test ':' Test DictRest { makeDictionary ($1, $3) $4 }
+ | Test SetRest { makeSet $1 $2 }
+
+DictRest :: { Either CompFor [(Expr, Expr)] }
+DictRest
+ : CompFor { Left $1 }
+ | ZeroOrMoreDictMappings OptionalComma { Right (reverse $1) }
+
+ZeroOrMoreDictMappings :: { [(Expr, Expr)] }
+ZeroOrMoreDictMappings
+ : {- empty -} { [] }
+ | ZeroOrMoreDictMappings ',' Test ':' Test { ($3,$5) : $1 }
+
+SetRest :: { Either CompFor [Expr] }
+SetRest
+ : CompFor { Left $1 }
+ | ZeroOrMoreCommaTest OptionalComma { Right (reverse $1) }
+
+ZeroOrMoreCommaTest :: { [Expr] }
+ZeroOrMoreCommaTest
+ : {- empty -} { [] }
+ | ZeroOrMoreCommaTest ',' Test { $3 : $1 }
+
+-- classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+-- Complete
+
+ClassDef :: { Statement }
+ClassDef
+ : class Name OptionalArgList ':' Suite
+ { AST.Class { class_name = $2, class_args = $3, class_body = $5 }}
+
+OptionalArgList :: { [Argument] }
+OptionalArgList
+ : {- empty -} { [] }
+ | '(' ArgList ')' { $2 }
+
+{-
+ arglist: (argument ',')* (argument [',']
+ |'*' test (',' argument)* [',' '**' test]
+ |'**' test)
+-}
+
+{-
+ Deviates slightly from the grammar because we allow empty arg lists.
+ The grammar allows for this by making arg lists non-empty but optional.
+ Works out the same in the end.
+-}
+
+ArgList :: { [Argument] }
+ArgList
+ : {- empty -} { [] }
+ | '*' Test { [ArgVarArgsPos { arg_expr = $2 }] }
+ | '**' Test { [ArgVarArgsKeyword { arg_expr = $2 }] }
+ | Argument { [$1] }
+ | Argument ',' ArgList { $1 : $3 }
+ | '*' Test ',' StarArgs { ArgVarArgsPos { arg_expr = $2 } : $4 }
+
+StarArgs :: { [Argument] }
+StarArgs
+ : Argument { [$1] }
+ | Argument ',' StarArgs { $1 : $3 }
+ | '**' Test { [ArgVarArgsKeyword { arg_expr = $2 }] }
+
+-- argument: test [comp_for] | test '=' test # Really [keyword '='] test
+
+Argument :: { Argument }
+Argument
+ : Name '=' Test { ArgKeyword { arg_keyword = $1, arg_expr = $3 }}
+ | Test { ArgExpr { arg_expr = $1 }}
+ | Test CompFor { ArgExpr { arg_expr = Generator { gen_comprehension = makeComprehension $1 $2 }}}
+
+-- comp_iter: comp_for | comp_if
+-- Complete
+
+CompIter :: { CompIter }
+CompIter
+ : CompFor { IterFor $1 }
+ | CompIf { IterIf $1 }
+
+-- comp_for: 'for' exprlist 'in' or_test [comp_iter]
+
+CompFor :: { CompFor }
+CompFor : for ExprList in OrTest OptionalCompIter
+ { CompFor { comp_for_exprs = $2, comp_in_expr = $4, comp_for_iter = $5 }}
+
+OptionalCompIter :: { Maybe CompIter }
+OptionalCompIter
+ : {- empty -} { Nothing }
+ | CompIter { Just $1 }
+
+-- comp_if: 'if' test_nocond [comp_iter]
+
+CompIf :: { CompIf }
+CompIf : if TestNoCond OptionalCompIter { CompIf { comp_if = $2, comp_if_iter = $3 } }
+
+-- testlist1: test (',' test)*
+-- Not used in the rest of the grammar!
+
+-- encoding_decl: NAME
+-- Not used in the rest of the grammar!
+
+-- yield_expr: 'yield' [testlist]
+-- Complete
+
+YieldExpr :: { Expr }
+YieldExpr : yield OptionalTestList { AST.Yield { yield_expr = $2 } }
+
+OptionalTestList :: { Maybe Expr }
+OptionalTestList
+ : {- empty -} { Nothing }
+ | TestList { Just $1 }
+
+{
+-- Put additional Haskell code in here if needed.
+
+}
190 src/Language/Python/Version3/Parser/ParserMonad.hs
@@ -0,0 +1,190 @@
+module Language.Python.Version3.Parser.ParserMonad
+ ( P
+ , execParser
+ , failP
+ , thenP
+ , returnP
+ , setLocation
+ , getLocation
+ , getInput
+ , setInput
+ , getLastToken
+ , setLastToken
+ , ParseError (..)
+ , State
+ , initialState
+ , pushStartCode
+ , popStartCode
+ , getStartCode
+ , getIndent
+ , pushIndent
+ , popIndent
+ , getIndentStackDepth
+ , getParen
+ , pushParen
+ , popParen
+ , getParenStackDepth
+ ) where
+
+import Language.Python.SrcLocation (SrcLocation (..))
+import Language.Python.Version3.Parser.Token (Token (..))
+
+newtype ParseError = ParseError ([String], SrcLocation)
+ deriving Show
+
+data ParseResult a
+ = POk !State a
+ | PFailed [String] SrcLocation -- The error message and position
+
+data State =
+ State
+ { location :: !SrcLocation -- position at current input location
+ , input :: !String -- the current input
+ , previousToken :: Token -- the previous token
+ , startCodeStack :: [Int] -- a stack of start codes for the state of the lexer
+ , indentStack :: [Int] -- a stack of source column positions of indentation levels
+ , parenStack :: [Token] -- a stack of parens and brackets for indentation handling
+ }
+
+initialState :: SrcLocation -> String -> [Int] -> State
+initialState initLoc inp scStack
+ = State
+ { location = initLoc
+ , input = inp
+ , previousToken = initToken
+ , startCodeStack = scStack
+ , indentStack = [1]
+ , parenStack = []
+ }
+
+newtype P a = P { unP :: State -> ParseResult a }
+
+instance Monad P where
+ return = returnP
+ (>>=) = thenP
+ fail m = getLocation >>= \loc -> failP loc [m]
+
+-- | Run a parser action from the given initial state, returning either a
+-- 'ParseError' (message plus source location) or the parsed result.
+-- The final parser state is discarded on success.
+-- NOTE(review): the argument 'initialState' shadows the top-level
+-- 'initialState' function defined in this module.
+execParser :: P a -> State -> Either ParseError a
+execParser (P parser) initialState =
+ case parser initialState of
+ PFailed message errloc -> Left (ParseError (message, errloc))
+ POk st result -> Right result
+
+initToken :: Token
+initToken = Newline NoLocation
+
+{-# INLINE returnP #-}
+returnP :: a -> P a
+returnP a = P $ \s -> POk s a
+
+{-# INLINE thenP #-}
+thenP :: P a -> (a -> P b) -> P b
+(P m) `thenP` k = P $ \s ->
+ case m s of
+ POk s' a -> (unP (k a)) s'
+ PFailed err loc -> PFailed err loc
+
+failP :: SrcLocation -> [String] -> P a
+failP loc msg = P $ \_ -> PFailed msg loc
+
+setLocation :: SrcLocation -> P ()
+setLocation loc = P $ \s -> POk (s { location = loc }) ()
+
+getLocation :: P SrcLocation
+getLocation = P $ \s@State{ location = loc } -> POk s loc
+
+getInput :: P String
+getInput = P $ \s@State{ input = inp } -> POk s inp
+
+setInput :: String -> P ()
+setInput inp = P $ \s -> POk (s { input = inp }) ()
+
+getLastToken :: P Token
+getLastToken = P $ \s@State{ previousToken = tok } -> POk s tok
+
+setLastToken :: Token -> P ()
+setLastToken tok = P $ \s -> POk (s { previousToken = tok }) ()
+
+pushStartCode :: Int -> P ()
+pushStartCode code = P newStack
+ where
+ newStack s@State{ startCodeStack = scStack }
+ = POk (s { startCodeStack = code : scStack}) ()
+
+popStartCode :: P ()
+popStartCode = P newStack
+ where
+ newStack s@State{ startCodeStack = scStack, location = loc }
+ = case scStack of
+ [] -> PFailed err loc
+ _:rest -> POk (s { startCodeStack = rest }) ()
+ err = ["fatal error in lexer: attempt to pop empty start code stack"]
+
+getStartCode :: P Int
+getStartCode = P getCode
+ where
+ getCode s@State{ startCodeStack = scStack, location = loc }
+ = case scStack of
+ [] -> PFailed err loc
+ code:_ -> POk s code
+ err = ["fatal error in lexer: start code stack empty on getStartCode"]
+
+pushIndent :: Int -> P ()
+pushIndent indent = P newStack
+ where
+ newStack s@State{ indentStack = iStack }
+ = POk (s { indentStack = indent : iStack }) ()
+
+popIndent :: P ()
+popIndent = P newStack
+ where
+ newStack s@State{ indentStack = iStack, location = loc }
+ = case iStack of
+ [] -> PFailed err loc
+ _:rest -> POk (s { indentStack = rest }) ()
+ -- XXX this message needs fixing
+ err = ["fatal error in lexer: attempt to pop empty indentation stack"]
+
+getIndent :: P Int
+getIndent = P get
+ where
+ get s@State{ indentStack = iStack, location = loc }
+ = case iStack of
+ [] -> PFailed err loc
+ indent:_ -> POk s indent
+ -- XXX this message needs fixing
+ err = ["fatal error in lexer: indent stack empty on getIndent"]
+
+getIndentStackDepth :: P Int
+getIndentStackDepth = P get
+ where
+ get s@State{ indentStack = iStack } = POk s (length iStack)
+
+pushParen :: Token -> P ()
+pushParen symbol = P newStack
+ where
+ newStack s@State{ parenStack = pStack }
+ = POk (s { parenStack = symbol : pStack }) ()
+
+popParen :: P ()
+popParen = P newStack
+ where
+ newStack s@State{ parenStack = pStack, location = loc }
+ = case pStack of
+ [] -> PFailed err loc
+ _:rest -> POk (s { parenStack = rest }) ()
+ -- XXX this message needs fixing
+ err = ["fatal error in lexer: attempt to pop empty paren stack"]
+
+getParen :: P (Maybe Token)
+getParen = P get
+ where
+ get s@State{ parenStack = pStack }
+ = case pStack of
+ [] -> POk s Nothing
+ symbol:_ -> POk s (Just symbol)
+
+getParenStackDepth :: P Int
+getParenStackDepth = P get
+ where
+ get s@State{ parenStack = pStack } = POk s (length pStack)
123 src/Language/Python/Version3/Parser/ParserUtils.hs
@@ -0,0 +1,123 @@
+module Language.Python.Version3.Parser.ParserUtils where
+
+import AST
+import Data.List (foldl')
+import Token hiding (True, False)
+import ParserMonad
+import SrcLocation
+
+makeConditionalExpr :: Expr -> Maybe (Expr, Expr) -> Expr
+makeConditionalExpr e Nothing = e
+makeConditionalExpr e (Just (cond, false_branch))
+ = CondExpr
+ { ce_true_branch = e
+ , ce_condition = cond
+ , ce_false_branch = false_branch
+ }
+
+-- | Fold a list of (operator, operand) pairs onto a first operand, building
+-- a left-associative chain of 'BinaryOp' nodes. The *Sequence rules in the
+-- grammar accumulate their pairs in reverse order, hence the 'reverse'
+-- before the strict left fold.
+makeBinOp :: Expr -> [(Op, Expr)] -> Expr
+makeBinOp e es
+ = foldl' (\e1 (op, e2) -> BinaryOp { operator = op, left_op_arg = e1, right_op_arg = e2 } )
+ e (reverse es)
+
+parseError :: Token -> P a
+parseError token
+ = failP (location token) ["Unexpected token", show token]
+
+data Trailer
+ = TrailerCall [Argument]
+ | TrailerSubscript [Subscript]
+ | TrailerDot Ident
+
+data Subscript
+ = SubscriptExpr Expr
+ | SubscriptSlice (Maybe Expr) (Maybe Expr) (Maybe (Maybe Expr))
+
+isProperSlice :: Subscript -> Bool
+isProperSlice (SubscriptSlice {}) = True
+isProperSlice other = False
+
+subscriptToSlice :: Subscript -> Slice
+subscriptToSlice (SubscriptSlice lower upper stride)
+ = SliceProper { slice_lower = lower, slice_upper = upper, slice_stride = stride }
+subscriptToSlice (SubscriptExpr e)
+ = SliceExpr { slice_expr = e }
+
+subscriptToExpr :: Subscript -> Expr
+subscriptToExpr (SubscriptExpr e) = e
+-- this should never happen:
+subscriptToExpr (SubscriptSlice {})
+ = error "subscriptToExpr applied to a proper slice"
+
+-- | Apply a sequence of trailers (calls, subscripts, attribute accesses) to
+-- a base expression, left to right, producing the corresponding AST node for
+-- each step.
+addTrailer :: Expr -> [Trailer] -> Expr
+addTrailer
+ = foldl' trail
+ where
+ trail :: Expr -> Trailer -> Expr
+ trail e (TrailerCall args) = Call { call_fun = e, call_args = args }
+ -- If any subscript in the list is a proper slice (contains ':'), the
+ -- whole trailer is treated as slicing; otherwise as plain subscription.
+ trail e (TrailerSubscript subs)
+ | any isProperSlice subs
+ = SlicedExpr { slicee = e, slices = map subscriptToSlice subs }
+ | otherwise
+ = Subscript { subscriptee = e, subscript_exprs = map subscriptToExpr subs }
+ -- Attribute access ("e.ident") is encoded as a binary Dot operator in
+ -- this AST.
+ trail e (TrailerDot ident)
+ = BinaryOp { operator = AST.Dot, left_op_arg = e, right_op_arg = Var ident }
+
+-- | Collapse a parsed expression list into either the single expression (one
+-- element, no trailing comma) or a 'Tuple'. The Bool records whether a
+-- trailing comma was seen; it turns a lone expression into a one-element
+-- tuple, matching Python semantics for "x,".
+-- NOTE(review): partial — there is no equation for the empty list. The
+-- grammar's callers always supply at least one expression (TestListRev is
+-- non-empty), but an empty input would crash; verify before reusing.
+makeTupleOrExpr :: [Expr] -> Bool -> Expr
+makeTupleOrExpr [e] False = e
+makeTupleOrExpr es@[e] True = Tuple { tuple_exprs = es }
+makeTupleOrExpr es@(_:_) _ = Tuple { tuple_exprs = es }
+
+-- | Turn "testlist (augassign rhs | ('=' rhs)*)" into a statement: a bare
+-- expression statement when no '=' was seen, a (possibly chained) normal
+-- assignment, or an augmented assignment.
+makeAssignmentOrExpr :: Expr -> Either [Expr] (AssignOp, Expr) -> Statement
+makeAssignmentOrExpr e (Left es)
+ = makeNormalAssignment e es
+ where
+ makeNormalAssignment :: Expr -> [Expr] -> Statement
+ makeNormalAssignment e [] = StmtExpr { stmt_expr = e }
+ -- In "a = b = rhs", every expression except the last is a target; the
+ -- last one is the assigned value (hence the splitAt on length - 1).
+ makeNormalAssignment e es
+ = AST.Assign { assign_to = e : front, assign_expr = head back }
+ where
+ (front, back) = splitAt (len - 1) es
+ len = length es
+makeAssignmentOrExpr e1 (Right (op, e2))
+ = makeAugAssignment e1 op e2
+ where
+ makeAugAssignment :: Expr -> AssignOp -> Expr -> Statement
+ makeAugAssignment e1 op e2
+ = AST.AugmentedAssign { aug_assign_to = e1, aug_assign_op = op, aug_assign_expr = e2 }
+
+makeTry :: Suite -> ([Handler], [Statement], [Statement]) -> Statement
+makeTry body (handlers, elses, finally)
+ = AST.Try { try_body = body, try_excepts = handlers, try_else = elses, try_finally = finally }
+
+makeParam :: (Ident, Maybe Expr) -> Maybe Expr -> Parameter
+makeParam (name, annot) defaultVal
+ = Param { param_name = name, param_annotation = annot, param_default = defaultVal }
+
+makeStarParam :: Maybe (Ident, Maybe Expr) -> Parameter
+makeStarParam Nothing = EndPositional
+makeStarParam (Just (name, annot))
+ = VarArgsPos { param_name = name, param_annotation = annot }
+
+makeStarStarParam :: (Ident, Maybe Expr) -> Parameter
+makeStarStarParam (name, annot)
+ = VarArgsKeyword { param_name = name, param_annotation = annot }
+
+makeComprehension :: Expr -> CompFor -> Comprehension Expr
+makeComprehension e for
+ = Comprehension { comprehension_expr = e, comprehension_for = for }
+
+makeListForm :: Either Expr (Comprehension Expr) -> Expr
+makeListForm (Left tuple@(Tuple {})) = List { list_exprs = tuple_exprs tuple }
+makeListForm (Left other) = List { list_exprs = [other] }
+makeListForm (Right comprehension) = ListComp { list_comprehension = comprehension }
+
+makeSet :: Expr -> Either CompFor [Expr] -> Expr
+makeSet e (Left compFor)
+ = SetComp { set_comprehension = Comprehension { comprehension_expr = e, comprehension_for = compFor }}
+makeSet e (Right es) = Set { set_exprs = e:es }
+
+makeDictionary :: (Expr, Expr) -> Either CompFor [(Expr,Expr)] -> Expr
+makeDictionary e (Left compFor)
+ = DictComp { dict_comprehension = Comprehension { comprehension_expr = e, comprehension_for = compFor }}
+makeDictionary e (Right es) = Dictionary { dict_mappings = e:es }
107 src/Language/Python/Version3/Parser/Token.hs
@@ -0,0 +1,107 @@
+module Language.Python.Version3.Parser.Token
+ ( Token (..)
+ , Ident (..)
+ )
+where
+
+import SrcLocation (SrcLocation (..), Location (location))
+import qualified Data.ByteString.Char8 as BS (ByteString)
+
+-- | A Python identifier (variable, function, class or module name),
+-- stored as its source text.
+newtype Ident = Ident String
+   deriving (Eq, Show, Ord)
+
+-- | Lexical tokens produced by the lexer. Every constructor except
+-- 'EOF' carries the 'SrcLocation' where the token was found.
+-- NOTE(review): the constructors 'True', 'False', 'None' and several
+-- others clash with Prelude/sibling names, so use sites presumably
+-- import this module qualified — confirm against the lexer/parser.
+data Token
+   -- literal tokens
+   = Identifier SrcLocation !String
+   | String SrcLocation !String
+   | ByteString SrcLocation !BS.ByteString
+   | Integer SrcLocation !Integer
+   | Float SrcLocation !Double
+   | Imaginary SrcLocation !Double
+   -- assignment operators
+   | Assign SrcLocation
+   | PlusAssign SrcLocation
+   | MinusAssign SrcLocation
+   | MultAssign SrcLocation
+   | DivAssign SrcLocation
+   | ModAssign SrcLocation
+   | PowAssign SrcLocation
+   | BinAndAssign SrcLocation
+   | BinOrAssign SrcLocation
+   | BinXorAssign SrcLocation
+   | LeftShiftAssign SrcLocation
+   | RightShiftAssign SrcLocation
+   | FloorDivAssign SrcLocation
+   -- brackets and punctuation
+   | LeftRoundBracket SrcLocation
+   | RightRoundBracket SrcLocation
+   | LeftSquareBracket SrcLocation
+   | RightSquareBracket SrcLocation
+   | LeftBrace SrcLocation
+   | RightBrace SrcLocation
+   | RightArrow SrcLocation
+   | Dot SrcLocation
+   | Ellipsis SrcLocation
+   | Comma SrcLocation
+   | SemiColon SrcLocation
+   | Colon SrcLocation
+   -- keywords, operators and layout tokens
+   | Def SrcLocation
+   | While SrcLocation
+   | If SrcLocation
+   | True SrcLocation
+   | False SrcLocation
+   | Return SrcLocation
+   -- Indent/Dedent/Newline are layout tokens — presumably synthesised
+   -- by the lexer for Python's indentation rules; confirm there.
+   | Indent SrcLocation
+   | Dedent SrcLocation
+   | Newline SrcLocation
+   | Try SrcLocation
+   | Except SrcLocation
+   | Raise SrcLocation
+   | Plus SrcLocation
+   | Minus SrcLocation
+   | Mult SrcLocation
+   | Div SrcLocation
+   | GreaterThan SrcLocation
+   | LessThan SrcLocation
+   | Equality SrcLocation
+   | GreaterThanEquals SrcLocation
+   | LessThanEquals SrcLocation
+   | And SrcLocation
+   | Or SrcLocation
+   | Exponent SrcLocation
+   | Pass SrcLocation
+   | Break SrcLocation
+   | Continue SrcLocation
+   | Delete SrcLocation
+   | Else SrcLocation
+   | Not SrcLocation
+   | BinaryOr SrcLocation
+   | Xor SrcLocation
+   | BinaryAnd SrcLocation
+   | ShiftLeft SrcLocation
+   | ShiftRight SrcLocation
+   | Modulo SrcLocation
+   | FloorDiv SrcLocation
+   | Tilde SrcLocation
+   | NotEquals SrcLocation
+   | In SrcLocation
+   | Is SrcLocation
+   | Lambda SrcLocation
+   | Class SrcLocation
+   | Finally SrcLocation
+   | None SrcLocation
+   | For SrcLocation
+   | From SrcLocation
+   | NonLocal SrcLocation
+   | Global SrcLocation
+   | With SrcLocation
+   | As SrcLocation
+   | Elif SrcLocation
+   | Yield SrcLocation
+   | Assert SrcLocation
+   | Import SrcLocation
+   | At SrcLocation
+   | EOF -- does not have src location on purpose.
+   deriving (Show, Eq, Ord)
+
+-- XXX fixme: this is a placeholder. It should project out the
+-- 'SrcLocation' stored in each token (every constructor except 'EOF'
+-- carries one); at the moment every token reports 'NoLocation'.
+instance Location Token where
+   -- Use a wildcard: the argument is deliberately unused for now,
+   -- and naming it only provokes an unused-binding warning.
+   location _ = NoLocation
+
192 src/Language/Python/Version3/Syntax/AST.hs
@@ -0,0 +1,192 @@
+module Language.Python.Version3.Syntax.AST
+ ( Program (..)
+ , Statement (..)
+ , Expr (..)
+ , Op (..)
+ , AssignOp (..)
+ , Handler
+ , ExceptClause
+ , Suite
+ , DottedName
+ , Decorator (..)
+ , Parameter (..)
+ , Comprehension (..)
+ , CompFor (..)
+ , CompIf (..)
+ , CompIter (..)
+ , ImportItem (..)
+ , FromItem (..)
+ , FromItems (..)
+ , ImportModule (..)
+ , Slice (..)
+ , Argument (..)
+ , Ident (..)
+ )
+ where
+
+import Token ( Ident (..) )
+import qualified Data.ByteString.Char8 as BS
+
+--------------------------------------------------------------------------------
+
+-- | A whole Python module: its top-level statements in source order.
+newtype Program = Program [Statement]
+   deriving Show
+
+-- | A block of statements, e.g. a function, loop or class body.
+-- An empty suite is used for an absent optional clause.
+type Suite = [Statement]
+
+-- | A dotted module path such as @os.path@, one 'Ident' per component.
+type DottedName = [Ident]
+
+-- | One item of an @import@ statement: a dotted module name with an
+-- optional @as@ alias.
+data ImportItem
+   = ImportItem { import_item_name :: DottedName, import_as_name :: Maybe Ident }
+   deriving Show
+
+-- | One item of a @from ... import@ statement: a name with an
+-- optional @as@ alias.
+data FromItem
+   = FromItem { from_item_name :: Ident, from_as_name :: Maybe Ident }
+   deriving Show
+
+-- | The imported part of a @from ... import@ statement: either @*@
+-- or an explicit list of items.
+data FromItems = ImportEverything | FromItems [FromItem]
+   deriving Show
+
+-- | The module part of a @from ... import@ statement, which may be a
+-- relative (dotted) reference.
+data ImportModule
+   = ImportRelative ImportModule -- dot followed by something
+   | ImportDot -- dot on its own
+   | ImportName DottedName -- the name of a module
+   deriving Show
+
+-- | A Python statement. Compound statements carry their bodies as
+-- 'Suite's; an empty suite stands for an absent optional clause
+-- (for example a @while@ with no @else@ part).
+data Statement
+   = Import { import_items :: [ImportItem] }
+   | FromImport { from_module :: ImportModule, from_items :: FromItems }
+   | While { while_cond :: Expr, while_body :: Suite, while_else :: Suite }
+   | For { for_targets :: [Expr], for_generator :: Expr, for_body :: Suite, for_else :: Suite }
+   | Fun { fun_name :: Ident, fun_args :: [Parameter], fun_result_annotation :: Maybe Expr, fun_body :: Suite }
+   | Class { class_name :: Ident, class_args :: [Argument], class_body :: Suite }
+   -- cond_guards holds the @if@ and all @elif@ branches in order.
+   | Conditional { cond_guards :: [(Expr, Suite)], cond_else :: Suite }
+   -- XXX is the assign_to list always a singleton?
+   | Assign { assign_to :: [Expr], assign_expr :: Expr }
+   | AugmentedAssign { aug_assign_to :: Expr, aug_assign_op :: AssignOp, aug_assign_expr :: Expr }
+   | Decorated { decorated_decorators :: [Decorator], {- Fun or Class -} decorated_def :: Statement }
+   | Return { return_expr :: Maybe Expr }
+   | Try { try_body :: Suite, try_excepts :: [Handler], try_else :: Suite, try_finally :: Suite }
+   -- raise_expr: optional exception, with an optional @from@/cause
+   -- expression — presumably per Python 3 raise syntax; confirm in parser.
+   | Raise { raise_expr :: Maybe (Expr, Maybe Expr) }
+   | With { with_context :: Expr, with_as :: Maybe Expr, with_body :: Suite }
+   | Pass
+   | Break
+   | Continue
+   | Delete { del_exprs :: [Expr] }
+   -- An expression used as a statement (e.g. a bare function call).
+   | StmtExpr { stmt_expr :: Expr }
+   | Global { global_vars :: [Ident] }
+   | NonLocal { nonLocal_vars :: [Ident] }
+   | Assert { assert_exprs :: [Expr] }
+   deriving Show
+
+-- | A decorator applied to a function or class definition.
+data Decorator = Decorator { decorator_name :: DottedName, decorator_args :: [Argument] }
+   deriving Show
+
+-- | A formal parameter of a function or lambda.
+data Parameter
+   = Param { param_name :: Ident, param_annotation :: Maybe Expr, param_default :: Maybe Expr }
+   | VarArgsPos { param_name :: Ident, param_annotation :: Maybe Expr }
+   | VarArgsKeyword { param_name :: Ident, param_annotation :: Maybe Expr }
+   | EndPositional {- Star on its own, not really a parameter but a marker -}
+   deriving Show
+
+-- | An actual argument at a call site (or in a class header):
+-- plain, @*expr@, @**expr@, or @keyword=expr@.
+data Argument
+   = ArgExpr { arg_expr :: Expr }
+   | ArgVarArgsPos { arg_expr :: Expr }
+   | ArgVarArgsKeyword { arg_expr :: Expr }
+   | ArgKeyword { arg_keyword :: Ident, arg_expr :: Expr }
+   deriving Show
+
+-- | One @except@ clause and its body.
+type Handler = (ExceptClause, Suite)
+-- | What an @except@ matches: Nothing for a bare @except@, otherwise
+-- the exception expression with an optional @as@ name.
+type ExceptClause = Maybe (Expr, Maybe Ident)
+
+-- | A comprehension: the result expression (an 'Expr', or a key/value
+-- pair for dict comprehensions) plus its leading @for@ clause.
+data Comprehension e
+   = Comprehension { comprehension_expr :: e, comprehension_for :: CompFor }
+   deriving Show
+
+-- | A @for@ clause of a comprehension, with any following clauses.
+data CompFor = CompFor { comp_for_exprs :: [Expr], comp_in_expr :: Expr, comp_for_iter :: Maybe CompIter }
+   deriving Show
+
+-- | An @if@ clause of a comprehension, with any following clauses.
+data CompIf = CompIf { comp_if :: Expr, comp_if_iter :: Maybe CompIter }
+   deriving Show
+
+-- | The continuation of a comprehension: another @for@ or @if@ clause.
+data CompIter = IterFor CompFor | IterIf CompIf
+   deriving Show
+
+-- | A Python expression.
+data Expr
+   = Var Ident
+   | Int Integer
+   | Float Double
+   | Imaginary { imaginary_value :: Double }
+   | Bool Bool
+   | None
+   | Ellipsis
+   -- Adjacent string literals are kept separate until concatenation.
+   | ByteStrings [BS.ByteString]
+   | Strings [String]
+   | Call { call_fun :: Expr, call_args :: [Argument] }
+   | Subscript { subscriptee :: Expr, subscript_exprs :: [Expr] }
+   | SlicedExpr { slicee :: Expr, slices :: [Slice] }
+   -- Field order follows the source order of @true if cond else false@.
+   | CondExpr { ce_true_branch :: Expr, ce_condition :: Expr, ce_false_branch :: Expr }
+   | BinaryOp { operator :: Op, left_op_arg :: Expr, right_op_arg :: Expr }
+   | UnaryOp { operator :: Op, op_arg :: Expr }
+   | Lambda { lambda_args :: [Parameter], lambda_body :: Expr }
+   | Tuple { tuple_exprs :: [Expr] }
+   | Yield { yield_expr :: Maybe Expr }
+   | Generator { gen_comprehension :: Comprehension Expr }
+   | ListComp { list_comprehension :: Comprehension Expr }
+   | List { list_exprs :: [Expr] }
+   | Dictionary { dict_mappings :: [(Expr, Expr)] }
+   | DictComp { dict_comprehension :: Comprehension (Expr, Expr) }
+   | Set { set_exprs :: [Expr] }
+   | SetComp { set_comprehension :: Comprehension Expr }
+   | Starred { starred_expr :: Expr }
+   deriving Show
+
+-- | One component of a subscript slice: a proper
+-- @lower:upper[:stride]@ slice (the outer Maybe on the stride records
+-- whether the second colon was present) or a bare expression.
+data Slice
+   = SliceProper { slice_lower :: Maybe Expr, slice_upper :: Maybe Expr, slice_stride :: Maybe (Maybe Expr) }
+   | SliceExpr { slice_expr :: Expr }
+   deriving Show
+
+-- | Unary and binary operators, used by 'BinaryOp' and 'UnaryOp'.
+data Op
+   = And
+   | Or
+   | Not
+   | Exponent
+   | LessThan
+   | GreaterThan
+   | Equality
+   | GreaterThanEquals
+   | LessThanEquals
+   | NotEquals
+   | In
+   | Is
+   | IsNot
+   | NotIn
+   | BinaryOr
+   | Xor
+   | BinaryAnd
+   | ShiftLeft
+   | ShiftRight
+   | Multiply
+   | Plus
+   | Minus
+   | Divide
+   | FloorDivide
+   | Invert
+   | Modulo
+   -- Dot appears here as an operator; presumably attribute access —
+   -- confirm how the parser builds it.
+   | Dot
+   deriving (Eq, Show)
+
+-- | Augmented-assignment operators.
+data AssignOp
+   = PlusAssign -- +=
+   | MinusAssign -- -=
+   | MultAssign -- *=
+   | DivAssign -- /=
+   | ModAssign -- %=
+   | PowAssign -- **=
+   | BinAndAssign -- &=
+   | BinOrAssign -- |=
+   | BinXorAssign -- ^=
+   | LeftShiftAssign -- <<=
+   | RightShiftAssign -- >>=
+   | FloorDivAssign -- //=
+   deriving (Eq, Show)
287 src/Language/Python/Version3/Syntax/Pretty.hs
@@ -0,0 +1,287 @@
+module Language.Python.Version3.Syntax.Pretty where
+
+import AST
+
+-- import Text.PrettyPrint.HughesPJ as HPJ
+import Text.PrettyPrint as TextPP
+import qualified Data.ByteString.Char8 as BS
+
+--------------------------------------------------------------------------------
+
+-- | All types which can be rendered as a pretty-printing 'Doc'.
+class Pretty a where
+   pretty :: a -> Doc
+
+-- | Render a value directly to a printable 'String'.
+prettyText :: Pretty a => a -> String
+prettyText x = render (pretty x)
+
+-- | Pretty-print an item, wrapping it in parentheses when the
+-- predicate holds.
+parensIf :: Pretty a => (a -> Bool) -> a -> Doc
+parensIf needsParens x
+   | needsParens x = parens (pretty x)
+   | otherwise = pretty x
+
+-- | Pretty-print a list of items separated by commas (and spaces).
+commaList :: Pretty a => [a] -> Doc
+commaList items = hsep (punctuate comma (map pretty items))
+
+-- | Byte strings render as a Python bytes literal, @b\"...\"@.
+instance Pretty BS.ByteString where
+   -- XXX should handle the escaping properly
+   pretty b = text "b" <> text (show $ BS.unpack b)
+
+-- Numeric types delegate to the corresponding Doc primitives.
+instance Pretty Int where
+   pretty = int
+
+instance Pretty Integer where
+   pretty = integer
+
+instance Pretty Double where
+   pretty = double
+
+-- | Booleans render as the Python keywords @True@ and @False@,
+-- which coincide with Haskell's derived Show output.
+instance Pretty Bool where
+   pretty = text . show
+
+-- | An absent value renders as nothing at all; a present value
+-- renders as itself.
+instance Pretty a => Pretty (Maybe a) where
+   pretty = maybe empty pretty
+
+-- | Render a string literal, using Show for quoting.
+-- XXX should handle the escaping properly
+prettyString :: String -> Doc
+prettyString = text . show
+
+-- | A program renders as its top-level statements stacked vertically.
+instance Pretty Program where
+   pretty (Program stmts) = prettySuite stmts
+
+-- | An identifier renders as its source text, unquoted.
+instance Pretty Ident where
+   pretty (Ident name) = text name
+
+-- | A single full stop, used between dotted-name components.
+dot :: Doc
+dot = char '.'
+
+-- | Render a dotted module path, interleaving dots between the
+-- components with no surrounding spaces.
+prettyDottedName :: DottedName -> Doc
+prettyDottedName = hcat . punctuate dot . map pretty
+
+-- | An import item renders as its dotted name, followed by an
+-- @as alias@ clause when one is present.
+instance Pretty ImportItem where
+   pretty (ImportItem { import_item_name = name, import_as_name = maybeAlias })
+      = prettyDottedName name <+> asClause
+      where
+      asClause = maybe empty (\alias -> text "as" <+> pretty alias) maybeAlias
+
+-- | A from-import item renders as its name, followed by an
+-- @as alias@ clause when one is present.
+instance Pretty FromItem where
+   pretty (FromItem { from_item_name = name, from_as_name = maybeAlias })
+      = pretty name <+> asClause
+      where
+      asClause = maybe empty (\alias -> text "as" <+> pretty alias) maybeAlias
+
+-- | @*@ for import-everything; a lone item bare, several items in
+-- parentheses.
+instance Pretty FromItems where
+   pretty ImportEverything = char '*'
+   pretty (FromItems [item]) = pretty item
+   pretty (FromItems items) = parens (commaList items)
+
+-- | Relative imports render their leading dots before the (possibly
+-- absent) module name.
+instance Pretty ImportModule where
+   pretty (ImportRelative importModule) = dot <> pretty importModule
+   pretty ImportDot = dot
+   pretty (ImportName dottedName) = prettyDottedName dottedName
+
+-- | Render a suite of statements, one above the other.
+prettySuite :: [Statement] -> Doc
+prettySuite = vcat . map pretty
+
+-- | Render an optional clause such as @else:@ with its indented body,
+-- or nothing at all when the suite is empty.
+optionalKeywordSuite :: String -> [Statement] -> Doc
+optionalKeywordSuite keyword stmts
+   | null stmts = empty
+   | otherwise = text keyword <> colon $+$ indent (prettySuite stmts)
+
+-- | Render an argument list in parentheses, comma separated.
+prettyArgList :: [Argument] -> Doc
+prettyArgList args = parens (commaList args)
+
+-- | Like an argument list, but an empty list renders as nothing at
+-- all (no empty parentheses).
+prettyOptionalArgList :: [Argument] -> Doc
+prettyOptionalArgList args
+   | null args = empty
+   | otherwise = parens (commaList args)
+
+-- Render conditional guards as "elif" clauses, each with its indented
+-- body. NOTE(review): every guard here gets "elif" — presumably the
+-- caller renders the first guard as "if" before handing the rest to
+-- this function; confirm against the Conditional case of Statement.
+prettyGuards :: [(Expr, Suite)] -> Doc
+prettyGuards [] = empty
+prettyGuards ((cond,body):guards)
+   = text "elif" <+> pretty cond <> colon $+$ indent (prettySuite body) $+$
+     prettyGuards guards
+
+-- | Indent a nested block by four spaces.
+indent :: Doc -> Doc
+indent = nest 4
+
+-- XXX is there a better way to do this?
+-- | A blank output line: a Doc containing an empty line of text.
+-- Note this is deliberately not 'empty', which would vanish when
+-- combined vertically instead of producing a line.
+blankLine :: Doc
+blankLine = text ""
+
+instance Pretty Statement where
+ -- pretty :: Statement -> Doc
+ pretty (Import { import_items = items}) = text "import" <+> commaList items
+ pretty stmt@(FromImport {})
+ = text "from" <+> pretty (from_module stmt) <+> text "import" <+> pretty (from_items stmt)
+ pretty stmt@(While {})
+ = text "while" <+> pretty (while_cond stmt) <> colon $+$
+ indent (prettySuite (while_body stmt)) $+$ optionalKeywordSuite "else" (while_else stmt)
+ pretty stmt@(For {})
+ = text "for" <+> commaList (for_targets stmt) <+> text "in" <+> pretty (for_generator stmt) <> colon $+$
+ indent (prettySuite (for_body stmt)) $+$ optionalKeywordSuite "else" (for_else stmt)
+ pretty stmt@(Fun {})
+ = text "def" <+> pretty (fun_name stmt) <> parens (commaList (fun_args stmt)) <+>
+ pretty (fun_result_annotation stmt) <> colon $+$ indent (prettySuite (fun_body stmt))
+ pretty stmt@(Class {})
+ = text "class" <+> pretty (class_name stmt) <> prettyOptionalArgList (class_args stmt) <>
+ colon $+$ indent (prettySuite (class_body stmt))
+ pretty stmt@(Conditional { cond_guards = guards, cond_else = optionalElse })
+ = case guards of
+ (cond,body):xs ->