-
Notifications
You must be signed in to change notification settings - Fork 45
/
LexerUtils.hs
207 lines (173 loc) · 6.22 KB
/
LexerUtils.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
{-# OPTIONS #-}
-----------------------------------------------------------------------------
-- |
-- Module : Language.Python.Common.LexerUtils
-- Copyright : (c) 2009 Bernie Pope
-- License : BSD-style
-- Maintainer : bjpop@csse.unimelb.edu.au
-- Stability : experimental
-- Portability : ghc
--
-- Various utilities to support the Python lexer.
-----------------------------------------------------------------------------
module Language.Python.Common.LexerUtils where
import Control.Monad (liftM)
import Control.Monad.Error.Class (throwError)
import Data.List (foldl')
import Data.Map as Map hiding (null, map)
import Numeric (readHex, readOct)
import Language.Python.Common.Token as Token
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation
-- Beginning of. BOF = beginning of file, BOL = beginning of line
data BO = BOF | BOL
-- Functions for building tokens
type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token
lineJoin :: Action
lineJoin span _len _str =
return $ LineJoinToken $ spanStartPoint span
endOfLine :: P Token -> Action
endOfLine lexToken span _len _str = do
setLastEOL $ spanStartPoint span
lexToken
bolEndOfLine :: P Token -> Int -> Action
bolEndOfLine lexToken bol span len inp = do
pushStartCode bol
endOfLine lexToken span len inp
dedentation :: P Token -> Action
dedentation lexToken span _len _str = do
topIndent <- getIndent
-- case compare (endCol span) topIndent of
case compare (startCol span) topIndent of
EQ -> do popStartCode
lexToken
LT -> do popIndent
return dedentToken
GT -> spanError span "indentation error"
indentation :: P Token -> Int -> BO -> Action
-- Check if we are at the EOF. If yes, we may need to generate a newline,
-- in case we came here from BOL (but not BOF).
indentation lexToken _dedentCode bo _loc _len [] = do
popStartCode
case bo of
BOF -> lexToken
BOL -> newlineToken
indentation lexToken dedentCode bo span _len _str = do
popStartCode
parenDepth <- getParenStackDepth
if parenDepth > 0
then lexToken
else do
topIndent <- getIndent
-- case compare (endCol span) topIndent of
case compare (startCol span) topIndent of
EQ -> case bo of
BOF -> lexToken
BOL -> newlineToken
LT -> do pushStartCode dedentCode
newlineToken
-- GT -> do pushIndent (endCol span)
GT -> do pushIndent (startCol span)
return indentToken
where
indentToken = IndentToken span
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken mkToken location _ _ = return (mkToken location)
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token mkToken read location len str
= return $ mkToken location literal (read literal)
where
literal = take len str
-- special tokens for the end of file and end of line
endOfFileToken :: Token
endOfFileToken = EOFToken SpanEmpty
dedentToken = DedentToken SpanEmpty
newlineToken :: P Token
newlineToken = do
loc <- getLastEOL
return $ NewlineToken loc
-- Test if we are at the end of the line or file
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user _inputBeforeToken _tokenLength (_loc, inputAfterToken)
= null inputAfterToken || nextChar == '\n' || nextChar == '\r'
where
nextChar = head inputAfterToken
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user _inputBeforeToken _tokenLength (_loc, inputAfterToken)
= not (null inputAfterToken)
readBinary :: String -> Integer
readBinary
= toBinary . drop 2
where
toBinary = foldl' acc 0
acc b '0' = 2 * b
acc b '1' = 2 * b + 1
readFloat :: String -> Double
readFloat str@('.':cs) = read ('0':readFloatRest str)
readFloat str = read (readFloatRest str)
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest (c:cs) = c : readFloatRest cs
mkString :: (SrcSpan -> String -> Token) -> Action
mkString toToken loc len str = do
return $ toToken loc (take len str)
stringToken :: SrcSpan -> String -> Token
stringToken = StringToken
rawStringToken :: SrcSpan -> String -> Token
rawStringToken = StringToken
byteStringToken :: SrcSpan -> String -> Token
byteStringToken = ByteStringToken
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = UnicodeStringToken
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = ByteStringToken
openParen :: (SrcSpan -> Token) -> Action
openParen mkToken loc _len _str = do
let token = mkToken loc
pushParen token
return token
closeParen :: (SrcSpan -> Token) -> Action
closeParen mkToken loc _len _str = do
let token = mkToken loc
topParen <- getParen
case topParen of
Nothing -> spanError loc err1
Just open -> if matchParen open token
then popParen >> return token
else spanError loc err2
where
-- XXX fix these error messages
err1 = "Lexical error ! unmatched closing paren"
err2 = "Lexical error ! unmatched closing paren"
matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = True
matchParen _ _ = False
-- -----------------------------------------------------------------------------
-- Functionality required by Alex
type AlexInput = (SrcLocation, String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "alexInputPrevChar not used"
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc, input)
| null input = Nothing
| otherwise = Just (nextChar, (nextLoc, rest))
where
nextChar = head input
rest = tail input
nextLoc = moveChar nextChar loc
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar '\n' = incLine 1
moveChar '\t' = incTab
moveChar '\r' = id
moveChar _ = incColumn 1
lexicalError :: P a
lexicalError = do
location <- getLocation
c <- liftM head getInput
throwError $ UnexpectedChar c location
readOctNoO :: String -> Integer
readOctNoO (zero:rest) = read (zero:'O':rest)