Permalink
Browse files

Added support for Unicode literals (version 2 only), but not the raw variant

  • Loading branch information...
bjpop committed Dec 6, 2010
1 parent 97cef06 commit 4922e025a8ca1577191070d9dd0ce46a85d3e641
@@ -584,6 +584,8 @@ data Expr annot
| ByteStrings { byte_string_strings :: [String], expr_annot :: annot }
-- | Literal strings (to be concatenated together).
| Strings { strings_strings :: [String], expr_annot :: annot }
+ -- | Unicode literal strings (to be concatenated together). Version 2 only.
+ | UnicodeStrings { unicodestrings_strings :: [String], expr_annot :: annot }
-- | Function call.
| Call
{ call_fun :: Expr annot -- ^ Expression yielding a callable object (such as a function).
@@ -583,6 +583,8 @@ data Expr annot
| ByteStrings { byte_string_strings :: [String], expr_annot :: annot }
-- | Literal strings (to be concatenated together).
| Strings { strings_strings :: [String], expr_annot :: annot }
+ -- | Unicode literal strings (to be concatenated together). Version 2 only.
+ | UnicodeStrings { unicodestrings_strings :: [String], expr_annot :: annot }
-- | Function call.
| Call
{ call_fun :: Expr annot -- ^ Expression yielding a callable object (such as a function).
@@ -134,16 +134,19 @@ mkString toToken loc len str = do
return $ toToken loc (take len str)
stringToken :: SrcSpan -> String -> Token
-stringToken loc str = StringToken loc str
+stringToken = StringToken
rawStringToken :: SrcSpan -> String -> Token
-rawStringToken loc str = StringToken loc str
+rawStringToken = StringToken
byteStringToken :: SrcSpan -> String -> Token
-byteStringToken loc str = ByteStringToken loc $ str
+byteStringToken = ByteStringToken
+
+unicodeStringToken :: SrcSpan -> String -> Token
+unicodeStringToken = UnicodeStringToken
rawByteStringToken :: SrcSpan -> String -> Token
-rawByteStringToken loc str = ByteStringToken loc $ str
+rawByteStringToken = ByteStringToken
openParen :: (SrcSpan -> Token) -> Action
openParen mkToken loc _len _str = do
@@ -222,6 +222,7 @@ instance Pretty (Expr a) where
pretty Ellipsis {} = text "..."
pretty (ByteStrings { byte_string_strings = bs }) = hcat (map pretty bs)
pretty (Strings { strings_strings = ss }) = hcat (map prettyString ss)
+ pretty (UnicodeStrings { unicodestrings_strings = ss }) = hcat (map prettyString ss)
pretty (Call { call_fun = f, call_args = args }) = pretty f <> prettyParenList args
pretty (Subscript { subscriptee = e, subscript_expr = sub })
= pretty e <> brackets (pretty sub)
@@ -45,6 +45,7 @@ data Token
-- Literals
| StringToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Literal: string.
| ByteStringToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Literal: byte string.
+ | UnicodeStringToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Literal: unicode string, version 2 only.
| IntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer } -- ^ Literal: integer.
| LongIntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer } -- ^ Literal: long integer. /Version 2 only/.
| FloatToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double } -- ^ Literal: floating point.
@@ -59,6 +59,7 @@ $not_double_quote = [. \n] # \"
@two_double_quotes = \"\" $not_double_quote
@byte_str_prefix = b | B
@raw_str_prefix = r | R
+@unicode_str_prefix = u | U
@raw_byte_str_prefix = @byte_str_prefix @raw_str_prefix
@backslash_pair = \\ (\\|'|\"|@eol_pattern|$short_str_char)
@backslash_pair_bs = \\ (\\|'|\"|@eol_pattern|$short_byte_str_char)
@@ -100,28 +101,32 @@ $white_no_nl+ ; -- skip whitespace
0 (b | B) $bin_digit+ (l | L) { token LongIntegerToken (readBinary.init) }
}
--- String literals
+-- String literals
<0> {
' @short_str_item_single* ' { mkString stringToken }
@raw_str_prefix ' @short_str_item_single* ' { mkString rawStringToken }
@byte_str_prefix ' @short_byte_str_item_single* ' { mkString byteStringToken }
@raw_byte_str_prefix ' @short_byte_str_item_single* ' { mkString rawByteStringToken }
+ @unicode_str_prefix ' @short_str_item_single* ' { mkString unicodeStringToken }
\" @short_str_item_double* \" { mkString stringToken }
@raw_str_prefix \" @short_str_item_double* \" { mkString rawStringToken }
@byte_str_prefix \" @short_byte_str_item_double* \" { mkString byteStringToken }
@raw_byte_str_prefix \" @short_byte_str_item_double* \" { mkString rawByteStringToken }
+ @unicode_str_prefix \" @short_str_item_double* \" { mkString unicodeStringToken }
''' @long_str_item_single* ''' { mkString stringToken }
@raw_str_prefix ''' @long_str_item_single* ''' { mkString rawStringToken }
@byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString byteStringToken }
@raw_byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString rawByteStringToken }
+ @unicode_str_prefix ''' @long_str_item_single* ''' { mkString unicodeStringToken }
\"\"\" @long_str_item_double* \"\"\" { mkString stringToken }
@raw_str_prefix \"\"\" @long_str_item_double* \"\"\" { mkString rawStringToken }
@byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString byteStringToken }
@raw_byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString rawByteStringToken }
+ @unicode_str_prefix \"\"\" @long_str_item_double* \"\"\" { mkString unicodeStringToken }
}
-- NOTE: we pass lexToken into some functions as an argument.
@@ -116,6 +116,7 @@ import Data.Maybe (maybeToList)
'return' { ReturnToken {} }
'string' { StringToken {} }
'try' { TryToken {} }
+ 'unicodestring' { UnicodeStringToken {} }
'while' { WhileToken {} }
'with' { WithToken {} }
'yield' { YieldToken {} }
@@ -766,6 +767,7 @@ atom
| 'imaginary' { AST.Imaginary (token_double $1) (token_literal $1) (getSpan $1) }
| many1('string') { AST.Strings (map token_literal $1) (getSpan $1) }
| many1('bytestring') { AST.ByteStrings (map token_literal $1) (getSpan $1) }
+ | many1('unicodestring') { AST.UnicodeStrings (map token_literal $1) (getSpan $1) }
-- listmaker: test ( list_for | (',' test)* [','] )

0 comments on commit 4922e02

Please sign in to comment.