Skip to content

Commit

Permalink
Refactor SourceToken
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianhjr committed May 23, 2020
1 parent 840e020 commit d089ad6
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 97 deletions.
2 changes: 1 addition & 1 deletion docs/source/reference/records.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Lexical structure
constructor ``RF "foo"``)

* ``Foo.bar.baz`` starting with uppercase ``F`` is one lexeme, a namespaced
identifier: ``NSIdent ["baz", "bar", "Foo"]``
identifier: ``NamespacedIdent ["baz", "bar", "Foo"]``

* ``foo.bar.baz`` starting with lowercase ``f`` is three lexemes: ``foo``,
``.bar``, ``.baz``
Expand Down
6 changes: 3 additions & 3 deletions src/Idris/IDEMode/Parser.idr
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ symbols = ["(", ":", ")"]
ideTokens : TokenMap SourceToken
ideTokens =
map (\x => (exact x, Symbol)) symbols ++
[(digits, \x => Literal (cast x)),
(stringLit, \x => StrLit (stripQuotes x)),
(identAllowDashes, \x => NSIdent [x]),
[(digits, \x => IntegerLit (cast x)),
(stringLit, \x => StringLit (stripQuotes x)),
(identAllowDashes, \x => Ident x),
(space, Comment)]

idelex : String -> Either (Int, Int, String) (List (TokenData SourceToken))
Expand Down
4 changes: 2 additions & 2 deletions src/Idris/Package.idr
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,13 @@ field fname
<|> do exactIdent "modules"; symbol "="
ms <- sepBy1 (symbol ",")
(do start <- location
ns <- nsIdent
ns <- namespacedIdent
end <- location
Parser.Core.pure (MkFC fname start end, ns))
Parser.Core.pure (PModules ms)
<|> do exactIdent "main"; symbol "="
start <- location
m <- nsIdent
m <- namespacedIdent
end <- location
Parser.Core.pure (PMainMod (MkFC fname start end) m)
<|> do exactIdent "executable"; symbol "="
Expand Down
10 changes: 5 additions & 5 deletions src/Idris/Parser.idr
Original file line number Diff line number Diff line change
Expand Up @@ -1094,7 +1094,7 @@ namespaceHead : SourceRule (List String)
namespaceHead
= do keyword "namespace"
commit
ns <- nsIdent
ns <- namespacedIdent
pure ns

namespaceDecl : FileName -> IndentInfo -> SourceRule PDecl
Expand Down Expand Up @@ -1489,9 +1489,9 @@ import_ fname indents
keyword "import"
reexp <- option False (do keyword "public"
pure True)
ns <- nsIdent
ns <- namespacedIdent
nsAs <- option ns (do exactIdent "as"
nsIdent)
namespacedIdent)
end <- location
atEnd indents
pure (MkImport (MkFC fname start end) reexp ns nsAs)
Expand All @@ -1502,7 +1502,7 @@ prog fname
= do start <- location
nspace <- option ["Main"]
(do keyword "module"
nsIdent)
namespacedIdent)
end <- location
imports <- block (import_ fname)
ds <- block (topDecl fname)
Expand All @@ -1515,7 +1515,7 @@ progHdr fname
= do start <- location
nspace <- option ["Main"]
(do keyword "module"
nsIdent)
namespacedIdent)
end <- location
imports <- block (import_ fname)
pure (MkModule (MkFC fname start end)
Expand Down
54 changes: 44 additions & 10 deletions src/Parser/Lexer/Common.idr
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,27 @@ comment
-- There are multiple variants.

public export
data Flavour = Capitalised | AllowDashes | Normal
data Flavour = AllowDashes | Capitalised | Module | Normal

export
isIdentStart : Flavour -> Char -> Bool
isIdentStart _ '_' = True
isIdentStart Capitalised x = isUpper x || x > chr 160
isIdentStart _ x = isAlpha x || x > chr 160
isIdentStart f x = checkAllowed || x > chr 160
where
checkAllowed : Bool
checkAllowed = case f of
Capitalised => isUpper x || x == '_'
Module => isUpper x
_ => isAlpha x

export
isIdentTrailing : Flavour -> Char -> Bool
isIdentTrailing AllowDashes '-' = True
isIdentTrailing _ '\'' = True
isIdentTrailing _ '_' = True
isIdentTrailing _ x = isAlphaNum x || x > chr 160
isIdentTrailing f x = checkExtraAllowed || isAlphaNum x || x > chr 160
where
generallyAllowed : Bool
generallyAllowed = x `elem` ['\'', '_']
checkExtraAllowed : Bool
checkExtraAllowed = case f of
AllowDashes => generallyAllowed || x == '-'
Module => False
_ => generallyAllowed

export %inline
isIdent : Flavour -> String -> Bool
Expand All @@ -45,3 +52,30 @@ ident : Flavour -> Lexer
ident flavour =
(pred $ isIdentStart flavour) <+>
(many . pred $ isIdentTrailing flavour)

export
isIdentNormal : String -> Bool
isIdentNormal = isIdent Normal

export
identNormal : Lexer
identNormal = ident Normal

export
identAllowDashes : Lexer
identAllowDashes = ident AllowDashes

namespaceIdent' : Bool -> Lexer
namespaceIdent' b = ident Capitalised <+> many leavingLastIdent
where
leavingLastIdent : Lexer
leavingLastIdent = if b then (is '.' <+> ident Capitalised)
else (is '.' <+> ident Capitalised <+> expect (is '.'))

export
namespaceIdent : Lexer
namespaceIdent = namespaceIdent' False

export
moduleIdent : Lexer
moduleIdent = namespaceIdent' True
13 changes: 13 additions & 0 deletions src/Parser/Lexer/Package.idr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Parser.Lexer.Package

import public Text.Lexer
import public Text.Parser

%default total

public export
data PackageToken
= Comment
| Ident
| Module
| StringLit
100 changes: 46 additions & 54 deletions src/Parser/Lexer/Source.idr
Original file line number Diff line number Diff line change
Expand Up @@ -13,45 +13,49 @@ import Utils.String

public export
data SourceToken
= NSIdent (List String)
| HoleIdent String
| Literal Integer
| StrLit String
| CharLit String
-- Literals
= CharLit String
| DoubleLit Double
| IntegerLit Integer
| StringLit String
-- Identifiers
| HoleIdent String
| Ident String
| NamespacedIdent (List String)
| RecordField String
| Symbol String
| Keyword String
| Unrecognised String
-- Comments
| Comment String
| DocComment String
-- Special
| CGDirective String
| RecordField String
| Pragma String
| EndInput
| Keyword String
| Pragma String
| Unrecognised String

export
Show SourceToken where
show (HoleIdent x) = "hole identifier " ++ x
show (Literal x) = "literal " ++ show x
show (StrLit x) = "string " ++ show x
-- Literals
show (CharLit x) = "character " ++ show x
show (DoubleLit x) = "double " ++ show x
show (IntegerLit x) = "literal " ++ show x
show (StringLit x) = "string " ++ show x
-- Identifiers
show (HoleIdent x) = "hole identifier " ++ x
show (Ident x) = "identifier " ++ x
show (NamespacedIdent xs) = "namespaced identifier " ++ dotSep (reverse xs)
show (RecordField x) = "record field " ++ x
show (Symbol x) = "symbol " ++ x
show (Keyword x) = x
show (Unrecognised x) = "Unrecognised " ++ x
-- Comments
show (Comment _) = "comment"
show (DocComment _) = "doc comment"
-- Special
show (CGDirective x) = "CGDirective " ++ x
show (RecordField x) = "record field " ++ x
show (Pragma x) = "pragma " ++ x
show EndInput = "end of input"
show (NSIdent [x]) = "identifier " ++ x
show (NSIdent xs) = "namespaced identifier " ++ dotSep (reverse xs)
where
dotSep : List String -> String
dotSep [] = ""
dotSep [x] = x
dotSep (x :: xs) = x ++ concat ["." ++ y | y <- xs]
show (Keyword x) = x
show (Pragma x) = "pragma " ++ x
show (Unrecognised x) = "Unrecognised " ++ x

mutual
||| The mutually defined functions represent different states in a
Expand Down Expand Up @@ -102,29 +106,17 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
docComment : Lexer
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')

export
isIdentNormal : String -> Bool
isIdentNormal = isIdent Normal

export
identNormal : Lexer
identNormal = ident Normal

export
identAllowDashes : Lexer
identAllowDashes = ident AllowDashes

holeIdent : Lexer
holeIdent = is '?' <+> ident Normal
holeIdent = is '?' <+> identNormal

nsIdent : Lexer
nsIdent = ident Capitalised <+> many (is '.' <+> ident Normal)
namespacedIdent : Lexer
namespacedIdent = namespaceIdent <+> opt (is '.' <+> identNormal)

recField : Lexer
recField = is '.' <+> ident Normal
recField = is '.' <+> identNormal

pragma : Lexer
pragma = is '%' <+> ident Normal
pragma = is '%' <+> identNormal

doubleLit : Lexer
doubleLit
Expand Down Expand Up @@ -171,7 +163,6 @@ symbols
"(", ")", "{", "}", "[", "]", ",", ";", "_",
"`(", "`"]


export
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
Expand Down Expand Up @@ -205,6 +196,8 @@ fromOctLit str
Nothing => 0 -- can't happen if the literal lexed correctly
Just n => cast n

%hide Prelude.reverse

rawTokens : TokenMap SourceToken
rawTokens =
[(comment, Comment),
Expand All @@ -214,27 +207,26 @@ rawTokens =
(holeIdent, \x => HoleIdent (assert_total (strTail x)))] ++
map (\x => (exact x, Symbol)) symbols ++
[(doubleLit, \x => DoubleLit (cast x)),
(hexLit, \x => Literal (fromHexLit x)),
(octLit, \x => Literal (fromOctLit x)),
(digits, \x => Literal (cast x)),
(stringLit, \x => StrLit (stripQuotes x)),
(hexLit, \x => IntegerLit (fromHexLit x)),
(octLit, \x => IntegerLit (fromOctLit x)),
(digits, \x => IntegerLit (cast x)),
(stringLit, \x => StringLit (stripQuotes x)),
(charLit, \x => CharLit (stripQuotes x)),
(recField, \x => RecordField (assert_total $ strTail x)),
(nsIdent, parseNSIdent),
(ident Normal, parseIdent),
(namespacedIdent, parseNamespace),
(identNormal, parseIdent),
(pragma, \x => Pragma (assert_total $ strTail x)),
(space, Comment),
(validSymbol, Symbol),
(symbol, Unrecognised)]
where
parseNSIdent : String -> SourceToken
parseNSIdent = NSIdent . reverse . split (== '.')

parseIdent : String -> SourceToken
parseIdent x =
if x `elem` keywords
then Keyword x
else NSIdent [x]
parseIdent x = if x `elem` keywords then Keyword x
else Ident x
parseNamespace : String -> SourceToken
parseNamespace ns = case reverse . split (== '.') $ ns of
[ident] => parseIdent ident
ns => NamespacedIdent ns

export
lexTo : (TokenData SourceToken -> Bool) ->
Expand Down
Loading

0 comments on commit d089ad6

Please sign in to comment.