-
Notifications
You must be signed in to change notification settings - Fork 459
Institute parser for Core. #42
Changes from all commits
e8ac13f
2b446df
132c30a
11a5217
e9b0c45
01a4bd2
f0c880c
b135c62
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| # Semantic Core grammar | ||
|
|
||
| This is an EBNF grammar for the (experimental) core IR language. | ||
|
|
||
| ``` | ||
| expr ::= expr '.' expr | ||
| | expr ' '+ expr | ||
| | '{' expr (';' expr)* ';'? '}' | ||
| | 'if' expr 'then' expr 'else' expr | ||
| | ('lexical' | 'import' | 'load') expr | ||
| | lit | ||
| | 'let'? lvalue '=' expr | ||
| | '(' expr ')' | ||
|
|
||
| lvalue ::= ident | ||
| | '(' expr ')' | ||
|
|
||
| lit ::= '#true' | ||
| | '#false' | ||
| | '#unit' | ||
| | '#frame' | ||
| | lambda | ||
| | ident | ||
|
|
||
| lambda ::= ('λ' | '\') ident ('->' | '→') expr | ||
|
|
||
| ident ::= [A-Za-z_] ([A-Za-z0-9_])* | ||
| | '#{' [^{}]+ '}' | ||
| | '"' [^"]+ '"' | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| module Data.Core.Parser | ||
| ( module Text.Trifecta | ||
| , core | ||
| , lit | ||
| , expr | ||
| , lvalue | ||
| ) where | ||
|
|
||
| -- Consult @doc/grammar.md@ for an EBNF grammar. | ||
patrickt marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| import Control.Applicative | ||
| import qualified Data.Char as Char | ||
| import Data.Core | ||
| import Data.Name | ||
| import Data.Semigroup | ||
| import Data.String | ||
| import qualified Text.Parser.Token as Token | ||
| import qualified Text.Parser.Token.Highlight as Highlight | ||
| import Text.Trifecta hiding (ident) | ||
|
|
||
| -- * Identifier styles and derived parsers | ||
|
|
||
-- | Decide whether a character may begin a plain identifier: it must not be
-- a digit (per 'Char.isDigit') and must satisfy 'isSimpleCharacter'.
validIdentifierStart :: Char -> Bool
validIdentifierStart c
  | Char.isDigit c = False
  | otherwise      = isSimpleCharacter c
|
|
||
-- | The identifier style used by the identifier and reserved-word parsers in
-- this module. The first character must satisfy 'validIdentifierStart',
-- subsequent characters must satisfy 'isSimpleCharacter', and the words in
-- 'reservedNames' are treated as reserved.
coreIdents :: TokenParsing m => IdentifierStyle m
coreIdents = Token.IdentifierStyle
  { _styleName              = "core"
  , _styleStart             = firstChar
  , _styleLetter            = laterChar
  , _styleReserved          = reservedNames
  , _styleHighlight         = Highlight.Identifier
  , _styleReservedHighlight = Highlight.ReservedIdentifier
  }
  where
    firstChar = satisfy validIdentifierStart
    laterChar = satisfy isSimpleCharacter
|
|
||
-- | Parse the given reserved word according to the 'coreIdents' style,
-- discarding the result.
reserved :: (TokenParsing m, Monad m) => String -> m ()
reserved word = Token.reserve coreIdents word
|
|
||
-- | Parse an identifier in one of two forms, trying the quoted form first:
--
-- * quoted: @#{...}@, where the contents are one or more characters other
--   than @{@ and @}@;
-- * bare: an identifier in the 'coreIdents' style.
--
-- The result is produced through 'IsString', so callers choose the string
-- representation.
identifier :: (TokenParsing m, Monad m, IsString s) => m s
identifier = (quoted <|> bare) <?> "identifier"
  where
    bare   = Token.ident coreIdents
    quoted = fromString <$> between (string "#{") (symbol "}") (some (noneOf "{}"))
|
|
||
| -- * Parsers (corresponding to EBNF) | ||
|
|
||
-- | Entry point for parsing a Core term; currently a synonym for 'expr'.
core :: (TokenParsing m, Monad m) => m Core
core = expr
|
|
||
-- | Parse one or more 'atom's, combined left-associatively.
--
-- Two binary operators are recognised between atoms:
--
-- * an explicit dot, which builds ':.';
-- * plain juxtaposition, which builds ':$'. This alternative consumes no
--   input itself: it only checks that the next token is not a dot.
expr :: (TokenParsing m, Monad m) => m Core
expr = chainl1 atom operator
  where
    operator   = dotted <|> juxtaposed
    dotted     = (:.) <$ dot
    juxtaposed = (:$) <$ notFollowedBy dot
|
|
||
-- | Parse a single operand of 'expr'.
--
-- Alternatives are tried in order and the first one to succeed wins, so the
-- order is significant. In particular 'assign' must be tried before 'ident':
-- for input such as @x = e@, 'ident' succeeds on @x@ alone, which would
-- prevent the assignment from ever being recognised. 'assign' wraps its
-- left-hand side and @=@ sign in 'try', so when no @=@ follows it backtracks
-- and the remaining alternatives are attempted from the same position.
atom :: (TokenParsing m, Monad m) => m Core
atom = choice
  [ comp
  , ifthenelse
  , edge
  , lit
  , assign
  , ident
  , parens expr
  ]
|
|
||
-- | Parse a brace-delimited compound statement: one or more expressions
-- separated (and optionally terminated) by semicolons. The parsed
-- expressions are combined into a single 'Core' value with 'sconcat'.
comp :: (TokenParsing m, Monad m) => m Core
comp = braces statements <?> "compound statement"
  where
    statements = sconcat <$> expr `sepEndByNonEmpty` semi
|
|
||
-- | Parse @if c then t else e@ into an 'If' node. Each of the three
-- sub-terms is parsed with 'core'.
ifthenelse :: (TokenParsing m, Monad m) => m Core
ifthenelse = conditional <?> "if-then-else statement"
  where
    conditional = do
      reserved "if"
      condition <- core
      reserved "then"
      consequent <- core
      reserved "else"
      alternative <- core
      pure (If condition consequent alternative)
|
|
||
-- | Parse an assignment @lvalue = core@ into a ':=' node.
--
-- The left-hand side together with the @=@ sign is wrapped in 'try', so a
-- term that merely starts like an lvalue does not commit the parser to this
-- alternative.
assign :: (TokenParsing m, Monad m) => m Core
assign = liftA2 (:=) target core <?> "assignment"
  where
    target = try (lvalue <* symbolic '=')
|
|
||
-- | Parse a keyword-prefixed expression: @lexical e@ and @import e@ build
-- 'Edge' nodes tagged 'Lexical' and 'Import' respectively, and @load e@
-- builds a 'Load' node. The operand is parsed with 'expr'.
edge :: (TokenParsing m, Monad m) => m Core
edge = keyword <*> expr
  where
    keyword = (Edge Lexical <$ reserved "lexical")
          <|> (Edge Import  <$ reserved "import")
          <|> (Load         <$ reserved "load")
|
|
||
-- | Parse the left-hand side of an assignment. The alternatives are tried
-- in this order: a @let@-introduced name (yielding 'Let'), an identifier,
-- or a parenthesised expression.
lvalue :: (TokenParsing m, Monad m) => m Core
lvalue = binding <|> ident <|> parens expr
  where
    binding = Let <$> (reserved "let" *> name)
|
|
||
| -- * Literals | ||
|
|
||
-- | Parse a 'Name': either a 'User' name written as an 'identifier', or a
-- 'Path' written as a double-quoted, non-empty string containing no double
-- quotes.
--
-- The opening quote is parsed with 'char' rather than a token parser: a token
-- parser skips whitespace after the quote, which would silently strip
-- leading whitespace from the path. This mirrors how 'identifier' opens its
-- @#{@ form. The closing quote remains a token so that whitespace following
-- the whole name is still consumed.
name :: (TokenParsing m, Monad m) => m Name
name = choice [regular, strpath] <?> "name"
  where
    regular = User <$> identifier
    strpath = Path <$> between (char '"') (symbolic '"') (some $ noneOf "\"")
|
|
||
| lit :: (TokenParsing m, Monad m) => m Core | ||
| lit = let x `given` n = x <$ reserved n in choice | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I always forget you can define infix functions this way, just like any other operator.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup! And you can give them a custom precedence, too! |
||
| [ Bool True `given` "#true" | ||
| , Bool False `given` "#false" | ||
| , Unit `given` "#unit" | ||
| , Frame `given` "#frame" | ||
| , lambda | ||
| ] <?> "literal" | ||
|
|
||
-- | Parse a lambda abstraction into a 'Lam' node: a binder symbol (@λ@ or a
-- backslash), the bound 'name', an arrow (@→@ or @->@), and a body parsed
-- with 'core'.
lambda :: (TokenParsing m, Monad m) => m Core
lambda = abstraction <?> "lambda"
  where
    abstraction = Lam <$> (binder *> name) <*> (arrow *> core)
    binder      = symbolic 'λ' <|> symbolic '\\'
    arrow       = symbol "→" <|> symbol "->"
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I’m still kind of ambivalent about unicode symbols in general, but the pretty-printer is awfully nice and round-tripping seems like a good property, so 👍 |
||
|
|
||
-- | Parse a 'name' and wrap it in 'Var'.
ident :: (Monad m, TokenParsing m) => m Core
ident = fmap Var name <?> "identifier"
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool!