diff --git a/compiler/Setup.hs b/compiler/Setup.hs new file mode 100644 index 00000000..9a994af6 --- /dev/null +++ b/compiler/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/compiler/capnproto-compiler.cabal b/compiler/capnproto-compiler.cabal new file mode 100644 index 00000000..df34f2ed --- /dev/null +++ b/compiler/capnproto-compiler.cabal @@ -0,0 +1,21 @@ +name: capnproto-compiler +version: 0.1 +cabal-version: >=1.2 +build-type: Simple +author: kenton + +executable capnproto-compiler + hs-source-dirs: src + main-is: Main.hs + build-depends: + base >= 4, + parsec, + mtl, + containers + ghc-options: -Wall -fno-warn-missing-signatures + other-modules: + Lexer, + Token, + Grammar, + Parser + diff --git a/compiler/src/Grammar.hs b/compiler/src/Grammar.hs new file mode 100644 index 00000000..7629984e --- /dev/null +++ b/compiler/src/Grammar.hs @@ -0,0 +1,54 @@ +-- Copyright (c) 2013, Kenton Varda +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are met: +-- +-- 1. Redistributions of source code must retain the above copyright notice, this +-- list of conditions and the following disclaimer. +-- 2. Redistributions in binary form must reproduce the above copyright notice, +-- this list of conditions and the following disclaimer in the documentation +-- and/or other materials provided with the distribution. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +module Grammar where + +data DeclName = AbsoluteName String + | RelativeName String + | ImportName String + | MemberName DeclName String + deriving (Show) + +data TypeExpression = TypeName DeclName + | Array TypeExpression + deriving (Show) + +data FieldValue = VoidFieldValue + | BoolFieldValue Bool + | IntegerFieldValue Integer + | FloatFieldValue Double + | StringFieldValue String + | ArrayFieldValue [FieldValue] + | RecordFieldValue [(String, FieldValue)] + deriving (Show) + +data Declaration = ConstantDecl String (Maybe TypeExpression) FieldValue + | EnumDecl String [Declaration] + | EnumValueDecl String Integer [Declaration] + | ClassDecl String [Declaration] + | FieldDecl String Integer TypeExpression FieldValue [Declaration] + | InterfaceDecl String [Declaration] + | MethodDecl String [(String, TypeExpression, FieldValue)] + TypeExpression [Declaration] + | OptionDecl DeclName FieldValue + deriving (Show) diff --git a/compiler/src/Lexer.hs b/compiler/src/Lexer.hs new file mode 100644 index 00000000..59c02789 --- /dev/null +++ b/compiler/src/Lexer.hs @@ -0,0 +1,113 @@ +-- Copyright (c) 2013, Kenton Varda +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are met: +-- +-- 1. Redistributions of source code must retain the above copyright notice, this +-- list of conditions and the following disclaimer. +-- 2. Redistributions in binary form must reproduce the above copyright notice, +-- this list of conditions and the following disclaimer in the documentation +-- and/or other materials provided with the distribution. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +module Lexer (lexer) where + +import Text.Parsec hiding (token, tokens) +import Text.Parsec.String +import Control.Monad (liftM) +import qualified Text.Parsec.Token as T +import Text.Parsec.Language (emptyDef) +import Token + +keywords = + [ (ImportKeyword, "import") + , (ConstKeyword, "const") + , (EnumKeyword, "enum") + , (ClassKeyword, "class") + , (InterfaceKeyword, "interface") + , (OptionKeyword, "option") + ] + +languageDef :: T.LanguageDef st +languageDef = emptyDef + { T.commentLine = "#" + , T.identStart = letter <|> char '_' + , T.identLetter = alphaNum <|> char '_' + , T.reservedNames = [name | (_, name) <- keywords] + , T.opStart = T.opLetter languageDef + , T.opLetter = fail "There are no operators." + } + +tokenParser = T.makeTokenParser languageDef + +identifier = T.identifier tokenParser +reserved = T.reserved tokenParser +symbol = T.symbol tokenParser +naturalOrFloat = T.naturalOrFloat tokenParser +braces = T.braces tokenParser +parens = T.parens tokenParser +brackets = T.brackets tokenParser +whiteSpace = T.whiteSpace tokenParser +stringLiteral = T.stringLiteral tokenParser + +keyword :: Parser Token +keyword = foldl1 (<|>) [reserved name >> return t | (t, name) <- keywords] + +toLiteral :: Either Integer Double -> Token +toLiteral (Left i) = LiteralInt i +toLiteral (Right d) = LiteralFloat d + +located :: Parser t -> Parser (Located t) +located p = do + pos <- getPosition + t <- p + return (Located pos t) + +token :: Parser Token +token = keyword + <|> liftM Identifier identifier + <|> liftM ParenthesizedList (parens (sepBy (many locatedToken) (symbol ","))) + <|> liftM BracketedList (brackets (sepBy (many locatedToken) (symbol ","))) + <|> liftM toLiteral naturalOrFloat + <|> liftM LiteralString stringLiteral + <|> liftM (const AtSign) (symbol "@") + <|> liftM (const Colon) (symbol ":") + <|> liftM (const Period) (symbol ".") + <|> liftM (const EqualsSign) (symbol "=") + "token" + +locatedToken = located token + +statementEnd :: Parser (Maybe [Located Statement]) +statementEnd = (symbol ";" >>= \_ -> return Nothing) + <|> (braces (many locatedStatement) >>= \statements -> return (Just statements)) + +compileStatement :: [Located Token] -> Maybe [Located Statement] -> Statement +compileStatement tokens Nothing = Line tokens +compileStatement tokens (Just statements) = Block tokens statements + +statement :: Parser Statement +statement = do + tokens <- many locatedToken + end <- statementEnd + return (compileStatement tokens end) + +locatedStatement = located statement + +lexer :: Parser [Located Statement] +lexer = do + whiteSpace + tokens <- many locatedStatement + eof + return tokens diff --git a/compiler/src/Main.hs b/compiler/src/Main.hs new file mode 100644 index 00000000..1dd1e0e0 --- /dev/null +++ b/compiler/src/Main.hs @@ -0,0 +1,27 @@ +-- Copyright (c) 2013, Kenton Varda +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are met: +-- +-- 1. Redistributions of source code must retain the above copyright notice, this +-- list of conditions and the following disclaimer. +-- 2. Redistributions in binary form must reproduce the above copyright notice, +-- this list of conditions and the following disclaimer in the documentation +-- and/or other materials provided with the distribution. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +module Main ( main ) where + +main::IO() +main = undefined diff --git a/compiler/src/Parser.hs b/compiler/src/Parser.hs new file mode 100644 index 00000000..5f669c94 --- /dev/null +++ b/compiler/src/Parser.hs @@ -0,0 +1,242 @@ +-- Copyright (c) 2013, Kenton Varda +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are met: +-- +-- 1. Redistributions of source code must retain the above copyright notice, this +-- list of conditions and the following disclaimer. +-- 2. Redistributions in binary form must reproduce the above copyright notice, +-- this list of conditions and the following disclaimer in the documentation +-- and/or other materials provided with the distribution. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +module Parser (parseFile) where + +import Text.Parsec hiding (tokens) +import Token +import Control.Monad (liftM) +import Grammar +import Lexer (lexer) + +tokenParser :: (Located Token -> Maybe a) -> Parsec [Located Token] u a +tokenParser = token (show . locatedValue) locatedPos + +type TokenParser = Parsec [Located Token] [ParseError] + +-- Hmm, boilerplate is not supposed to happen in Haskell. +matchIdentifier t = case locatedValue t of { (Identifier v) -> Just v; _ -> Nothing } +matchParenthesizedList t = case locatedValue t of { (ParenthesizedList v) -> Just v; _ -> Nothing } +matchBracketedList t = case locatedValue t of { (BracketedList v) -> Just v; _ -> Nothing } +matchLiteralInt t = case locatedValue t of { (LiteralInt v) -> Just v; _ -> Nothing } +matchLiteralFloat t = case locatedValue t of { (LiteralFloat v) -> Just v; _ -> Nothing } +matchLiteralString t = case locatedValue t of { (LiteralString v) -> Just v; _ -> Nothing } +matchSimpleToken expected t = if locatedValue t == expected then Just () else Nothing + +identifier = tokenParser matchIdentifier +literalInt = tokenParser matchLiteralInt +literalFloat = tokenParser matchLiteralFloat +literalString = tokenParser matchLiteralString + +atSign = tokenParser (matchSimpleToken AtSign) +colon = tokenParser (matchSimpleToken Colon) +period = tokenParser (matchSimpleToken Period) +equalsSign = tokenParser (matchSimpleToken EqualsSign) +importKeyword = tokenParser (matchSimpleToken ImportKeyword) +constKeyword = tokenParser (matchSimpleToken ConstKeyword) +enumKeyword = tokenParser (matchSimpleToken EnumKeyword) +classKeyword = tokenParser (matchSimpleToken ClassKeyword) +interfaceKeyword = tokenParser (matchSimpleToken InterfaceKeyword) +optionKeyword = tokenParser (matchSimpleToken OptionKeyword) + +parenthesizedList parser = do + items <- tokenParser matchParenthesizedList + parseList parser items +bracketedList parser = do + items <- tokenParser matchBracketedList + parseList parser items + +declNameBase :: TokenParser DeclName +declNameBase = liftM ImportName (importKeyword >> literalString) + <|> liftM AbsoluteName (period >> identifier) + <|> liftM RelativeName identifier + +declName :: TokenParser DeclName +declName = do + base <- declNameBase + members <- many (period >> identifier) + return (foldl MemberName base members :: DeclName) + +typeExpression :: TokenParser TypeExpression +typeExpression = do + name <- declName + suffixes <- many (bracketedList (fail "Brackets should be empty.")) + return (applySuffixes (TypeName name) (length suffixes)) where + applySuffixes t 0 = t + applySuffixes t n = applySuffixes (Array t) (n - 1) + +topLine :: Maybe [Located Statement] -> TokenParser Declaration +topLine Nothing = optionDecl <|> constantDecl <|> implicitConstantDecl +topLine (Just statements) = typeDecl statements + +constantDecl = do + constKeyword + implicitConstantDecl + +implicitConstantDecl = do + name <- identifier + typeName <- optionMaybe (period >> typeExpression) + equalsSign + value <- fieldValue + return (ConstantDecl name typeName value) + +typeDecl statements = enumDecl statements + <|> classDecl statements + <|> interfaceDecl statements + +enumDecl statements = do + enumKeyword + name <- identifier + children <- parseBlock enumLine statements + return (EnumDecl name children) + +enumLine :: Maybe [Located Statement] -> TokenParser Declaration +enumLine Nothing = optionDecl <|> enumValueDecl [] +enumLine (Just statements) = enumValueDecl statements + +enumValueDecl statements = do + name <- identifier + equalsSign + value <- literalInt + children <- parseBlock enumValueLine statements + return (EnumValueDecl name value children) + +enumValueLine :: Maybe [Located Statement] -> TokenParser Declaration +enumValueLine Nothing = optionDecl +enumValueLine (Just _) = fail "Blocks not allowed here." + +classDecl statements = do + classKeyword + name <- identifier + children <- parseBlock classLine statements + return (ClassDecl name children) + +classLine :: Maybe [Located Statement] -> TokenParser Declaration +classLine Nothing = optionDecl <|> constantDecl <|> fieldDecl [] +classLine (Just statements) = typeDecl statements <|> fieldDecl statements + +fieldDecl statements = do + name <- identifier + atSign + ordinal <- literalInt + colon + t <- typeExpression + value <- option VoidFieldValue (equalsSign >> fieldValue) + children <- parseBlock fieldLine statements + return (FieldDecl name ordinal t value children) + +fieldValue = liftM IntegerFieldValue literalInt + <|> liftM FloatFieldValue literalFloat + <|> liftM StringFieldValue literalString + <|> liftM ArrayFieldValue (bracketedList fieldValue) + <|> liftM RecordFieldValue (parenthesizedList fieldAssignment) + +fieldAssignment = do + name <- identifier + equalsSign + value <- fieldValue + return (name, value) + +fieldLine :: Maybe [Located Statement] -> TokenParser Declaration +fieldLine Nothing = optionDecl +fieldLine (Just _) = fail "Blocks not allowed here." + +interfaceDecl statements = do + interfaceKeyword + name <- identifier + children <- parseBlock interfaceLine statements + return (InterfaceDecl name children) + +interfaceLine :: Maybe [Located Statement] -> TokenParser Declaration +interfaceLine Nothing = optionDecl <|> constantDecl <|> methodDecl [] +interfaceLine (Just statements) = typeDecl statements <|> methodDecl statements + +methodDecl statements = do + name <- identifier + params <- parenthesizedList paramDecl + t <- typeExpression + children <- parseBlock methodLine statements + return (MethodDecl name params t children) + +paramDecl = do + name <- identifier + colon + t <- typeExpression + value <- option VoidFieldValue (equalsSign >> fieldValue) + return (name, t, value) + +methodLine :: Maybe [Located Statement] -> TokenParser Declaration +methodLine Nothing = optionDecl +methodLine (Just _) = fail "Blocks not allowed here." + +optionDecl = do + optionKeyword + name <- declName + equalsSign + value <- fieldValue + return (OptionDecl name value) + +extractErrors :: Either ParseError (a, [ParseError]) -> [ParseError] +extractErrors (Left err) = [err] +extractErrors (Right (_, errors)) = errors + +parseList parser items = finish where + results = map (parseCollectingErrors parser) items + finish = do + modifyState (\old -> concat (old:map extractErrors results)) + return [ result | Right (result, _) <- results ] + +parseBlock :: (Maybe [Located Statement] -> TokenParser Declaration) + -> [Located Statement] -> TokenParser [Declaration] +parseBlock parser statements = finish where + results = map (parseStatement parser) statements + finish = do + modifyState (\old -> concat (old:map extractErrors results)) + return [ result | Right (result, _) <- results ] + +parseCollectingErrors :: TokenParser a -> [Located Token] -> Either ParseError (a, [ParseError]) +parseCollectingErrors parser = runParser parser' [] "" where + parser' = do + result <- parser + eof + errors <- getState + return (result, errors) + +parseStatement :: (Maybe [Located Statement] -> TokenParser Declaration) + -> Located Statement + -> Either ParseError (Declaration, [ParseError]) +parseStatement parser (Located _ (Line tokens)) = + parseCollectingErrors (parser Nothing) tokens +parseStatement parser (Located _ (Block tokens statements)) = + parseCollectingErrors (parser (Just statements)) tokens + +parseFileTokens :: [Located Statement] -> ([Declaration], [ParseError]) +parseFileTokens statements = (decls, errors) where + results = map (parseStatement topLine) statements + errors = concatMap extractErrors results + decls = [ result | Right (result, _) <- results ] + +parseFile :: String -> String -> ([Declaration], [ParseError]) +parseFile filename text = case parse lexer filename text of + Left e -> ([], [e]) + Right tokens -> parseFileTokens tokens diff --git a/compiler/src/Token.hs b/compiler/src/Token.hs new file mode 100644 index 00000000..8aaa56a1 --- /dev/null +++ b/compiler/src/Token.hs @@ -0,0 +1,65 @@ +-- Copyright (c) 2013, Kenton Varda +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are met: +-- +-- 1. Redistributions of source code must retain the above copyright notice, this +-- list of conditions and the following disclaimer. +-- 2. Redistributions in binary form must reproduce the above copyright notice, +-- this list of conditions and the following disclaimer in the documentation +-- and/or other materials provided with the distribution. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +module Token where + +import Data.Char (toLower) +import Text.Parsec.Pos (SourcePos, sourceLine, sourceColumn) +import Text.Printf (printf) + +data PrimitiveType = Void | Bool + | Int8 | Int16 | Int32 | Int64 + | UInt8 | UInt16 | UInt32 | UInt64 + | Float32 | Float64 + | Text | Bytes + deriving (Show, Enum, Bounded, Eq) + +primitiveTypes = [(t, map toLower (show t)) + | t <- [minBound::PrimitiveType .. maxBound::PrimitiveType]] + +data Located t = Located { locatedPos :: SourcePos, locatedValue :: t } deriving (Eq) + +instance Show t => Show (Located t) where + show (Located pos x) = printf "%d:%d:%s" (sourceLine pos) (sourceColumn pos) (show x) + +data Token = Identifier String + | ParenthesizedList [[Located Token]] + | BracketedList [[Located Token]] + | LiteralInt Integer + | LiteralFloat Double + | LiteralString String + | AtSign + | Colon + | Period + | EqualsSign + | ImportKeyword + | ConstKeyword + | EnumKeyword + | ClassKeyword + | InterfaceKeyword + | OptionKeyword + deriving (Show, Eq) + +data Statement = Line [Located Token] + | Block [Located Token] [Located Statement] + deriving (Show)