Permalink
Browse files

First version

darcs-hash:20060719141025-974a0-6e9a6d13bd21a82dcbbed2e5c40c9aaabf109000.gz
  • Loading branch information...
0 parents commit 9bdcc46bbe0072eead86467ac2dcb7a4efef6a4c eb committed Jul 19, 2006
@@ -0,0 +1,176 @@
+> module EMachine.Bytecode where
+
+> import Control.Monad.State
+> import EMachine.Language
+
+> type Local = Int
+> type Tag = Int
+> type TmpVar = Int
+
+Register based - most operations do an action, then put the result in a
+'TmpVar' which is basically a numbered register. There are infinite registers
+at this stage.
+
+> data ByteOp = CALL TmpVar Name [TmpVar]
+> | THUNK TmpVar Int Name [TmpVar]
+> | ADDARGS TmpVar TmpVar [TmpVar]
+> | FOREIGN Type TmpVar String [(TmpVar, Type)]
+> | VAR TmpVar Local
+> | ASSIGN Local TmpVar
+> | CON TmpVar Tag [TmpVar]
+> | UNIT TmpVar
+> | INT TmpVar Int
+> | FLOAT TmpVar Float
+> | STRING TmpVar String
+> | PROJ TmpVar TmpVar Int -- project into a register
+> | PROJVAR Local TmpVar Int -- project into a local variable
+> | CASE TmpVar [Bytecode]
+> | OP TmpVar Op TmpVar TmpVar
+> | LOCALS Int -- allocate space for locals
+> | TMPS Int -- declare temporary variables
+> | EVAL TmpVar
+> -- | LET TmpVar Local TmpVar
+> | RETURN TmpVar
+> | ERROR String -- Fatal error, exit
+> deriving Show
+
+> type Bytecode = [ByteOp]
+
+> data FunCode = Code [Type] Bytecode
+> deriving Show
+
+> data CompileState = CS { arg_types :: [Type],
+> num_locals :: Int,
+> next_tmp :: Int }
+
+> compile :: Context -> Func -> FunCode
+> compile ctxt fn@(Bind args locals def) =
+> let cs = (CS (map snd args) (length args) 1)
+> code = evalState (scompile ctxt fn) cs in
+> Code (map snd args) code
+
+> scompile :: Context -> Func -> State CompileState Bytecode
+> scompile ctxt (Bind args locals def) =
+> do -- put (CS args (length args) 1)
+> code <- ecomp def 0
+> cs <- get
+> return $ (LOCALS locals):(TMPS (next_tmp cs)):code++[RETURN 0]
+
+> where
+
+> new_tmp :: State CompileState Int
+> new_tmp = do cs <- get
+> let reg' = next_tmp cs
+> put (cs { next_tmp = reg'+1 } )
+> return reg'
+
+Add some locals, return de Bruijn level of first new one.
+
+> new_locals :: Int -> State CompileState Int
+> new_locals args =
+> do cs <- get
+> let loc = num_locals cs
+> put (cs { num_locals = loc+args } )
+> return loc
+
+Take an expression and the register (TmpVar) to put the result into;
+compile code to do just that.
+
+> ecomp :: Expr -> TmpVar -> State CompileState Bytecode
+> ecomp (V v) reg =
+> do return [VAR reg v]
+> ecomp (R x) reg = acomp (R x) [] reg
+> ecomp (App f as) reg = acomp f as reg
+> ecomp (Con t as) reg =
+> do (argcode, argregs) <- ecomps as
+> return $ argcode ++ [CON reg t argregs]
+> ecomp (Proj con i) reg =
+> do reg' <- new_tmp
+> concode <- ecomp con reg'
+> return [PROJ reg reg' i]
+> ecomp (Const c) reg = ccomp c reg
+> ecomp (Case scrutinee alts) reg =
+> do screg <- new_tmp
+> sccode <- ecomp scrutinee screg
+> altcode <- altcomps alts screg reg
+> return $ sccode ++ [EVAL screg, CASE screg altcode]
+> ecomp (Op op l r) reg =
+> do lreg <- new_tmp
+> rreg <- new_tmp
+> lcode <- ecomp l lreg
+> rcode <- ecomp r rreg
+> return $ lcode ++ rcode ++ [OP reg op lreg rreg]
+> ecomp (Let nm ty val scope) reg =
+> do loc <- new_locals 1
+> reg' <- new_tmp
+> valcode <- ecomp val reg'
+> scopecode <- ecomp scope reg
+> return $ valcode ++ (ASSIGN loc reg'):scopecode
+> ecomp (Error str) reg = return [ERROR str]
+> ecomp Impossible reg = return [ERROR "The impossible happened."]
+> ecomp (ForeignCall ty fn argtypes) reg = do
+> let (args,types) = unzip argtypes
+> (argcode, argregs) <- ecomps args
+> let evalcode = map EVAL argregs
+> return $ argcode ++ evalcode ++ [FOREIGN ty reg fn (zip argregs types)]
+
+> ecomps :: [Expr] -> State CompileState (Bytecode, [TmpVar])
+> ecomps e = ecomps' [] [] e
+> ecomps' code tmps [] = return (code, tmps)
+> ecomps' code tmps (e:es) =
+> do reg <- new_tmp
+> ecode <- ecomp e reg
+> ecomps' (code++ecode) (tmps++[reg]) es
+
+Compile case alternatives.
+FIXME: Reorder so that the tags are in order and gaps are filled with error
+case.
+
+> altcomps :: [CaseAlt] -> TmpVar -> TmpVar ->
+> State CompileState [Bytecode]
+> altcomps [] _ _ = return []
+> altcomps (a:as) scrutinee reg =
+> do acode <- altcomp a scrutinee reg
+> ascode <- altcomps as scrutinee reg
+> return (acode:ascode)
+
+Assume that all the tags are in order, and unused constructors have
+a default inserted (i.e., tag can be ignored).
+
+> altcomp :: CaseAlt -> TmpVar -> TmpVar -> State CompileState Bytecode
+> altcomp (Alt tag nmargs expr) scrutinee reg =
+> do let args = map snd nmargs
+> local <- new_locals (length args)
+> projcode <- project args scrutinee local 0
+> exprcode <- ecomp expr reg
+> return (projcode++exprcode)
+
+> project [] _ _ _ = return []
+> project (_:as) scr loc arg =
+> do let acode = PROJVAR loc scr arg
+> ascode <- project as scr (loc+1) (arg+1)
+> return (acode:ascode)
+
+Compile an application of a function to arguments
+
+> acomp :: Expr -> [Expr] -> TmpVar -> State CompileState Bytecode
+> acomp (R x) args reg
+> | arity x ctxt == length args =
+> do (argcode, argregs) <- ecomps args
+> return $ argcode ++ [CALL reg x argregs]
+> | otherwise =
+> do (argcode, argregs) <- ecomps args
+> return $ argcode ++ [THUNK reg (arity x ctxt) x argregs]
+> acomp f args reg
+> = do (argcode, argregs) <- ecomps args
+> reg' <- new_tmp
+> fcode <- ecomp f reg'
+> return $ fcode ++ argcode ++ [ADDARGS reg reg' argregs]
+
+> ccomp (MkInt i) reg = return [INT reg i]
+> ccomp (MkChar c) reg = return [INT reg (fromEnum c)]
+> ccomp (MkFloat f) reg = return [FLOAT reg f]
+> ccomp (MkBool b) reg = return [INT reg (if b then 1 else 0)]
+> ccomp (MkString s) reg = return [STRING reg s]
+> ccomp (MkUnit) reg = return [UNIT reg]
+
@@ -0,0 +1,145 @@
+> module EMachine.CodegenC where
+
+> import Control.Monad.State
+
+> import EMachine.Language
+> import EMachine.Bytecode
+
+> codegenC :: Context -> [Decl] -> String
+> codegenC ctxt decs =
+> fileHeader ++
+> headers decs ++ "\n" ++
+> wrappers decs ++
+> workers ctxt decs ++
+> mainDriver
+
+> fileHeader = "#include \"closure.h\"\n#include <assert.h>\n\n"
+> mainDriver = "int main() { _do__U_main(); return 0; }\n"
+
+> showarg _ i = "void* " ++ loc i
+
+> showargs [] i= ""
+> showargs [x] i = showarg x i
+> showargs (x:xs) i = showarg x i ++ ", " ++ showargs xs (i+1)
+
+> headers [] = ""
+> headers ((Decl fname ret (Bind args _ _)):xs) =
+> "void* " ++ thunk fname ++ "(void** block);\n" ++
+> "void* " ++ quickcall fname ++ "(" ++ showargs args 0 ++ ");\n" ++
+> headers xs
+> headers (_:xs) = headers xs
+
+> wrappers [] = ""
+> wrappers ((Decl fname ret (Bind args _ _)):xs) =
+> "void* " ++ thunk fname ++ "(void** block) {\n return " ++
+> quickcall fname ++ "(" ++
+> wrapperArgs (length args) ++ ");\n}\n\n" ++
+> wrappers xs
+> wrappers (_:xs) = wrappers xs
+
+> wrapperArgs 0 = ""
+> wrapperArgs 1 = "block[0]"
+> wrapperArgs x = wrapperArgs (x-1) ++ ", block[" ++ show (x-1) ++ "]"
+
+> workers _ [] = ""
+> workers ctxt ((Decl fname ret func@(Bind args locals defn)):xs) =
+> "void* " ++ quickcall fname ++ "(" ++ showargs args 0 ++ ") {\n" ++
+> compileBody (compile ctxt func) ++ "\n}\n\n" ++
+> workers ctxt xs
+> workers ctxt (_:xs) = workers ctxt xs
+
+> tmp v = "tmp" ++ show v
+> loc v = "var" ++ show v
+
+> quickcall fn = "_do_" ++ show fn
+> thunk fn = "_wrap_" ++ show fn
+
+> compileBody :: FunCode -> String
+> compileBody (Code args bytecode) =
+> let (code, b) = runState (cgs bytecode) False in
+> if b then "void** block;\n"++code else code
+> where
+> cgs [] = return ""
+> cgs (x:xs) = do xc <- cg x
+> xsc <- cgs xs
+> return $ xc ++ "\n" ++ xsc
+
+> cg (CALL t fn args) = return $ tmp t ++ " = " ++ quickcall fn ++
+> targs "(" args ++ ");"
+> cg (THUNK t ar fn args) = do
+> put True
+> return $ argblock "block" args ++ tmp t ++
+> " = (void*)CLOSURE(" ++ thunk fn ++ ", " ++
+> show ar ++ "," ++ show (length args) ++
+> ", block);"
+> cg (ADDARGS t th args) = do put True
+> return $ argblock "block" args ++ tmp t ++
+> " = CLOSURE_ADDN((VAL)" ++
+> tmp th ++ ", " ++
+> show (length args) ++
+> ", block);"
+> cg (FOREIGN ty t fn args) = return $
+> castFrom t ty ++
+> fn ++ "(" ++ foreignArgs args ++ ");"
+> cg (VAR t l) = return $ tmp t ++ " = " ++ loc l ++ ";"
+> cg (ASSIGN l t) = return $ loc l ++ " = " ++ tmp t ++ ";"
+> cg (CON t tag args) = do put True
+> return $ argblock "block" args ++ tmp t ++
+> " = (void*)CONSTRUCTOR(" ++
+> show tag ++ ", " ++
+> show (length args) ++
+> ", block);"
+> cg (UNIT t) = return $ tmp t ++ " = MKUNIT;"
+> cg (INT t i) = return $ tmp t ++ " = MKINT("++show i++");"
+> cg (FLOAT t i) = return $ tmp t ++ " = MKFLOAT("++show i++");"
+> cg (STRING t s) = return $ tmp t ++ " = MKSTRING("++show s++");"
+> cg (PROJ t1 t2 i) = return $ tmp t1 ++ " = PROJECT((Closure*)"++tmp t2++", "++show i++");"
+> cg (PROJVAR l t i) = return $ loc l ++ " = PROJECT((Closure*)"++tmp t++", "++show i++");"
+> cg (OP t op l r) = return $ doOp t op l r
+> cg (LOCALS n) = return $ declare "void* " loc (length args) n
+> cg (TMPS n) = return $ declare "void* " tmp 0 n
+> cg (CASE v alts) = do
+> altscode <- cgalts alts 0
+> return $ "assert(ISCON("++tmp v++"));\n" ++
+> "switch(TAG(" ++ tmp v ++")) {\n" ++
+> altscode
+> ++ "}"
+> cg (EVAL v) = return $ "EVAL(&"++tmp v++");"
+> cg (RETURN t) = return $ "return "++tmp t++";"
+> cg (ERROR s) = return $ "ERROR("++show s++");"
+> -- cg x = return $ "NOP; // not done " ++ show x
+
+> cgalts [] _ = return $ ""
+> cgalts (bc:alts) tag = do bcode <- cgs bc
+> altscode <- cgalts alts (tag+1)
+> return $ "case "++show tag++":\n" ++
+> bcode ++ "break;\n" ++ altscode
+
+> targs st [] = st
+> targs st [x] = st ++ tmp x
+> targs st (x:xs) = st ++ tmp x ++ targs ", " xs
+
+> argblock name args = name ++ " = EMALLOC(sizeof(void*)*" ++ show (length args) ++ ");\n" ++ ab name args 0
+> ab nm [] i = ""
+> ab nm (x:xs) i = nm ++ "[" ++ show i ++ "] = " ++ tmp x ++";\n" ++
+> ab nm xs (i+1)
+
+> declare decl fn start end
+> | start == end = ""
+> | otherwise = decl ++ fn start ++";\n" ++
+> declare decl fn (start+1) end
+
+> foreignArgs [] = ""
+> foreignArgs [x] = foreignArg x
+> foreignArgs (x:xs) = foreignArg x ++ ", " ++ foreignArgs xs
+
+> castFrom t TyUnit = ""
+> castFrom t _ = tmp t ++ " = (void*)"
+
+> foreignArg (t, TyInt) = "GETINT("++ tmp t ++")"
+
+> doOp t Plus l r = tmp t ++ " = INTOP(+,"++tmp l ++ ", "++tmp r++");"
+> doOp t Minus l r = tmp t ++ " = INTOP(-,"++tmp l ++ ", "++tmp r++");"
+> doOp t Times l r = tmp t ++ " = INTOP(*,"++tmp l ++ ", "++tmp r++");"
+> doOp t Divide l r = tmp t ++ " = INTOP(/,"++tmp l ++ ", "++tmp r++");"
+
@@ -0,0 +1,39 @@
+> -- |
+> -- Module : EMachine.Compiler
+> -- Copyright : Edwin Brady
+> -- Licence : BSD-style (see LICENSE in the distribution)
+> --
+> -- Maintainer : eb@dcs.st-and.ac.uk
+> -- Stability : experimental
+> -- Portability : portable
+> --
+> -- Public interface for Epigram Supercombinator Compiler
+
+> module EMachine.Compiler(compile) where
+
+Brings everything together; parsing, checking, code generation
+
+> import System.IO
+
+> import EMachine.Language
+> import EMachine.Parser
+> import EMachine.Scopecheck
+> import EMachine.CodegenC
+
+> -- |Compile a source file in supercombinator language to C
+> compile :: FilePath -- ^ Input file
+> -> Handle -- ^ Output C filehandle
+> -> IO ()
+> compile fn outh
+> = do s <- parseFile fn
+> case s of
+> Failure err _ _ -> fail err
+> Success ds -> compileDecls (checkAll ds) outh
+
+> compileDecls (Success (ctxt, decls)) outh
+> = do hPutStr outh $ codegenC ctxt decls
+> hFlush outh
+> hClose outh
+
+> compileDecls (Failure err _ _) _ = putStrLn err
+
Oops, something went wrong.

0 comments on commit 9bdcc46

Please sign in to comment.