Skip to content
This repository has been archived by the owner on Jan 13, 2022. It is now read-only.

Commit

Permalink
options for multiple cores and ast caching
Browse files Browse the repository at this point in the history
Summary: investigating efficiency options; currently multiple cores
and using tmpfs don't seem to make much difference.  ast caching saves
40-50% of run time for normal transformations (parsing-isn't-as-
expensive / transformations-are-more-expensive than i thought).  but i
need to look more, and other ppl can too.  defaults are still same
behavior before this change.

Test Plan: running and timing with different settings/environments

Revert Plan: ok
  • Loading branch information
Daniel Corson committed Nov 6, 2009
1 parent 16515bd commit 873d651
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 122 deletions.
2 changes: 2 additions & 0 deletions lex-pass.cabal
Expand Up @@ -22,6 +22,7 @@ executable lex-pass
build-depends: FUtil, HSH >= 2, base >= 4, binary, bytestring, containers,
derive, directory, filepath, mtl, parsec >= 2 && < 3, process,
syb
ghc-options: -threaded

executable lex-pass-test
if !flag(test-only)
Expand All @@ -31,4 +32,5 @@ executable lex-pass-test
build-depends: FUtil, HSH >= 2, base >= 4, binary, bytestring, containers,
derive, directory, filepath, mtl, parsec >= 2 && < 3, process,
syb
ghc-options: -threaded

57 changes: 33 additions & 24 deletions src/LexPassUtil.hs
Expand Up @@ -10,6 +10,7 @@ import Data.Generics
import FUtil
import HSH
import Lang.Php.Ast
import Options
import System.Directory
import System.FilePath
import System.IO
Expand All @@ -24,15 +25,14 @@ data Transf = Transf {
transfName :: String,
transfTypes :: [String],
transfDoc :: String,
transfArgs :: String,
transfFunc :: [String] -> FilePath -> FilePath -> Int -> Int ->
CanErrStrIO (Bool, [String])
}
transfArgs :: String, -- unused currently
transfFunc :: [String] -> Options -> FilePath -> FilePath -> Int -> Int ->
CanErrStrIO (Bool, [String])}

data Transformed a = Transformed {
infoLines :: [String],
transfResult :: Maybe a
} deriving (Show)
transfResult :: Maybe a}
deriving (Show)

instance Functor Transformed where
fmap f t = t {transfResult = fmap f $ transfResult t}
Expand All @@ -50,27 +50,27 @@ name -:- ftypes = (name, ftypes)
(-?-) :: (String, [String]) -> String -> (String, [String], String)
(name, ftypes) -?- doc = (name, ftypes, doc)

(-=-) :: (String, [String], String) -> ([String] -> FilePath -> FilePath ->
Int -> Int -> CanErrStrIO (Bool, [String])) -> Transf
(-=-) :: (String, [String], String) -> ([String] -> Options -> FilePath ->
FilePath -> Int -> Int -> CanErrStrIO (Bool, [String])) -> Transf
(name, ftypes, doc) -=- func = Transf {
transfName = bareName,
transfTypes = ftypes,
transfDoc = doc,
transfArgs = argInfo, -- unused currently
transfFunc = func
} where (bareName, argInfo) = break (== ' ') name
transfArgs = argInfo,
transfFunc = func}
where (bareName, argInfo) = break (== ' ') name

-- todo: something more graceful here?
argless :: (t -> t1 -> t2) -> [a] -> t -> t1 -> t2
argless f args dir subPath = if null args then f dir subPath
else error "Expected no arguments."

lexPass :: (Binary a, Parse a, Unparse a) => (a -> Transformed a) ->
FilePath -> FilePath -> Int -> Int -> CanErrStrIO (Bool, [String])
lexPass transf codeDir subPath total cur = do
Options -> FilePath -> FilePath -> Int -> Int -> CanErrStrIO (Bool, [String])
lexPass transf opts codeDir subPath total cur = do
io . hPutStrLn stderr $ "Checking (" ++ show cur ++ "/" ++ show total ++
") " ++ subPath
ast <- io $ parseAndCache codeDir subPath
ast <- io $ parseAndCache (optCacheAsts opts) codeDir subPath
case transf ast of
Transformed {infoLines = infoLines, transfResult = Nothing} ->
return (False, infoLines)
Expand Down Expand Up @@ -167,8 +167,9 @@ astPath codeDir subPath = codeDir </> ".ast" </> subPath ++ ".ast"
transfModsFile = updateState ((,) True . snd)

-- combine these into AnAst?
parseAndCache :: (Binary a, Parse a, Unparse a) => FilePath -> FilePath -> IO a
parseAndCache codeDir subPath = do
parseAndCache :: (Binary a, Parse a, Unparse a) =>
Bool -> FilePath -> FilePath -> IO a
parseAndCache cacheAsts codeDir subPath = do
let
astFilename = astPath codeDir subPath
regen = do
Expand All @@ -180,14 +181,22 @@ parseAndCache codeDir subPath = do
createDirectoryIfMissing True $ takeDirectory astFilename
encodeFile astFilename ast
return ast
doesFileExist astFilename >>= \ r -> if r
then do
mtimeAst <- getModificationTime astFilename
mtimeFile <- getModificationTime (codeDir </> subPath)
if mtimeFile > mtimeAst
then regen
else decodeFile astFilename
else regen
if cacheAsts
then
doesFileExist astFilename >>= \ r -> if r
then do
mtimeAst <- getModificationTime astFilename
mtimeFile <- getModificationTime (codeDir </> subPath)
if mtimeFile > mtimeAst
then regen
else decodeFile astFilename
else regen
else do
hPutStrLn stderr "- Parsing (always)"
c <- readFileStrict $ codeDir </> subPath
return $ case runParser parse () subPath c of
Left err -> error $ show err
Right ast -> ast

--
-- eof
Expand Down
103 changes: 27 additions & 76 deletions src/Main.hs
@@ -1,3 +1,5 @@
import Control.Applicative
import Control.Concurrent
import Control.Monad
import Control.Monad.Error
import Data.Char
Expand All @@ -13,50 +15,10 @@ import System.Process

import CodeGen.Transf
import LexPassUtil
import Options
import TaskPool
import qualified Config

data Options = Options {
optFiles :: Bool,
optOnlyChangedFiles :: Bool,
optMaxN :: Maybe Int,
optDir :: Maybe String,
optStartAtFile :: Maybe String}
deriving Show

defaultOptions :: Options
defaultOptions = Options {
optFiles = False,
optOnlyChangedFiles = False,
optMaxN = Nothing,
optDir = Nothing,
optStartAtFile = Nothing}

options :: [OptDescr (Options -> Options)]
options = [
Option "c" ["only-changed-files"]
(NoArg (\ opts -> opts {optOnlyChangedFiles = True}))
"Only consider changing files that already\n\
\have local modifications (NOTE: git-only\n\
\currently).",
Option "d" ["dir"]
(ReqArg (\ d opts -> opts {optDir = Just d}) "<dir>")
"Top-level directory containing parsable\n\
\files of interest. Abstract syntax trees\n\
\will be cached in top-level .ast/\n\
\directory.",
Option "f" ["files"]
(NoArg (\ opts -> opts {optFiles = True}))
"Pass a specific list of files to stdin\n\
\(newline-delimited).",
Option "n" ["max-n-files"]
(ReqArg (\ n opts -> opts {optMaxN = Just $ read n}) "<n>")
"Change no more than <n> files total.",
Option "s" ["start-at-file"]
(ReqArg (\ f opts -> opts {optStartAtFile = Just f}) "<file>")
"Start at a particular file instead of the\n\
\\"beginning\" of the file list, looping back\n\
\around to get all files."]

endSpan :: (a -> Bool) -> [a] -> ([a], [a])
endSpan p = uncurry (flip (,)) . bothond reverse . span p . reverse

Expand All @@ -82,12 +44,6 @@ usage err =
intercalate "\n" (zipWith (++) (repeat " ") .
wordWrap 78 $ transfDoc t)

sourceFiles :: [String] -> FilePath -> Bool -> IO [String]
sourceFiles ftypes dir onlyChanged =
if onlyChanged
then error "not working right now" --"git-files-modified"
else Config.sourceFiles ftypes dir

showStRes :: CanErrStrIO (Bool, [String]) -> CanErrStrIO Bool
showStRes f = do
(ret, st) <- f
Expand All @@ -100,20 +56,21 @@ lookupTrans name = case filter ((== name) . transfName) transfs of
[] -> error $ "No transformer matched: " ++ name
_ -> error $ "Serious uh-oh; multiple transformers matched: " ++ name

transfOnFile :: Transf -> [String] -> FilePath -> FilePath -> Int -> Int ->
CanErrStrIO Bool
transfOnFile transf args dir file total cur =
showStRes $ (transfFunc transf) args dir file total cur
transfOnFile :: Options -> Transf -> [String] -> FilePath -> FilePath ->
Int -> Int -> CanErrStrIO Bool
transfOnFile opts transf args dir file total cur =
showStRes $ (transfFunc transf) args opts dir file total cur

changeFiles :: Options -> (FilePath -> Int -> Int -> CanErrStrIO Bool) ->
[FilePath] -> IO ()
changeFiles opts f paths = taskPool (optNumCores opts) .
map (\ (n, p) -> dieOnErrors $ f p (length paths) n) $ zip [1..] paths

changeMaxNFiles :: Maybe Int -> Int -> Int ->
(String -> Int -> Int -> CanErrStrIO Bool) -> [String] -> CanErrStrIO ()
changeMaxNFiles (Just 0) _ _ _ _ = return ()
changeMaxNFiles _ _ _ _ [] = return ()
changeMaxNFiles nMb total cur f (fileName:fileNames) = do
res <- f fileName total cur
let
nMb' = liftM (\ n -> if res then n - 1 else n) nMb
changeMaxNFiles nMb' total (cur + 1) f fileNames
dieOnErrors x = do
r <- runErrorT x
case r of
Left e -> error e
Right _ -> return ()

main :: IO ()
main = do
Expand All @@ -131,19 +88,13 @@ main = do
transf = lookupTrans transfName
subPaths <- if optFiles opts
then getContents >>= return . lines
else sourceFiles (transfTypes transf) dir $ optOnlyChangedFiles opts
ret <- runErrorT $ do
subPaths' <- case optStartAtFile opts of
Nothing -> return subPaths
Just f -> do
let (pre, rest) = span (/= f) subPaths
case rest of
[] -> throwError $ "Couldn't start at file " ++ show f ++
" which isn't in the list of files to change."
_ -> return $ rest ++ pre
changeMaxNFiles (optMaxN opts) (length subPaths') 1
(transfOnFile transf args dir) subPaths'
case ret of
Left err -> hPutStr stderr err
Right () -> return ()
else Config.sourceFiles (transfTypes transf) dir
let
subPaths' = case optStartAtFile opts of
Nothing -> subPaths
Just f -> let (pre, rest) = span (/= f) subPaths in case rest of
[] -> error $ "Couldn't start at file " ++ show f ++
" which isn't in the list of files to change."
_ -> rest ++ pre
changeFiles opts (transfOnFile opts transf args dir) subPaths'

44 changes: 44 additions & 0 deletions src/Options.hs
@@ -0,0 +1,44 @@
module Options where

import System.Console.GetOpt

data Options = Options {
optCacheAsts :: Bool,
optNumCores :: Int,
optFiles :: Bool,
optDir :: Maybe String,
optStartAtFile :: Maybe String}
deriving Show

defaultOptions :: Options
defaultOptions = Options {
optCacheAsts = True,
optNumCores = 1,
optFiles = False,
optDir = Nothing,
optStartAtFile = Nothing}

options :: [OptDescr (Options -> Options)]
options = [
Option "a" ["do-not-cache-asts"]
(NoArg (\ opts -> opts {optCacheAsts = False}))
"Do not cache binary dump of ASTs to disk (in .ast/).",
Option "C" ["num-cores"]
(ReqArg (\ n opts -> opts {optNumCores = read n}) "<n>")
"Run n OS threads to optimize for n cores.",
Option "d" ["dir"]
(ReqArg (\ d opts -> opts {optDir = Just d}) "<dir>")
"Top-level directory containing parsable\n\
\files of interest. Abstract syntax trees\n\
\will be cached in top-level .ast/\n\
\directory.",
Option "f" ["files"]
(NoArg (\ opts -> opts {optFiles = True}))
"Pass a specific list of files to stdin\n\
\(newline-delimited).",
Option "s" ["start-at-file"]
(ReqArg (\ f opts -> opts {optStartAtFile = Just f}) "<file>")
"Start at a particular file instead of the\n\
\\"beginning\" of the file list, looping back\n\
\around to get all files."]

30 changes: 30 additions & 0 deletions src/TaskPool.hs
@@ -0,0 +1,30 @@
module TaskPool where

import Control.Exception
import Control.Concurrent
import Control.Monad
import Prelude hiding (catch)

type TaskChan a = Chan (Maybe (IO a))

worker :: TaskChan a -> Chan () -> IO ()
worker q doneChan = do
taskMb <- readChan q
case taskMb of
Nothing -> writeChan doneChan ()
Just task -> errorOnExceptions task >> worker q doneChan

errorOnExceptions :: IO a -> IO a
errorOnExceptions task = catchAll task (error . show)

catchAll :: IO a -> (SomeException -> IO a) -> IO a
catchAll = catch

taskPool :: Int -> [IO ()] -> IO ()
taskPool n tasks = do
q <- newChan
doneChan <- newChan
tids <- replicateM n . forkIO $ worker q doneChan
mapM_ (writeChan q) $ map Just tasks ++ replicate n Nothing
replicateM_ n $ readChan doneChan

22 changes: 0 additions & 22 deletions src/Transf/ExampleGlolbal.hs

This file was deleted.

0 comments on commit 873d651

Please sign in to comment.