Permalink
Browse files

Moving parsing preprocessing into ghc-parser

  • Loading branch information...
1 parent 806aa5c commit bfb0af88cdfae7261fe648084b3ceb7e6a994de8 @gibiansky gibiansky committed Feb 7, 2014
@@ -1,3 +1,4 @@
+{-# LANGUAGE DeriveFunctor #-}
module Language.Haskell.GHC.Parser (
-- Parser handling
runParser,
@@ -7,6 +8,7 @@ module Language.Haskell.GHC.Parser (
StringLoc(..),
ParseOutput(..),
Parser,
+ Located(..),
-- Different parsers
parserStatement,
@@ -21,18 +23,22 @@ module Language.Haskell.GHC.Parser (
partialTypeSignature,
partialModule,
partialExpression,
+
+ -- Haskell string preprocessing.
+ removeComments,
+ layoutChunks,
) where
-import Data.List (intercalate)
+import Data.List (intercalate, findIndex)
import Bag
import ErrUtils hiding (ErrMsg)
import FastString
-import GHC
+import GHC hiding (Located)
import Lexer
import OrdList
import Outputable hiding ((<>))
-import SrcLoc
+import SrcLoc hiding (Located)
import StringBuffer
import qualified Language.Haskell.GHC.HappyParser as Parse
@@ -57,6 +63,13 @@ data ParseOutput a
deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what
-- part is remaining.
+ --
+-- | Store locations along with a value.
+data Located a = Located {
+ line :: LineNumber, -- Where this element is located.
+ unloc :: a -- Located element.
+ } deriving (Eq, Show, Functor)
+
data ParserType = FullParser | PartialParser
data Parser a = Parser ParserType (P a)
@@ -128,3 +141,79 @@ splitAtLoc line col string =
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
+
+-- | Split an input string into chunks based on indentation.
+-- A chunk is a line and all lines immediately following that are indented
+-- beyond the indentation of the first line. This parses Haskell layout
+-- rules properly, and allows using multiline expressions via indentation.
+layoutChunks :: String -> [Located String]
+layoutChunks = go 1
+ where
+ go :: LineNumber -> String -> [Located String]
+ go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines
+
+ -- drop spaces on left and right
+ strip = dropRight . dropLeft
+ where
+ dropLeft = dropWhile (`elem` whitespace)
+ dropRight = reverse . dropWhile (`elem` whitespace) . reverse
+ whitespace = " \t\n"
+
+ layoutLines :: LineNumber -> [String] -> [Located String]
+ -- Empty string case. If there's no input, output is empty.
+ layoutLines _ [] = []
+
+ -- Use the indent of the first line to find the end of the first block.
+ layoutLines lineIdx all@(firstLine:rest) =
+ let firstIndent = indentLevel firstLine
+ blockEnded line = indentLevel line <= firstIndent in
+ case findIndex blockEnded rest of
+ -- If the first block doesn't end, return the whole string, since
+ -- that just means the block takes up the entire string.
+ Nothing -> [Located lineIdx $ intercalate "\n" all]
+
+ -- We found the end of the block. Split this bit out and recurse.
+ Just idx ->
+ let (before, after) = splitAt idx rest in
+ Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)
+
+ -- Compute indent level of a string as number of leading spaces.
+ indentLevel :: String -> Int
+ indentLevel (' ':str) = 1 + indentLevel str
+
+ -- Count a tab as two spaces.
+ indentLevel ('\t':str) = 2 + indentLevel str
+
+ -- Count empty lines as a large indent level, so they're always with the previous expression.
+ indentLevel "" = 100000
+
+ indentLevel _ = 0
+
+-- | Drop comments from Haskell source.
+-- Simply gets rid of them, does not replace them in any way.
+removeComments :: String -> String
+removeComments = removeOneLineComments . removeMultilineComments 0
+ where
+ removeOneLineComments str =
+ case str of
+ -- Don't remove comments after cmd directives
+ ':':'!':remaining ->":!" ++ takeLine remaining ++ dropLine remaining
+ '-':'-':remaining -> dropLine remaining
+ x:xs -> x:removeOneLineComments xs
+ [] -> []
+ where
+ dropLine = removeOneLineComments . dropWhile (/= '\n')
+ takeLine = takeWhile (/= '\n')
+
+ removeMultilineComments nesting str =
+ case str of
+ '{':'-':remaining -> removeMultilineComments (nesting + 1) remaining
+ '-':'}':remaining ->
+ if nesting > 0
+ then removeMultilineComments (nesting - 1) remaining
+ else '-':'}':removeMultilineComments nesting remaining
+ x:xs ->
+ if nesting > 0
+ then removeMultilineComments nesting xs
+ else x:removeMultilineComments nesting xs
+ [] -> []
@@ -2,7 +2,7 @@
-- documentation, see http://haskell.org/cabal/users-guide/
name: ghc-parser
-version: 0.1.0.0
+version: 0.1.1.0
synopsis: Haskell source parser from GHC.
-- description:
homepage: https://github.com/gibiansky/IHaskell
View
@@ -64,7 +64,7 @@ library
directory -any,
filepath -any,
ghc ==7.6.*,
- ghc-parser -any,
+ ghc-parser >=0.1.1,
ghc-paths ==0.1.*,
haskeline -any,
here -any,
@@ -17,7 +17,7 @@ import Data.Monoid
import IHaskell.Types
import IHaskell.Display
import IHaskell.IPython
-import IHaskell.Eval.Parser
+import IHaskell.Eval.Parser hiding (line)
data LintSeverity = LintWarning | LintError deriving (Eq, Show)
@@ -50,11 +50,6 @@ data CodeBlock
| ParseError StringLoc ErrMsg -- ^ An error indicating that parsing the code block failed.
deriving (Show, Eq)
--- | Store locations along with a value.
-data Located a = Located LineNumber a deriving (Eq, Show)
-instance Functor Located where
- fmap f (Located line a) = Located line $ f a
-
-- | Directive types. Each directive is associated with a string in the
-- directive code block.
data DirectiveType
@@ -70,14 +65,6 @@ data DirectiveType
| GetDoc -- ^ Get documentation for an identifier via Hoogle.
deriving (Show, Eq)
--- | Unlocate something - drop the position.
-unloc :: Located a -> a
-unloc (Located _ a) = a
-
--- | Get the line number of a located element.
-line :: Located a -> LineNumber
-line (Located l _) = l
-
-- | Parse a string into code blocks.
parseString :: GhcMonad m => String -> m [Located CodeBlock]
parseString codeString = do
@@ -88,7 +75,7 @@ parseString codeString = do
Parsed {} -> return [Located 1 $ Module codeString]
Failure {} -> do
-- Split input into chunks based on indentation.
- let chunks = layoutChunks $ dropComments codeString
+ let chunks = layoutChunks $ removeComments codeString
result <- joinFunctions <$> processChunks [] chunks
-- Return to previous flags. When parsing, flags can be set to make
@@ -268,69 +255,6 @@ parseDirective (':':directive) line = case find rightDirective directives of
]
parseDirective _ _ = error "Directive must start with colon!"
-
--- | Split an input string into chunks based on indentation.
--- A chunk is a line and all lines immediately following that are indented
--- beyond the indentation of the first line. This parses Haskell layout
--- rules properly, and allows using multiline expressions via indentation.
-layoutChunks :: String -> [Located String]
-layoutChunks = go 1
- where
- go :: LineNumber -> String -> [Located String]
- go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines
-
- layoutLines :: LineNumber -> [String] -> [Located String]
- -- Empty string case. If there's no input, output is empty.
- layoutLines _ [] = []
-
- -- Use the indent of the first line to find the end of the first block.
- layoutLines lineIdx all@(firstLine:rest) =
- let firstIndent = indentLevel firstLine
- blockEnded line = indentLevel line <= firstIndent in
- case findIndex blockEnded rest of
- -- If the first block doesn't end, return the whole string, since
- -- that just means the block takes up the entire string.
- Nothing -> [Located lineIdx $ intercalate "\n" all]
-
- -- We found the end of the block. Split this bit out and recurse.
- Just idx ->
- let (before, after) = splitAt idx rest in
- Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)
-
- -- Compute indent level of a string as number of leading spaces.
- indentLevel :: String -> Int
- indentLevel (' ':str) = 1 + indentLevel str
-
- -- Count a tab as two spaces.
- indentLevel ('\t':str) = 2 + indentLevel str
-
- -- Count empty lines as a large indent level, so they're always with the previous expression.
- indentLevel "" = 100000
-
- indentLevel _ = 0
-
--- Not the same as 'unlines', due to trailing \n
-joinLines :: [String] -> String
-joinLines = intercalate "\n"
-
--- | Drop comments from Haskell source.
-dropComments :: String -> String
-dropComments = removeOneLineComments . removeMultilineComments
- where
- -- Don't remove comments after cmd directives
- removeOneLineComments (':':'!':remaining) = ":!" ++ takeWhile (/= '\n') remaining ++
- removeOneLineComments (dropWhile (/= '\n') remaining)
- removeOneLineComments ('-':'-':remaining) = removeOneLineComments (dropWhile (/= '\n') remaining)
- removeOneLineComments (x:xs) = x:removeOneLineComments xs
- removeOneLineComments x = x
-
- removeMultilineComments ('{':'-':remaining) =
- case subIndex "-}" remaining of
- Nothing -> ""
- Just idx -> removeMultilineComments $ drop (2 + idx) remaining
- removeMultilineComments (x:xs) = x:removeMultilineComments xs
- removeMultilineComments x = x
-
-- | Parse a module and return the name declared in the 'module X where'
-- line. That line is required, and if it does not exist, this will error.
-- Names with periods in them are returned piece y piece.
@@ -344,3 +268,7 @@ getModuleName moduleSrc = do
case unLoc <$> hsmodName (unLoc mod) of
Nothing -> error "Module must have a name."
Just name -> return $ split "." $ moduleNameString name
+
+-- Not the same as 'unlines', due to trailing \n
+joinLines :: [String] -> String
+joinLines = intercalate "\n"

0 comments on commit bfb0af8

Please sign in to comment.