Skip to content

Commit

Permalink
Add documentation, tidy up code
Browse files Browse the repository at this point in the history
  • Loading branch information
bos committed Jan 16, 2009
1 parent 2c61e00 commit 6694488
Show file tree
Hide file tree
Showing 11 changed files with 297 additions and 198 deletions.
41 changes: 19 additions & 22 deletions Data/Attoparsec.hs
Expand Up @@ -8,13 +8,13 @@
-- Stability : experimental
-- Portability : unknown
--
-- Simple, efficient parser combinators for lazy 'LB.ByteString'
-- Simple, efficient parser combinators for lazy 'ByteString'
-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
--
-----------------------------------------------------------------------------
module Data.Attoparsec
(
-- * Parser
-- * Parser types
ParseError
, Parser

Expand All @@ -25,36 +25,33 @@ module Data.Attoparsec

-- * Combinators
, (<?>)

-- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
, try
, endOfInput
, lookAhead
, peek
, module Data.Attoparsec.Combinator

-- * Things like in @Parsec.Char@
, satisfy
-- * Parsing individual bytes
, anyWord8
, word8
, notWord8
, word8
, satisfy

-- * Efficient string handling
, string
, skipWhile
, stringTransform

-- * Parser converters.
, eitherP

-- * Miscellaneous functions.
, getInput
, getConsumed
, takeAll
, takeTill
, takeWhile
, takeWhile1
, takeTill
, takeAll
, skipWhile
, notEmpty

-- ** Combinators
, match
, notEmpty

, module Data.Attoparsec.Combinator
-- * State observation functions
, endOfInput
, getConsumed
, getInput
, lookAhead
) where

import Data.Attoparsec.Combinator
Expand Down
76 changes: 42 additions & 34 deletions Data/Attoparsec/Char8.hs
Expand Up @@ -2,20 +2,25 @@
-----------------------------------------------------------------------------
-- |
-- Module : Data.Attoparsec.Char8
-- Copyright : Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
-- Copyright : Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2009
-- License : BSD3
--
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : unknown
--
-- Simple, efficient parser combinators for lazy 'LB.ByteString'
-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
-- Simple, efficient, character-oriented parser combinators for lazy
-- 'LB.ByteString' strings, loosely based on the Parsec library.
--
-- /Note/: This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15. It /does not/ deal with character encodings,
-- multibyte characters, or wide characters. Any attempts to use
-- characters above code point 255 will give wrong answers.
-----------------------------------------------------------------------------
module Data.Attoparsec.Char8
(
-- * Parser
-- * Parser types
ParseError
, Parser

Expand All @@ -26,48 +31,51 @@ module Data.Attoparsec.Char8

-- * Combinators
, (<?>)

-- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
, try
, endOfInput
, lookAhead
, peek

-- * Things like in @Parsec.Char@
, satisfy
, letter
, digit
-- * Parsing individual characters
, anyChar
, space
, char
, digit
, letter
, notChar
, space
, satisfy

-- ** Character classes
, inClass
, notInClass

-- * Efficient string handling
, string
, stringCI
, skipSpace
, skipWhile
, takeAll
, takeCount
, takeTill
, takeWhile
, takeWhile1

-- * Parser converters.
, eitherP
-- ** Combinators
, match
, notEmpty

-- * Text parsing
, endOfLine

-- * Numeric parsers.
-- * Numeric parsers
, int
, integer
, double

-- * Miscellaneous functions.
, getInput
-- * State observation functions
, endOfInput
, getConsumed
, takeWhile
, takeWhile1
, takeTill
, takeAll
, takeCount
, skipWhile
, skipSpace
, notEmpty
, match
, inClass
, notInClass
, endOfLine
, getInput
, lookAhead

-- * Combinators
, module Data.Attoparsec.Combinator
) where

Expand All @@ -76,13 +84,13 @@ import qualified Data.ByteString.Lazy.Char8 as LB
import Data.ByteString.Internal (w2c)
import Data.Char (isDigit, isLetter, isSpace, toLower)
import Data.Attoparsec.FastSet
(FastSet, memberChar, set)
(FastSet, charClass, memberChar, set)
import qualified Data.Attoparsec.Internal as I
import Data.Attoparsec.Combinator
import Data.Attoparsec.Internal
(Parser, ParseError, (<?>), parse, parseAt, parseTest, try, endOfInput,
lookAhead, peek, string,
eitherP, getInput, getConsumed, takeAll, takeCount, notEmpty, match,
lookAhead, string,
getInput, getConsumed, takeAll, takeCount, notEmpty, match,
endOfLine, setInput)
import Data.ByteString.Lex.Lazy.Double (readDouble)
import Prelude hiding (takeWhile)
Expand Down
30 changes: 18 additions & 12 deletions Data/Attoparsec/Char8Boilerplate.h
Expand Up @@ -21,41 +21,47 @@ space :: PARSER Char
space = satisfy isSpace <?> "space"
{-# INLINE space #-}

-- | Satisfy a specific character.
-- | Match a specific character.
char :: Char -> PARSER Char
char c = satisfy (== c) <?> [c]
{-# INLINE char #-}

-- | Satisfy a specific character.
-- | Match any character except the given one.
notChar :: Char -> PARSER Char
notChar c = satisfy (/= c) <?> "not " ++ [c]
{-# INLINE notChar #-}

charClass :: String -> FastSet
charClass = set . SB.pack . go
where go (a:'-':b:xs) = [a..b] ++ go xs
go (x:xs) = x : go xs
go _ = ""

-- | Match any character in a set.
--
-- > vowel = inClass "aeiou"
--
-- Range notation is supported.
--
-- > halfAlphabet = inClass "a-nA-N"
--
-- To add a literal \'-\' to a set, place it at the beginning or end
-- of the string.
inClass :: String -> Char -> Bool
inClass s = (`memberChar` myset)
where myset = charClass s
inClass s = (`memberChar` mySet)
where mySet = charClass s
{-# INLINE inClass #-}

-- | Match any character not in a set.
notInClass :: String -> Char -> Bool
notInClass s = not . inClass s
{-# INLINE notInClass #-}

-- | Consume characters while the predicate is true.
-- | Consume characters while the predicate succeeds.
takeWhile :: (Char -> Bool) -> PARSER LB.ByteString
takeWhile p = I.takeWhile (p . w2c)
{-# INLINE takeWhile #-}

-- | Consume characters while the predicate fails.
takeTill :: (Char -> Bool) -> PARSER LB.ByteString
takeTill p = I.takeTill (p . w2c)
{-# INLINE takeTill #-}

-- | Skip over characters while the predicate is true.
-- | Skip over characters while the predicate succeeds.
skipWhile :: (Char -> Bool) -> PARSER ()
skipWhile p = I.skipWhile (p . w2c)
{-# INLINE skipWhile #-}
Expand Down
36 changes: 21 additions & 15 deletions Data/Attoparsec/Combinator.hs
Expand Up @@ -9,7 +9,7 @@
-- Stability : experimental
-- Portability : portable
--
-- Useful parser combinators, similar to Parsec.
-- Useful parser combinators, similar to those provided by Parsec.
--
-----------------------------------------------------------------------------
module Data.Attoparsec.Combinator
Expand All @@ -23,50 +23,51 @@ module Data.Attoparsec.Combinator
, sepBy1
, skipMany
, skipMany1
, eitherP
, module Control.Applicative
) where

import Control.Applicative

-- | @choice ps@ tries to apply the parsers in the list @ps@ in order,
-- | @choice ps@ tries to apply the actions in the list @ps@ in order,
-- until one of them succeeds. Returns the value of the succeeding
-- parser.
-- action.
choice :: Alternative f => [f a] -> f a
choice = foldr (<|>) empty

-- | @option x p@ tries to apply parser @p@. If @p@ fails without
-- | @option x p@ tries to apply action @p@. If @p@ fails without
-- consuming input, it returns the value @x@, otherwise the value
-- returned by @p@.
--
-- > priority = option 0 (digitToInt <$> digit)
option :: Alternative f => a -> f a -> f a
option x p = p <|> pure x

-- | @many1 p@ applies the parser @p@ /one/ or more times. Returns a
-- | @many1 p@ applies the action @p@ /one/ or more times. Returns a
-- list of the returned values of @p@.
--
-- > word = many1 letter
many1 :: Alternative f => f a -> f [a]
many1 p = liftA2 (:) p (many p)

-- | @sepBy p sep@ parses /zero/ or more occurrences of @p@, separated
-- by @sep@. Returns a list of values returned by @p@.
-- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated
-- by @sep@. Returns a list of the values returned by @p@.
--
-- > commaSep p = p `sepBy` (symbol ",")
sepBy :: Alternative f => f a -> f s -> f [a]
sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []

-- | @sepBy1 p sep@ parses /one/ or more occurrences of @p@, separated
-- by @sep@. Returns a list of values returned by @p@.
-- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated
-- by @sep@. Returns a list of the values returned by @p@.
--
-- > commaSep p = p `sepBy` (symbol ",")
sepBy1 :: Alternative f => f a -> f s -> f [a]
sepBy1 p s = scan
where scan = liftA2 (:) p ((s *> scan) <|> pure [])

-- | @manyTill p end@ applies parser @p@ /zero/ or more times until
-- parser @end@ succeeds. Returns the list of values returned by @p@.
-- This parser can be used to scan comments:
-- | @manyTill p end@ applies action @p@ /zero/ or more times until
-- action @end@ succeeds, and returns the list of values returned by
-- @p@. This can be used to scan comments:
--
-- > simpleComment = string "<!--" *> manyTill anyChar (try (string "-->"))
--
Expand All @@ -76,16 +77,21 @@ manyTill :: Alternative f => f a -> f b -> f [a]
manyTill p end = scan
where scan = (end *> pure []) <|> liftA2 (:) p scan

-- | Skip zero or more instances of the parser.
-- | Skip zero or more instances of an action.
skipMany :: Alternative f => f a -> f ()
skipMany p = scan
where scan = (p *> scan) <|> pure ()

-- | Skip one or more instances of the parser.
-- | Skip one or more instances of an action.
skipMany1 :: Alternative f => f a -> f ()
skipMany1 p = p *> skipMany p

-- | Apply the given parser repeatedly, returning every parse result.
-- | Apply the given action repeatedly, returning every result.
count :: Monad m => Int -> m a -> m [a]
count n p = sequence (replicate n p)
{-# INLINE count #-}

-- | Combine two alternatives.
eitherP :: (Alternative f) => f a -> f b -> f (Either a b)
eitherP a b = (Left <$> a) <|> (Right <$> b)
{-# INLINE eitherP #-}
8 changes: 8 additions & 0 deletions Data/Attoparsec/FastSet.hs
Expand Up @@ -28,6 +28,8 @@ module Data.Attoparsec.FastSet
, memberWord8
-- * Debugging
, fromSet
-- * Handy interface
, charClass
) where

import Data.Bits ((.&.), (.|.), shiftL, shiftR)
Expand Down Expand Up @@ -94,3 +96,9 @@ mkTable s = I.unsafeCreate 32 $ \t -> do
pokeByteOff t byte (prev .|. bit)
loop (n + 1)
in loop 0

charClass :: String -> FastSet
charClass = set . B8.pack . go
where go (a:'-':b:xs) = [a..b] ++ go xs
go (x:xs) = x : go xs
go _ = ""

0 comments on commit 6694488

Please sign in to comment.