diff --git a/Data/Csv.hs b/Data/Csv.hs index ff9dac3..903b952 100644 --- a/Data/Csv.hs +++ b/Data/Csv.hs @@ -1,5 +1,9 @@ --- | This module implements encoding and decoding of CSV data. The --- implementation is RFC 4180 compliant, with the following +-- | This module implements encoding and decoding of CSV and +-- space-delimited data. APIs for both format are essentially the +-- same. They only differ in function names and space-delimited data +-- doesn't have encoding\/decoding options. +-- +-- The CSV implementation is RFC 4180 compliant, with the following -- extensions: -- -- * Empty lines are ignored. @@ -9,6 +13,16 @@ -- -- * Escaped fields may contain any characters (but double-quotes -- need to be escaped). +-- +-- Space-delimited data don't have specification so following format +-- is assumed: +-- +-- * Fields are delimited by one or more tabs and spaces at the +-- beginning and end of line are ignored. +-- +-- * Empty lines are ignored. +-- +-- * Escaping rules are same as with CSV module Data.Csv ( -- * Usage example @@ -20,7 +34,7 @@ module Data.Csv -- * Custom type conversions -- $customtypeconversions - -- * Encoding and decoding + -- * Encoding and decoding CSV -- $encoding decode , decodeByName @@ -38,6 +52,12 @@ module Data.Csv , encodeWith , encodeByNameWith + -- * Space-delimited data + , decodeTable + , decodeTableByName + , encodeTable + , encodeTableByName + -- * Core CSV types , Csv , Record diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs index cbf22b6..b1f9862 100644 --- a/Data/Csv/Encoding.hs +++ b/Data/Csv/Encoding.hs @@ -26,6 +26,12 @@ module Data.Csv.Encoding , defaultEncodeOptions , encodeWith , encodeByNameWith + + -- * Space-delimited files + , decodeTable + , decodeTableByName + , encodeTable + , encodeTableByName ) where import Blaze.ByteString.Builder (Builder, fromByteString, fromWord8, @@ -97,22 +103,10 @@ decodeWith :: FromRecord a -- skipped -> L.ByteString -- ^ CSV data -> Either String (Vector a) -decodeWith = decodeWithC (runParser . parseCsv) +decodeWith !opt = + decodeWithC (header $ decDelimiter opt) (csv opt) (runParser . parseCsv) {-# INLINE [1] decodeWith #-} -parseCsv :: FromRecord a => Csv -> Parser (Vector a) -parseCsv xs = V.fromList <$!> mapM' parseRecord (V.toList xs) - -mapM' :: Monad m => (a -> m b) -> [a] -> m [b] -mapM' f = go - where - go [] = return [] - go (x:xs) = do - !y <- f x - ys <- go xs - return (y : ys) -{-# INLINE mapM' #-} - {-# RULES "idDecodeWith" decodeWith = idDecodeWith #-} @@ -121,14 +115,16 @@ mapM' f = go -- conversion is performed. idDecodeWith :: DecodeOptions -> Bool -> L.ByteString -> Either String (Vector (Vector B.ByteString)) -idDecodeWith = decodeWithC pure +idDecodeWith !opt = decodeWithC (header $ decDelimiter opt) (csv opt) pure -decodeWithC :: (Csv -> Either String a) -> DecodeOptions -> Bool -> L.ByteString - -> Either String a -decodeWithC convert !opts skipHeader = decodeWithP parser convert +decodeWithC :: AL.Parser Header -> AL.Parser Csv + -> (Csv -> Either String a) + -> Bool -> L.ByteString -> Either String a +decodeWithC headerP body convert skipHeader + = decodeWithP parser convert where parser - | skipHeader = header (decDelimiter opts) *> csv opts - | otherwise = csv opts + | skipHeader = headerP *> body + | otherwise = body {-# INLINE decodeWithC #-} -- | Like 'decodeByName', but lets you customize how the CSV data is @@ -141,9 +137,6 @@ decodeByNameWith !opts = decodeWithP (csvWithHeader opts) (\ (hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs)) -parseNamedCsv :: FromNamedRecord a => Vector NamedRecord -> Parser (Vector a) -parseNamedCsv xs = V.fromList <$!> mapM' parseNamedRecord (V.toList xs) - -- | Options that controls how data is encoded. These options can be -- used to e.g. encode data in a tab-separated format instead of in a -- comma-separated format. @@ -180,11 +173,105 @@ encodeRecord delim = mconcat . intersperse (fromWord8 delim) . map fromByteString . map escape . V.toList {-# INLINE encodeRecord #-} +-- | Like 'encodeByName', but lets you customize how the CSV data is +-- encoded. +encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> V.Vector a + -> L.ByteString +encodeByNameWith opts hdr v = + toLazyByteString ((encodeRecord (encDelimiter opts) hdr) <> + fromByteString "\r\n" <> records) + where + records = unlines + . map (encodeRecord (encDelimiter opts) + . namedRecordToRecord hdr . toNamedRecord) + . V.toList $ v +{-# INLINE encodeByNameWith #-} + + +------------------------------------------------------------------------ +-- * Space-delimited files + +-- | Efficiently deserialize space-delimited records from a lazy +-- ByteString. If this fails due to incomplete or invalid input, +-- @'Left' msg@ is returned. +decodeTable :: FromRecord a + => Bool -- ^ Data contains header that should be skipped + -> L.ByteString -- ^ Raw data + -> Either String (Vector a) +decodeTable = + decodeWithC tableHeader table (runParser . parseCsv) +{-# INLINE decodeTable #-} + +-- | Same as 'decodeWith', but more efficient as no type +-- conversion is performed. +idDecodeTable :: Bool -> L.ByteString -> Either String (Vector (Vector B.ByteString)) +idDecodeTable = decodeWithC tableHeader table pure + +{-# RULES + "idDecodeTable" decodeTable = idDecodeTable + #-} + + +-- | Efficiently deserialize space-delimited records from a lazy +-- ByteString. If this fails due to incomplete or invalid input, +-- @'Left' msg@ is returned. The data is assumed to be preceeded by a +-- header. +decodeTableByName :: FromNamedRecord a => L.ByteString -> Either String (Header, Vector a) +decodeTableByName = + decodeWithP tableWithHeader + (\(hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs)) +{-# INLINE decodeTableByName #-} + +-- | Efficiently serialize space-delimited records as a lazy +-- ByteString. Single tab is used as separator. +encodeTable :: ToRecord a => V.Vector a -> L.ByteString +encodeTable = toLazyByteString + . unlines + . map (encodeTableRow . toRecord) + . V.toList +{-# INLINE encodeTable #-} + +-- | Efficiently serialize space-delimited records as a lazy +-- ByteString. The header is written before any records and dictates +-- the field order. Single tab is used as separator. +encodeTableByName :: ToNamedRecord a => Header -> V.Vector a -> L.ByteString +encodeTableByName hdr v = + toLazyByteString ( encodeTableRow hdr + <> fromByteString "\r\n" + <> records ) + where + records = unlines + . map (encodeTableRow . namedRecordToRecord hdr . toNamedRecord) + . V.toList $ v +{-# INLINE encodeTableByName #-} + +encodeTableRow :: Record -> Builder +encodeTableRow = mconcat . intersperse (fromWord8 9) + . map fromByteString . map escapeT . V.toList + where + -- We need to escape empty strings. Otherwise we'll get: + -- > encode ["a","","b"] = 'a b' + -- instead of + -- > encode ["a","","b"] = 'a "" b' + escapeT b | B.null b = "\"\"" + | otherwise = escape b +{-# INLINE encodeTableRow #-} + + +------------------------------------------------------------------------ +-- * Common functionality and helpers + +parseCsv :: FromRecord a => Csv -> Parser (Vector a) +parseCsv xs = V.fromList <$!> mapM' parseRecord (V.toList xs) + +parseNamedCsv :: FromNamedRecord a => Vector NamedRecord -> Parser (Vector a) +parseNamedCsv xs = V.fromList <$!> mapM' parseNamedRecord (V.toList xs) + -- TODO: Optimize escape :: B.ByteString -> B.ByteString escape s | B.find (\ b -> b == dquote || b == comma || b == nl || b == cr || - b == sp) s == Nothing = s + b == sp || b == tab) s == Nothing = s | otherwise = B.concat ["\"", B.concatMap @@ -196,21 +283,7 @@ escape s nl = 10 cr = 13 sp = 32 - --- | Like 'encodeByName', but lets you customize how the CSV data is --- encoded. -encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> V.Vector a - -> L.ByteString -encodeByNameWith opts hdr v = - toLazyByteString ((encodeRecord (encDelimiter opts) hdr) <> - fromByteString "\r\n" <> records) - where - records = unlines - . map (encodeRecord (encDelimiter opts) - . namedRecordToRecord hdr . toNamedRecord) - . V.toList $ v -{-# INLINE encodeByNameWith #-} - + tab = 9 namedRecordToRecord :: Header -> NamedRecord -> Record namedRecordToRecord hdr nr = V.map find hdr @@ -245,4 +318,15 @@ decodeWithP p to s = Left msg -> Left $ "conversion error: " ++ msg AL.Fail left _ msg -> Left $ "parse error (" ++ msg ++ ") at " ++ show (BL8.unpack left) -{-# INLINE decodeWithP #-} \ No newline at end of file +{-# INLINE decodeWithP #-} + +mapM' :: Monad m => (a -> m b) -> [a] -> m [b] +mapM' f = go + where + go [] = return [] + go (x:xs) = do + !y <- f x + ys <- go xs + return (y : ys) +{-# INLINE mapM' #-} + diff --git a/Data/Csv/Incremental.hs b/Data/Csv/Incremental.hs index 8188ce4..1155aac 100644 --- a/Data/Csv/Incremental.hs +++ b/Data/Csv/Incremental.hs @@ -8,8 +8,11 @@ module Data.Csv.Incremental ( -- * Decoding headers HeaderParser(..) + -- ** CSV , decodeHeader , decodeHeaderWith + -- ** + , decodeTableHeader -- ** Providing input -- $feed-header , feedChunkH @@ -23,11 +26,13 @@ module Data.Csv.Incremental -- $indexbased , decode , decodeWith + , decodeTable -- ** Name-based record conversion -- $namebased , decodeByName , decodeByNameWith + , decodeTableByName -- ** Providing input -- $feed-records @@ -132,10 +137,16 @@ decodeHeader = decodeHeaderWith defaultDecodeOptions -- | Like 'decodeHeader', but lets you customize how the CSV data is -- parsed. decodeHeaderWith :: DecodeOptions -> HeaderParser B.ByteString -decodeHeaderWith !opts = PartialH (go . parser) - where - parser = A.parse (header $ decDelimiter opts) +decodeHeaderWith !opts = parseWithH (header $ decDelimiter opts) + +-- | Parse header for space-delimited data. It have same semantic as +-- 'decodeHeader'. +decodeTableHeader :: HeaderParser B.ByteString +decodeTableHeader = parseWithH tableHeader +parseWithH :: A.Parser Header -> HeaderParser B.ByteString +parseWithH parser = PartialH (go . A.parse parser) + where go (A.Fail rest _ msg) = FailH rest err where err = "parse error (" ++ msg ++ ")" -- TODO: Check empty and give attoparsec one last chance to return @@ -143,6 +154,8 @@ decodeHeaderWith !opts = PartialH (go . parser) go (A.Partial k) = PartialH $ \ s -> go (k s) go (A.Done rest r) = DoneH r rest + + ------------------------------------------------------------------------ -- * Decoding records @@ -229,12 +242,27 @@ decodeWith :: FromRecord a -> Bool -- ^ Data contains header that should be -- skipped -> Parser a -decodeWith !opts skipHeader - | skipHeader = Partial $ \ s -> go (decodeHeaderWith opts `feedChunkH` s) - | otherwise = Partial (decodeWithP parseRecord opts) +decodeWith !opts = parseSimple (header d) (record d) + where + d = decDelimiter opts + +-- | Efficiently deserialize space-delimited data. +decodeTable :: FromRecord a + => Bool -- ^ Data contains header that should be + -- skipped + -> Parser a +decodeTable = parseSimple tableHeader tableRecord + +parseSimple :: FromRecord a => A.Parser Header -> A.Parser Record -> Bool -> Parser a +parseSimple headP body skipHeader + | skipHeader = Partial $ \ s -> go (parseWithH headP `feedChunkH` s) + | otherwise = Partial (decodeWithP body parseRecord) where go (FailH rest msg) = Fail rest msg go (PartialH k) = Partial $ \ s' -> go (k s') - go (DoneH _ rest) = decodeWithP parseRecord opts rest + go (DoneH _ rest) = decodeWithP body parseRecord rest + + + ------------------------------------------------------------------------ @@ -251,13 +279,26 @@ decodeByName = decodeByNameWith defaultDecodeOptions decodeByNameWith :: FromNamedRecord a => DecodeOptions -- ^ Decoding options -> HeaderParser (Parser a) -decodeByNameWith !opts = - PartialH (go . (decodeHeaderWith opts `feedChunkH`)) +decodeByNameWith !opts = parseNamed (header d) (record d) + where + d = decDelimiter opts + +-- | Efficiently deserialize space-delimited data in incremental +-- fashion. The data is assumed to be preceeded by a header. Have same +-- semantics as 'decodeByName'. +decodeTableByName :: FromNamedRecord a + => HeaderParser (Parser a) +decodeTableByName = undefined + +parseNamed :: FromNamedRecord a + => A.Parser Header -> A.Parser Record -> HeaderParser (Parser a) +parseNamed headP body = + PartialH (go . (parseWithH headP `feedChunkH`)) where go (FailH rest msg) = FailH rest msg go (PartialH k) = PartialH $ \ s -> go (k s) go (DoneH hdr rest) = - DoneH hdr (decodeWithP (parseNamedRecord . toNamedRecord hdr) opts rest) + DoneH hdr (decodeWithP body (parseNamedRecord . toNamedRecord hdr) rest) -- Copied from Data.Csv.Parser toNamedRecord :: Header -> Record -> NamedRecord @@ -266,9 +307,11 @@ toNamedRecord hdr v = HM.fromList . V.toList $ V.zip hdr v ------------------------------------------------------------------------ -- | Like 'decode', but lets you customize how the CSV data is parsed. -decodeWithP :: (Record -> Conversion.Parser a) -> DecodeOptions -> B.ByteString +decodeWithP :: A.Parser Record + -> (Record -> Conversion.Parser a) + -> B.ByteString -> Parser a -decodeWithP p !opts = go Incomplete [] . parser +decodeWithP rowParser p = go Incomplete [] . parser where go !_ !acc (A.Fail rest _ msg) | null acc = Fail rest err @@ -295,7 +338,7 @@ decodeWithP p !opts = go Incomplete [] . parser acc' | blankLine r = acc | otherwise = convert r : acc - parser = A.parse (record (decDelimiter opts) <* (endOfLine <|> endOfInput)) + parser = A.parse (rowParser <* (endOfLine <|> endOfInput)) convert = runParser . p {-# INLINE decodeWithP #-} diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs index 50056dd..08e4a73 100644 --- a/Data/Csv/Parser.hs +++ b/Data/Csv/Parser.hs @@ -1,7 +1,7 @@ {-# LANGUAGE BangPatterns, CPP #-} --- | A CSV parser. The parser defined here is RFC 4180 compliant, with --- the following extensions: +-- | Parsers for CSV and space-delimited data. The CSV parser defined +-- here is RFC 4180 compliant, with the following extensions: -- -- * Empty lines are ignored. -- @@ -11,23 +11,41 @@ -- * Escaped fields may contain any characters (but double-quotes -- need to be escaped). -- +-- Space-delimited data don't have specification so following format +-- is assumed: +-- +-- * Fields are delimited by one or more tabs and spaces at the +-- beginning and end of line are ignored. +-- +-- * Empty lines are ignored. +-- +-- * Escaping rules are same as with CSV +-- -- The functions in this module can be used to implement e.g. a -- resumable parser that is fed input incrementally. module Data.Csv.Parser ( DecodeOptions(..) , defaultDecodeOptions + -- * CSV , csv , csvWithHeader , header , record , name , field + -- * Space-delimited data + , table + , tableWithHeader + , tableHeader + , tableRecord + , tableName + , tableField ) where import Blaze.ByteString.Builder (fromByteString, toByteString) import Blaze.ByteString.Builder.Char.Utf8 (fromChar) -import Control.Applicative (Alternative, (*>), (<$>), (<*), (<|>), optional, - pure) +import Control.Applicative (Alternative, (*>), (<$>), (<*), (<|>), (<$), optional, + pure, liftA2, empty) import Data.Attoparsec.Char8 (char, endOfInput, endOfLine) import qualified Data.Attoparsec as A import qualified Data.Attoparsec.Lazy as AL @@ -43,7 +61,7 @@ import Data.Word (Word8) import Data.Csv.Types import Data.Csv.Util ((<$!>)) --- | Options that controls how data is decoded. These options can be +-- | Options that controls how CSV data is decoded. These options can be -- used to e.g. decode tab-separated data instead of comma-separated -- data. -- @@ -165,6 +183,73 @@ unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote && c /= delim && c /= cr) +-- | Parse space-delimited data which doesn't include header. +table :: AL.Parser Csv +table = do + vals <- tableRecord `AL.sepBy1` endOfLine + _ <- optional endOfLine + endOfInput + return $ V.fromList $ removeBlankLines vals +{-# INLINE table #-} + +-- | Parse space-delimited data with header. +tableWithHeader :: AL.Parser (Header, V.Vector NamedRecord) +tableWithHeader = do + hdr <- tableHeader + vals <- map (toNamedRecord hdr) . removeBlankLines <$> + tableRecord `AL.sepBy1` endOfLine + _ <- optional endOfLine + endOfInput + return (hdr, V.fromList vals) + +-- | Parse header name. +tableHeader :: AL.Parser Record +tableHeader = tableRecord <* endOfLine + +-- | Parse header name. They have same format as regular 'tableField's. +tableName :: AL.Parser Field +tableName = tableField + +-- | Parse row for space-delimited data not including terminating line +-- separator. It's more complicated that CSV parser because we need +-- to drop both leading and trailing spaces. +tableRecord :: AL.Parser Record +tableRecord + = V.fromList <$> + ((delimTable <|> pure ()) *> + (tableField `sepBy11` delimTable) + ) + where + sepBy11 p s = liftA2 (:) p scan + where + scan = s *> (([] <$ eol) <|> (liftA2 (:) p scan)) + <|> pure [] + eol = ([] <$ endOfInput) <|> do + mb <- AL.peekWord8 + case mb of + Just b | b == newline || b == cr -> pure [] + _ -> empty +{-# INLINE tableRecord #-} + +-- | Parse field. It could be escaped or not escaped. The return value +-- is escaped. +tableField :: AL.Parser Field +tableField = do + mb <- A.peekWord8 + case mb of + Just b | b == doubleQuote -> escapedField + _ -> unescapedFieldTable + +unescapedFieldTable :: AL.Parser Field +unescapedFieldTable = A.takeWhile (\c -> c /= doubleQuote && + c /= newline && + c /= cr && + c /= tab && + c /= wspace ) + +delimTable :: AL.Parser () +delimTable = () <$ A.takeWhile1 (\c -> c == wspace || c == tab) + dquote :: AL.Parser Char dquote = char '"' @@ -183,7 +268,9 @@ unescape = toByteString <$!> go mempty where then return (acc `mappend` fromByteString h) else rest -doubleQuote, newline, cr :: Word8 +doubleQuote, newline, cr, tab, wspace :: Word8 doubleQuote = 34 -newline = 10 -cr = 13 +newline = 10 +cr = 13 +tab = 9 +wspace = 32 diff --git a/Data/Csv/Streaming.hs b/Data/Csv/Streaming.hs index b226d19..fcfbcd8 100644 --- a/Data/Csv/Streaming.hs +++ b/Data/Csv/Streaming.hs @@ -20,11 +20,13 @@ module Data.Csv.Streaming -- $indexbased , decode , decodeWith + , decodeTable -- ** Name-based record conversion -- $namebased , decodeByName , decodeByNameWith + , decodeTableByName ) where import Control.Applicative ((<$>), (<*>), pure) @@ -37,7 +39,7 @@ import Prelude hiding (foldr) import Data.Csv.Conversion import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith, - decodeWith) + decodeWith, decodeTable, decodeTableByName) import qualified Data.Csv.Incremental as I import Data.Csv.Parser import Data.Csv.Types @@ -132,9 +134,24 @@ decodeWith :: FromRecord a -- skipped -> BL.ByteString -- ^ CSV data -> Records a -decodeWith !opts skipHeader s0 = case BL.toChunks s0 of - [] -> go [] (feedEndOfInput $ I.decodeWith opts skipHeader) - (s:ss) -> go ss (I.decodeWith opts skipHeader `feedChunk` s) +decodeWith !opts skipHeader = + streamData $ I.decodeWith opts skipHeader + +-- | Efficiently deserialize space-delimited records in a streaming +-- fashion. +decodeTable :: FromRecord a + => Bool -- ^ Data contains header that should be + -- skipped + -> BL.ByteString -- ^ Space-delimited data + -> Records a +decodeTable skipHeader = + streamData $ I.decodeTable skipHeader + + +streamData :: I.Parser a -> BL.ByteString -> Records a +streamData parse s0 = case BL.toChunks s0 of + [] -> go [] (feedEndOfInput parse) + (s:ss) -> go ss (parse `feedChunk` s ) where go ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss)) @@ -143,6 +160,9 @@ decodeWith !opts skipHeader s0 = case BL.toChunks s0 of go [] (Some xs k) = foldr Cons (go [] (k B.empty)) xs go (s:ss) (Some xs k) = foldr Cons (go ss (k s)) xs + + + -- | Efficiently deserialize CSV in a streaming fashion. The data is -- assumed to be preceeded by a header. Returns @'Left' errMsg@ if -- parsing the header fails. Equivalent to @'decodeByNameWith' @@ -160,9 +180,25 @@ decodeByNameWith :: FromNamedRecord a => DecodeOptions -- ^ Decoding options -> BL.ByteString -- ^ CSV data -> Either String (Header, Records a) -decodeByNameWith !opts s0 = case BL.toChunks s0 of - [] -> go [] (feedEndOfInputH $ I.decodeByNameWith opts) - (s:ss) -> go ss (I.decodeByNameWith opts `feedChunkH` s) +decodeByNameWith !opts = + streamDataByName (I.decodeByNameWith opts) + + +-- | Efficiently deserialize space-delimited in a streaming +-- fashion. The data is assumed to be preceeded by a header. Returns +-- @'Left' errMsg@ if parsing the header fails. +decodeTableByName :: FromNamedRecord a + => BL.ByteString -- ^ Space-delimited data + -> Either String (Header, Records a) +decodeTableByName = + streamDataByName I.decodeTableByName + + +streamDataByName :: HeaderParser (I.Parser a) + -> BL.ByteString -> Either String (Header, Records a) +streamDataByName parser s0 = case BL.toChunks s0 of + [] -> go [] (feedEndOfInputH parser) + (s:ss) -> go ss (parser `feedChunkH` s) where go ss (DoneH hdr p) = Right (hdr, go2 ss p) go ss (FailH rest err) = Left $ err ++ " at " ++ diff --git a/cassava.cabal b/cassava.cabal index 08f1606..c04810f 100644 --- a/cassava.cabal +++ b/cassava.cabal @@ -3,7 +3,7 @@ Version: 0.2.1.2 Synopsis: A CSV parsing and encoding library Description: A CSV parsing and encoding library optimized for ease of use and high - performance. + performance. It can encode/decode space-delimited data as well. Homepage: https://github.com/tibbe/cassava License: BSD3 License-file: LICENSE @@ -43,6 +43,7 @@ Library vector < 0.11 ghc-options: -Wall -O2 + ghc-prof-options: -auto-all if impl(ghc >= 7.2.1) cpp-options: -DGENERICS diff --git a/tests/UnitTests.hs b/tests/UnitTests.hs index 2231e0c..c47a2b3 100644 --- a/tests/UnitTests.hs +++ b/tests/UnitTests.hs @@ -26,8 +26,8 @@ import qualified Data.Csv.Streaming as S ------------------------------------------------------------------------ -- Parse tests -decodesAs :: BL.ByteString -> [[B.ByteString]] -> Assertion -decodesAs input expected = assertResult input expected $ decode False input +decodesAs :: (Bool -> BL.ByteString -> Either String Csv) -> BL.ByteString -> [[B.ByteString]] -> Assertion +decodesAs dec input expected = assertResult input expected $ dec False input decodesWithAs :: DecodeOptions -> BL.ByteString -> [[B.ByteString]] -> Assertion decodesWithAs opts input expected = @@ -41,9 +41,9 @@ assertResult input expected res = case res of " input: " ++ show (BL8.unpack input) ++ "\n" ++ "parse error: " ++ err -encodesAs :: [[B.ByteString]] -> BL.ByteString -> Assertion -encodesAs input expected = - encode (V.fromList (map V.fromList input)) @?= expected +encodesAs :: (Csv -> BL.ByteString) -> [[B.ByteString]] -> BL.ByteString -> Assertion +encodesAs enc input expected = + enc (V.fromList (map V.fromList input)) @?= expected encodesWithAs :: EncodeOptions -> [[B.ByteString]] -> BL.ByteString -> Assertion encodesWithAs opts input expected = @@ -73,10 +73,12 @@ recordsToList (S.Cons (Right x) rs) = case recordsToList rs of l@(Left _) -> l (Right xs) -> Right (x : xs) -decodesStreamingAs :: BL.ByteString -> [[B.ByteString]] -> Assertion -decodesStreamingAs input expected = +-- decodesStreamingAs :: BL.ByteString -> [[B.ByteString]] -> Assertion +decodesStreamingAs :: (Bool -> BL8.ByteString -> S.Records [B.ByteString]) + -> BL8.ByteString -> [[B.ByteString]] -> Assertion +decodesStreamingAs dec input expected = assertResult input expected $ fmap (V.fromList . map V.fromList) $ - recordsToList $ S.decode False input + recordsToList $ dec False input decodesWithStreamingAs :: DecodeOptions -> BL.ByteString -> [[B.ByteString]] -> Assertion @@ -100,12 +102,14 @@ namedDecodesStreamingAs input ehdr expected = case S.decodeByName input of positionalTests :: [TF.Test] positionalTests = + -- Encode CSV [ testGroup "encode" $ map encodeTest [ ("simple", [["abc"]], "abc\r\n") , ("quoted", [["\"abc\""]], "\"\"\"abc\"\"\"\r\n") , ("quote", [["a\"b"]], "\"a\"\"b\"\r\n") , ("quotedQuote", [["\"a\"b\""]], "\"\"\"a\"\"b\"\"\"\r\n") , ("leadingSpace", [[" abc"]], "\" abc\"\r\n") + , ("leadingTab", [["\tabc"]], "\"\tabc\"\r\n") , ("comma", [["abc,def"]], "\"abc,def\"\r\n") , ("twoFields", [["abc","def"]], "abc,def\r\n") , ("twoRecords", [["abc"], ["def"]], "abc\r\ndef\r\n") @@ -115,11 +119,28 @@ positionalTests = [ testCase "tab-delim" $ encodesWithAs (defEnc { encDelimiter = 9 }) [["1", "2"]] "1\t2\r\n" ] - , testGroup "decode" $ map decodeTest decodeTests + -- Encode space-delimited data + , testGroup "encode table" $ map encodeTableTest + [ ("simple", [["abc"]], "abc\r\n") + , ("empty", [[""]], "\"\"\r\n") + , ("quoted", [["\"abc\""]], "\"\"\"abc\"\"\"\r\n") + , ("quote", [["a\"b"]], "\"a\"\"b\"\r\n") + , ("quotedQuote", [["\"a\"b\""]], "\"\"\"a\"\"b\"\"\"\r\n") + , ("leadingSpace", [[" abc"]], "\" abc\"\r\n") + , ("leadingTab", [["\tabc"]], "\"\tabc\"\r\n") + , ("comma", [["abc,def"]], "\"abc,def\"\r\n") + , ("twoFields", [["abc","def"]], "abc\tdef\r\n") + , ("twoRecords", [["abc"], ["def"]], "abc\r\ndef\r\n") + , ("newline", [["abc\ndef"]], "\"abc\ndef\"\r\n") + ] + -- Decode CSV & space delimited data + , testGroup "decode" $ map decodeTest decodeTests , testGroup "decodeWith" $ map decodeWithTest decodeWithTests + , testGroup "decode table" $ map decodeTableTest decodeTableTests , testGroup "streaming" - [ testGroup "decode" $ map streamingDecodeTest decodeTests - , testGroup "decodeWith" $ map streamingDecodeWithTest decodeWithTests + [ testGroup "decode" $ map streamingDecodeTest decodeTests + , testGroup "decodeWith" $ map streamingDecodeWithTest decodeWithTests + , testGroup "table" $ map streamingDecodeTableTest decodeTableTests ] ] where @@ -145,15 +166,34 @@ positionalTests = decodeWithTests = [ ("tab-delim", defDec { decDelimiter = 9 }, "1\t2", [["1", "2"]]) ] + decodeTableTests = + [ ("simple", "a b c\n", [["a", "b", "c"]]) + , ("crlf", "a b\r\nc d\r\n", [["a", "b"], ["c", "d"]]) + , ("noEol", "a b c", [["a", "b", "c"]]) + , ("blankLine", "a b c\n\nd e f\n\n", + [["a", "b", "c"], ["d", "e", "f"]]) + , ("leadingSpace", " a b c\n", [["a", "b", "c"]]) + , ("trailingSpace", "a b c \n", [["a", "b", "c"]]) + , ("emptyField", "a \"\"\n", [["a", ""]]) + ] + -- Encode encodeTest (name, input, expected) = - testCase name $ input `encodesAs` expected + testCase name $ encodesAs encode input expected + encodeTableTest (name, input, expected) = + testCase name $ encodesAs encodeTable input expected + -- Decode decodeTest (name, input, expected) = - testCase name $ input `decodesAs` expected + testCase name $ decodesAs decode input expected + decodeTableTest (name, input, expected) = + testCase name $ decodesAs decodeTable input expected decodeWithTest (name, opts, input, expected) = testCase name $ decodesWithAs opts input expected + -- Streaming streamingDecodeTest (name, input, expected) = - testCase name $ input `decodesStreamingAs` expected + testCase name $ decodesStreamingAs S.decode input expected + streamingDecodeTableTest (name, input, expected) = + testCase name $ decodesStreamingAs S.decodeTable input expected streamingDecodeWithTest (name, opts, input, expected) = testCase name $ decodesWithStreamingAs opts input expected defEnc = defaultEncodeOptions @@ -209,7 +249,7 @@ instance Arbitrary LT.Text where -- empty line (which we will ignore.) We therefore encode at least two -- columns. roundTrip :: (Eq a, FromField a, ToField a) => a -> Bool -roundTrip x = Right record == decode False (encode record) +roundTrip x = Right record == decode False (encode record) where record = V.singleton (x, dummy) dummy = 'a'