From 359b69153bcc3f187c485e90f733b74d4e9a5c19 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Sat, 20 Oct 2012 20:05:42 +0400
Subject: [PATCH 01/21] Add parsers for space-delimited data files

---
 Data/Csv/Parser.hs | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index aaba190..5d5e1d5 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -16,12 +16,16 @@
 module Data.Csv.Parser
     ( DecodeOptions(..)
     , defaultDecodeOptions
+      -- * CSV
     , csv
     , csvWithHeader
     , header
     , record
     , name
     , field
+      -- * Tables
+    , table
+    , recordTable
     ) where
 
 import Blaze.ByteString.Builder (fromByteString, toByteString)
@@ -133,6 +137,37 @@ unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
                                             c /= delim &&
                                             c /= cr)
 
+-- |
+table :: AL.Parser Csv
+table = do
+  vals <- recordTable `sepBy1` endOfLine
+  _    <- optional endOfLine
+  endOfInput
+  return $ V.fromList $ removeBlankLines vals
+{-# INLINE table #-}
+
+-- | Parse record for space-separated files.
+recordTable :: AL.Parser Record
+recordTable = V.fromList <$> fieldTable `sepBy1` delimTable
+{-# INLINE recordTable #-}
+
+fieldTable :: AL.Parser Field
+fieldTable = do
+  mb <- A.peekWord8
+  case mb of
+    Just b | b == doubleQuote -> escapedField
+    _                         -> unescapedFieldTable
+
+unescapedFieldTable :: AL.Parser Field
+unescapedFieldTable = A.takeWhile (\c -> c /= doubleQuote &&
+                                        c /= newline     &&
+                                        c /= cr          &&
+                                        c /= 9           &&
+                                        c /= 32          )
+
+delimTable :: AL.Parser ()
+delimTable = () <$ AL.many1 (A.satisfy $ \c -> c == 32 || c == 9)
+
 dquote :: AL.Parser Char
 dquote = char '"'
 

From 6800c8b1ed4c560acd831ab4df50a2e70d751acc Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Sat, 20 Oct 2012 20:20:17 +0400
Subject: [PATCH 02/21] Add function to decode space-delimited files

---
 Data/Csv.hs          |  3 +++
 Data/Csv/Encoding.hs | 12 +++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/Data/Csv.hs b/Data/Csv.hs
index dfb4dcb..e839a0e 100644
--- a/Data/Csv.hs
+++ b/Data/Csv.hs
@@ -32,6 +32,9 @@ module Data.Csv
     , encodeWith
     , encodeByNameWith
 
+    -- * Space-delimited
+    , decodeTable
+
     -- * Core CSV types
     , Csv
     , Record
diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 1ced964..b6503d8 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -26,6 +26,9 @@ module Data.Csv.Encoding
     , defaultEncodeOptions
     , encodeWith
     , encodeByNameWith
+
+    -- * Space-delimited files
+    , decodeTable
     ) where
 
 import Blaze.ByteString.Builder
@@ -184,4 +187,11 @@ decodeWithP p to s =
           Error msg -> Left $ "conversion error: " ++ msg
       AL.Fail left _ msg -> Left $ "parse error (" ++ msg ++ ") at " ++
                             show (BL8.unpack left)
-{-# INLINE decodeWithP #-}
\ No newline at end of file
+{-# INLINE decodeWithP #-}
+
+
+------------------------------------------------------------------------
+-- * Space-delimited files
+
+decodeTable :: FromRecord a => L.ByteString -> Either String (Vector a)
+decodeTable = decodeWithP table (parse . traverse parseRecord)

From 05398ba364f245a2b3fcc6f0b58f9278d44dc95b Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Mon, 22 Oct 2012 19:52:19 +0400
Subject: [PATCH 03/21] Handle leading and trailing spaces

---
 Data/Csv/Parser.hs | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index 5d5e1d5..5fc3f41 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -146,9 +146,24 @@ table = do
   return $ V.fromList $ removeBlankLines vals
 {-# INLINE table #-}
 
--- | Parse record for space-separated files.
+-- | Parse record for space-separated files. It's more complicated
+--   that CSV parser because we need to drop both
 recordTable :: AL.Parser Record
-recordTable = V.fromList <$> fieldTable `sepBy1` delimTable
+recordTable
+  = V.fromList <$>
+    ((delimTable <|> pure ()) *>
+     (fieldTable `sepBy11` delimTable)
+    )
+  where
+    sepBy11 p s = liftA2 (:) p scan
+      where
+        scan =  s *> (([] <$ eol) <|> (liftA2 (:) p scan))
+            <|> pure []
+        eol = ([] <$ endOfInput) <|> do
+          mb <- AL.peekWord8
+          case mb of
+            Just b | b == newline || b == cr -> pure []
+            _                                -> empty
 {-# INLINE recordTable #-}
 
 fieldTable :: AL.Parser Field

From 805bb75d7820324498f0ac3748fb3eff0f9225a8 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 23 Oct 2012 14:55:02 +0400
Subject: [PATCH 04/21] Add ghc-prof-options to gather sensible profiling
 information

---
 cassava.cabal | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cassava.cabal b/cassava.cabal
index 424943d..7c3bc00 100644
--- a/cassava.cabal
+++ b/cassava.cabal
@@ -40,6 +40,7 @@ Library
                        vector
 
   ghc-options:         -Wall -O2
+  ghc-prof-options:    -auto-all
 
   if impl(ghc >= 7.2.1)
     cpp-options:       -DGENERICS

From b5061961cb2f124892f7ee5fe8c81817024b5627 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 23 Oct 2012 15:04:10 +0400
Subject: [PATCH 05/21] Use named constants

---
 Data/Csv/Parser.hs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index 5fc3f41..dd45930 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -175,13 +175,13 @@ fieldTable = do
 
 unescapedFieldTable :: AL.Parser Field
 unescapedFieldTable = A.takeWhile (\c -> c /= doubleQuote &&
-                                        c /= newline     &&
-                                        c /= cr          &&
-                                        c /= 9           &&
-                                        c /= 32          )
+                                         c /= newline     &&
+                                         c /= cr          &&
+                                         c /= tab         &&
+                                         c /= wspace      )
 
 delimTable :: AL.Parser ()
-delimTable = () <$ AL.many1 (A.satisfy $ \c -> c == 32 || c == 9)
+delimTable = () <$ AL.many1 (A.satisfy $ \c -> c == wspace || c == tab)
 
 dquote :: AL.Parser Char
 dquote = char '"'
@@ -201,7 +201,9 @@ unescape = toByteString <$> go mempty where
       then return (acc `mappend` fromByteString h)
       else rest
 
-doubleQuote, newline, cr :: Word8
+doubleQuote, newline, cr, tab, wspace :: Word8
 doubleQuote = 34
-newline = 10
-cr = 13
+newline     = 10
+cr          = 13
+tab         = 9
+wspace      = 32

From 9185c6ff5b1d9b7d05a1c44167c1c8ee8036f6ae Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 23 Oct 2012 15:27:08 +0400
Subject: [PATCH 06/21] Use takeWhile1 it slightly improves performance

---
 Data/Csv/Parser.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index dd45930..27ad102 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -181,7 +181,7 @@ unescapedFieldTable = A.takeWhile (\c -> c /= doubleQuote &&
                                          c /= wspace      )
 
 delimTable :: AL.Parser ()
-delimTable = () <$ AL.many1 (A.satisfy $ \c -> c == wspace || c == tab)
+delimTable = () <$ A.takeWhile1 (\c -> c == wspace || c == tab)
 
 dquote :: AL.Parser Char
 dquote = char '"'

From 5910b82395d5267e348bd50c0f4ee08627165f0f Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Fri, 26 Oct 2012 22:07:34 +0400
Subject: [PATCH 07/21] Add named decoders for tables

---
 Data/Csv.hs        |  2 +-
 Data/Csv/Parser.hs | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Data/Csv.hs b/Data/Csv.hs
index e839a0e..ebd7a74 100644
--- a/Data/Csv.hs
+++ b/Data/Csv.hs
@@ -32,7 +32,7 @@ module Data.Csv
     , encodeWith
     , encodeByNameWith
 
-    -- * Space-delimited
+    -- * Space-delimited files
     , decodeTable
 
     -- * Core CSV types
diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index 27ad102..d2f51e5 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -25,7 +25,11 @@ module Data.Csv.Parser
     , field
       -- * Tables
     , table
+    , tableWithHeader
+    , tableHeader
     , recordTable
+    , tableName
+    , fieldTable
     ) where
 
 import Blaze.ByteString.Builder (fromByteString, toByteString)
@@ -146,6 +150,21 @@ table = do
   return $ V.fromList $ removeBlankLines vals
 {-# INLINE table #-}
 
+tableWithHeader :: AL.Parser (Header, V.Vector NamedRecord)
+tableWithHeader = do
+    hdr  <- tableHeader
+    vals <- map (toNamedRecord hdr) . removeBlankLines <$>
+            recordTable `sepBy1` endOfLine
+    _ <- optional endOfLine
+    endOfInput
+    return (hdr, V.fromList vals)
+
+tableHeader :: AL.Parser Record
+tableHeader = recordTable <* endOfLine
+
+tableName :: AL.Parser Field
+tableName = fieldTable
+
 -- | Parse record for space-separated files. It's more complicated
 --   that CSV parser because we need to drop both
 recordTable :: AL.Parser Record

From cbae628ed8213901ac545cc4c2ad05bdc38f144c Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 25 Dec 2012 18:30:19 +0400
Subject: [PATCH 08/21] Fix build

---
 Data/Csv/Encoding.hs | 22 +++++++++++++---------
 Data/Csv/Parser.hs   | 11 ++++++-----
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 89c3c1d..60997b0 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -100,7 +100,8 @@ decodeWith :: FromRecord a
                              -- skipped
            -> L.ByteString   -- ^ CSV data
            -> Either String (Vector a)
-decodeWith = decodeWithC (runParser . parseCsv)
+decodeWith !opt =
+  decodeWithC (header $ decDelimiter opt) (csv opt) (runParser . parseCsv)
 {-# INLINE [1] decodeWith #-}
 
 parseCsv :: FromRecord a => Csv -> Parser (Vector a)
@@ -124,14 +125,16 @@ mapM' f = go
 -- conversion is performed.
 idDecodeWith :: DecodeOptions -> Bool -> L.ByteString
              -> Either String (Vector (Vector B.ByteString))
-idDecodeWith = decodeWithC pure
+idDecodeWith !opt = decodeWithC (header $ decDelimiter opt) (csv opt) pure
 
-decodeWithC :: (Csv -> Either String a) -> DecodeOptions -> Bool -> L.ByteString
-            -> Either String a
-decodeWithC convert !opts skipHeader = decodeWithP parser convert
+decodeWithC :: AL.Parser Header -> AL.Parser Csv
+            -> (Csv -> Either String a)
+            -> Bool -> L.ByteString -> Either String a
+decodeWithC headerP body convert skipHeader
+  = decodeWithP parser convert
   where parser
-            | skipHeader = header (decDelimiter opts) *> csv opts
-            | otherwise  = csv opts
+            | skipHeader = headerP *> body
+            | otherwise  = body
 {-# INLINE decodeWithC #-}
 
 -- | Like 'decodeByName', but lets you customize how the CSV data is
@@ -254,5 +257,6 @@ decodeWithP p to s =
 ------------------------------------------------------------------------
 -- * Space-delimited files
 
-decodeTable :: FromRecord a => L.ByteString -> Either String (Vector a)
-decodeTable = decodeWithP table (parse . traverse parseRecord)
+decodeTable :: FromRecord a => Bool -> L.ByteString -> Either String (Vector a)
+decodeTable =
+  decodeWithC tableHeader table (runParser . parseCsv)
diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index 72d55cc..e8f16cf 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -34,8 +34,8 @@ module Data.Csv.Parser
 
 import Blaze.ByteString.Builder (fromByteString, toByteString)
 import Blaze.ByteString.Builder.Char.Utf8 (fromChar)
-import Control.Applicative (Alternative, (*>), (<$>), (<*), (<|>), optional,
-                            pure)
+import Control.Applicative (Alternative, (*>), (<$>), (<*), (<|>), (<$), optional,
+                            pure, liftA2, empty)
 import Data.Attoparsec.Char8 (char, endOfInput, endOfLine)
 import qualified Data.Attoparsec as A
 import qualified Data.Attoparsec.Lazy as AL
@@ -176,7 +176,7 @@ unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
 -- |
 table :: AL.Parser Csv
 table = do
-  vals <- recordTable `sepBy1` endOfLine
+  vals <- recordTable `AL.sepBy1` endOfLine
   _    <- optional endOfLine
   endOfInput
   return $ V.fromList $ removeBlankLines vals
@@ -186,7 +186,7 @@ tableWithHeader :: AL.Parser (Header, V.Vector NamedRecord)
 tableWithHeader = do
     hdr  <- tableHeader
     vals <- map (toNamedRecord hdr) . removeBlankLines <$>
-            recordTable `sepBy1` endOfLine
+            recordTable `AL.sepBy1` endOfLine
     _ <- optional endOfLine
     endOfInput
     return (hdr, V.fromList vals)
@@ -198,7 +198,8 @@ tableName :: AL.Parser Field
 tableName = fieldTable
 
 -- | Parse record for space-separated files. It's more complicated
---   that CSV parser because we need to drop both
+--   that CSV parser because we need to drop both leading and trailing
+--   spaces.
 recordTable :: AL.Parser Record
 recordTable
   = V.fromList <$>

From 09b73f739e633c15ba9e6a272da5d5eb28a1c22b Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 25 Dec 2012 18:44:10 +0400
Subject: [PATCH 09/21] Add decoding of space-delimited data with header

---
 Data/Csv/Encoding.hs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 60997b0..d5559a2 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -29,6 +29,7 @@ module Data.Csv.Encoding
 
     -- * Space-delimited files
     , decodeTable
+    , decodeTableByName
     ) where
 
 import Blaze.ByteString.Builder (Builder, fromByteString, fromWord8,
@@ -259,4 +260,9 @@ decodeWithP p to s =
 
 decodeTable :: FromRecord a => Bool -> L.ByteString -> Either String (Vector a)
 decodeTable =
-  decodeWithC tableHeader table (runParser . parseCsv)
+    decodeWithC tableHeader table (runParser . parseCsv)
+
+decodeTableByName :: FromNamedRecord a => L.ByteString -> Either String (Header, Vector a)
+decodeTableByName =
+    decodeWithP tableWithHeader
+    (\(hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs))

From 953b09d0caa3583296e266da25c251ee580d124d Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 25 Dec 2012 18:45:15 +0400
Subject: [PATCH 10/21] Escape tab too

---
 Data/Csv/Encoding.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index d5559a2..2f59fc7 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -191,7 +191,7 @@ encodeRecord delim = mconcat . intersperse (fromWord8 delim)
 escape :: B.ByteString -> B.ByteString
 escape s
     | B.find (\ b -> b == dquote || b == comma || b == nl || b == cr ||
-                     b == sp) s == Nothing = s
+                     b == sp     || b == tab) s == Nothing = s
     | otherwise =
         B.concat ["\"",
                   B.concatMap
@@ -203,6 +203,7 @@ escape s
     nl     = 10
     cr     = 13
     sp     = 32
+    tab    =  9
 
 -- | Like 'encodeByName', but lets you customize how the CSV data is
 -- encoded.

From 7927d9cbf697d456da6ca9835cc498ea652253e7 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Tue, 25 Dec 2012 18:57:13 +0400
Subject: [PATCH 11/21] Add encoding of space-delimited data

---
 Data/Csv/Encoding.hs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 2f59fc7..4008133 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -30,6 +30,8 @@ module Data.Csv.Encoding
     -- * Space-delimited files
     , decodeTable
     , decodeTableByName
+    , encodeTable
+    , encodeTableByName
     ) where
 
 import Blaze.ByteString.Builder (Builder, fromByteString, fromWord8,
@@ -267,3 +269,26 @@ decodeTableByName :: FromNamedRecord a => L.ByteString -> Either String (Header,
 decodeTableByName =
     decodeWithP tableWithHeader
     (\(hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs))
+
+encodeTable :: ToRecord a => V.Vector a -> L.ByteString
+encodeTable = toLazyByteString
+            . unlines
+            . map (encodeTableRow . toRecord)
+            . V.toList
+{-# INLINE encodeTable #-}
+
+encodeTableByName :: ToNamedRecord a => Header -> V.Vector a -> L.ByteString
+encodeTableByName hdr v =
+    toLazyByteString (  encodeTableRow hdr
+                     <> fromByteString "\r\n"
+                     <> records )
+  where
+    records = unlines
+            . map (encodeTableRow . namedRecordToRecord hdr . toNamedRecord)
+            . V.toList $ v
+{-# INLINE encodeTableByName #-}
+
+encodeTableRow :: Record -> Builder
+encodeTableRow = mconcat . intersperse (fromWord8 9)
+               . map fromByteString . map escape . V.toList
+{-# INLINE encodeTableRow #-}

From 1bf95bd35d5489f6393df2c9432a3600edf7d3c0 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 26 Dec 2012 20:44:01 +0400
Subject: [PATCH 12/21] Use more consistent naming

---
 Data/Csv/Parser.hs | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index e8f16cf..595234b 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -27,9 +27,9 @@ module Data.Csv.Parser
     , table
     , tableWithHeader
     , tableHeader
-    , recordTable
+    , tableRecord
     , tableName
-    , fieldTable
+    , tableField
     ) where
 
 import Blaze.ByteString.Builder (fromByteString, toByteString)
@@ -176,7 +176,7 @@ unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
 -- |
 table :: AL.Parser Csv
 table = do
-  vals <- recordTable `AL.sepBy1` endOfLine
+  vals <- tableRecord `AL.sepBy1` endOfLine
   _    <- optional endOfLine
   endOfInput
   return $ V.fromList $ removeBlankLines vals
@@ -186,25 +186,25 @@ tableWithHeader :: AL.Parser (Header, V.Vector NamedRecord)
 tableWithHeader = do
     hdr  <- tableHeader
     vals <- map (toNamedRecord hdr) . removeBlankLines <$>
-            recordTable `AL.sepBy1` endOfLine
+            tableRecord `AL.sepBy1` endOfLine
     _ <- optional endOfLine
     endOfInput
     return (hdr, V.fromList vals)
 
 tableHeader :: AL.Parser Record
-tableHeader = recordTable <* endOfLine
+tableHeader = tableRecord <* endOfLine
 
 tableName :: AL.Parser Field
-tableName = fieldTable
+tableName = tableField
 
 -- | Parse record for space-separated files. It's more complicated
 --   that CSV parser because we need to drop both leading and trailing
 --   spaces.
-recordTable :: AL.Parser Record
-recordTable
+tableRecord :: AL.Parser Record
+tableRecord
   = V.fromList <$>
     ((delimTable <|> pure ()) *>
-     (fieldTable `sepBy11` delimTable)
+     (tableField `sepBy11` delimTable)
     )
   where
     sepBy11 p s = liftA2 (:) p scan
@@ -216,10 +216,10 @@ recordTable
           case mb of
             Just b | b == newline || b == cr -> pure []
             _                                -> empty
-{-# INLINE recordTable #-}
+{-# INLINE tableRecord #-}
 
-fieldTable :: AL.Parser Field
-fieldTable = do
+tableField :: AL.Parser Field
+tableField = do
   mb <- A.peekWord8
   case mb of
     Just b | b == doubleQuote -> escapedField

From 5dec6799f1ed874ffd658856ab51a68c75d466da Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 26 Dec 2012 21:05:03 +0400
Subject: [PATCH 13/21] Add space-delimited data decoding for incremental API

---
 Data/Csv/Incremental.hs | 66 +++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/Data/Csv/Incremental.hs b/Data/Csv/Incremental.hs
index 8188ce4..89ec17b 100644
--- a/Data/Csv/Incremental.hs
+++ b/Data/Csv/Incremental.hs
@@ -8,8 +8,11 @@ module Data.Csv.Incremental
     (
     -- * Decoding headers
       HeaderParser(..)
+    -- ** CSV
     , decodeHeader
     , decodeHeaderWith
+    -- **
+    , decodeTableHeader
     -- ** Providing input
     -- $feed-header
     , feedChunkH
@@ -23,11 +26,13 @@ module Data.Csv.Incremental
     -- $indexbased
     , decode
     , decodeWith
+    , decodeTable
 
     -- ** Name-based record conversion
     -- $namebased
     , decodeByName
     , decodeByNameWith
+    , decodeTableByName
 
     -- ** Providing input
     -- $feed-records
@@ -132,10 +137,16 @@ decodeHeader = decodeHeaderWith defaultDecodeOptions
 -- | Like 'decodeHeader', but lets you customize how the CSV data is
 -- parsed.
 decodeHeaderWith :: DecodeOptions -> HeaderParser B.ByteString
-decodeHeaderWith !opts = PartialH (go . parser)
-  where
-    parser = A.parse (header $ decDelimiter opts)
+decodeHeaderWith !opts = parseWithH (header $ decDelimiter opts)
+
+-- | Parse header for space-delimited data. It have same semantic as
+-- 'decodeHeader'.
+decodeTableHeader :: HeaderParser B.ByteString
+decodeTableHeader = parseWithH tableHeader
 
+parseWithH :: A.Parser Header -> HeaderParser B.ByteString
+parseWithH parser = PartialH (go . A.parse parser)
+  where
     go (A.Fail rest _ msg) = FailH rest err
       where err = "parse error (" ++ msg ++ ")"
     -- TODO: Check empty and give attoparsec one last chance to return
@@ -143,6 +154,8 @@ decodeHeaderWith !opts = PartialH (go . parser)
     go (A.Partial k)       = PartialH $ \ s -> go (k s)
     go (A.Done rest r)     = DoneH r rest
 
+
+
 ------------------------------------------------------------------------
 -- * Decoding records
 
@@ -229,12 +242,27 @@ decodeWith :: FromRecord a
            -> Bool           -- ^ Data contains header that should be
                              -- skipped
            -> Parser a
-decodeWith !opts skipHeader
-    | skipHeader = Partial $ \ s -> go (decodeHeaderWith opts `feedChunkH` s)
-    | otherwise  = Partial (decodeWithP parseRecord opts)
+decodeWith !opts = parseSimple (header d) (record d)
+  where
+    d = decDelimiter opts
+
+-- | Efficiently deserialize space-delimited data.
+decodeTable :: FromRecord a
+            => Bool          -- ^ Data contains header that should be
+                             -- skipped
+            -> Parser a
+decodeTable = parseSimple tableHeader tableRecord
+
+parseSimple :: FromRecord a => A.Parser Header -> A.Parser Record -> Bool -> Parser a
+parseSimple headP body skipHeader
+    | skipHeader = Partial $ \ s -> go (parseWithH headP `feedChunkH` s)
+    | otherwise  = Partial (decodeWithP body parseRecord)
   where go (FailH rest msg) = Fail rest msg
         go (PartialH k)     = Partial $ \ s' -> go (k s')
-        go (DoneH _ rest)   = decodeWithP parseRecord opts rest
+        go (DoneH _ rest)   = decodeWithP body parseRecord rest
+
+
+
 
 ------------------------------------------------------------------------
 
@@ -251,13 +279,23 @@ decodeByName = decodeByNameWith defaultDecodeOptions
 decodeByNameWith :: FromNamedRecord a
                  => DecodeOptions  -- ^ Decoding options
                  -> HeaderParser (Parser a)
-decodeByNameWith !opts =
-    PartialH (go . (decodeHeaderWith opts `feedChunkH`))
+decodeByNameWith !opts = parseNamed (header d) (record d)
+  where
+    d = decDelimiter opts
+
+decodeTableByName :: FromNamedRecord a
+                  => HeaderParser (Parser a)
+decodeTableByName = undefined
+
+parseNamed :: FromNamedRecord a
+           => A.Parser Header -> A.Parser Record -> HeaderParser (Parser a)
+parseNamed headP body =
+    PartialH (go . (parseWithH headP `feedChunkH`))
   where
     go (FailH rest msg) = FailH rest msg
     go (PartialH k)     = PartialH $ \ s -> go (k s)
     go (DoneH hdr rest) =
-        DoneH hdr (decodeWithP (parseNamedRecord . toNamedRecord hdr) opts rest)
+        DoneH hdr (decodeWithP body (parseNamedRecord . toNamedRecord hdr) rest)
 
 -- Copied from Data.Csv.Parser
 toNamedRecord :: Header -> Record -> NamedRecord
@@ -266,9 +304,11 @@ toNamedRecord hdr v = HM.fromList . V.toList $ V.zip hdr v
 ------------------------------------------------------------------------
 
 -- | Like 'decode', but lets you customize how the CSV data is parsed.
-decodeWithP :: (Record -> Conversion.Parser a) -> DecodeOptions -> B.ByteString
+decodeWithP :: A.Parser Record
+            -> (Record -> Conversion.Parser a)
+            -> B.ByteString
             -> Parser a
-decodeWithP p !opts = go Incomplete [] . parser
+decodeWithP rowParser p = go Incomplete [] . parser
   where
     go !_ !acc (A.Fail rest _ msg)
         | null acc  = Fail rest err
@@ -295,7 +335,7 @@ decodeWithP p !opts = go Incomplete [] . parser
             acc' | blankLine r = acc
                  | otherwise   = convert r : acc
 
-    parser = A.parse (record (decDelimiter opts) <* (endOfLine <|> endOfInput))
+    parser = A.parse (rowParser <* (endOfLine <|> endOfInput))
     convert = runParser . p
 {-# INLINE decodeWithP #-}
 

From 2ace4142df48c98a405dd82e887950aa71ec22eb Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 26 Dec 2012 23:05:21 +0400
Subject: [PATCH 14/21] Add decoding of space-delimited data for streaming API

---
 Data/Csv/Streaming.hs | 49 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/Data/Csv/Streaming.hs b/Data/Csv/Streaming.hs
index b226d19..999fb0b 100644
--- a/Data/Csv/Streaming.hs
+++ b/Data/Csv/Streaming.hs
@@ -20,11 +20,13 @@ module Data.Csv.Streaming
     -- $indexbased
     , decode
     , decodeWith
+    , decodeTable
 
     -- ** Name-based record conversion
     -- $namebased
     , decodeByName
     , decodeByNameWith
+    , decodeTableByName
     ) where
 
 import Control.Applicative ((<$>), (<*>), pure)
@@ -37,7 +39,7 @@ import Prelude hiding (foldr)
 
 import Data.Csv.Conversion
 import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith,
-                                    decodeWith)
+                                    decodeWith, decodeTable, decodeTableByName)
 import qualified Data.Csv.Incremental as I
 import Data.Csv.Parser
 import Data.Csv.Types
@@ -132,9 +134,24 @@ decodeWith :: FromRecord a
                              -- skipped
            -> BL.ByteString  -- ^ CSV data
            -> Records a
-decodeWith !opts skipHeader s0 = case BL.toChunks s0 of
-    []     -> go [] (feedEndOfInput $ I.decodeWith opts skipHeader)
-    (s:ss) -> go ss (I.decodeWith opts skipHeader `feedChunk` s)
+decodeWith !opts skipHeader =
+  streamData $ I.decodeWith opts skipHeader
+
+-- | Efficiently deserialize space-delimited records in a streaming
+-- fashion.
+decodeTable :: FromRecord a
+            => Bool           -- ^ Data contains header that should be
+                              --   skipped
+            -> BL.ByteString  -- ^ Space-delimited data
+            -> Records a
+decodeTable skipHeader =
+  streamData $ I.decodeTable skipHeader
+
+
+streamData :: I.Parser a -> BL.ByteString -> Records a
+streamData parse s0 = case BL.toChunks s0 of
+    []     -> go [] (feedEndOfInput parse)
+    (s:ss) -> go ss (parse `feedChunk` s )
   where
     go ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
     go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
@@ -143,6 +160,9 @@ decodeWith !opts skipHeader s0 = case BL.toChunks s0 of
     go [] (Some xs k)     = foldr Cons (go [] (k B.empty)) xs
     go (s:ss) (Some xs k) = foldr Cons (go ss (k s)) xs
 
+
+
+
 -- | Efficiently deserialize CSV in a streaming fashion. The data is
 -- assumed to be preceeded by a header. Returns @'Left' errMsg@ if
 -- parsing the header fails. Equivalent to @'decodeByNameWith'
@@ -160,9 +180,24 @@ decodeByNameWith :: FromNamedRecord a
                  => DecodeOptions  -- ^ Decoding options
                  -> BL.ByteString  -- ^ CSV data
                  -> Either String (Header, Records a)
-decodeByNameWith !opts s0 = case BL.toChunks s0 of
-    []     -> go [] (feedEndOfInputH $ I.decodeByNameWith opts)
-    (s:ss) -> go ss (I.decodeByNameWith opts `feedChunkH` s)
+decodeByNameWith !opts =
+  streamDataByName (I.decodeByNameWith opts)
+
+
+-- | Like 'decodeByName', but lets you customize how the CSV data is
+-- parsed.
+decodeTableByName :: FromNamedRecord a
+                  => BL.ByteString  -- ^ Space-delimited data
+                  -> Either String (Header, Records a)
+decodeTableByName =
+  streamDataByName I.decodeTableByName
+
+
+streamDataByName :: HeaderParser (I.Parser a)
+                 -> BL.ByteString -> Either String (Header, Records a)
+streamDataByName parser s0 = case BL.toChunks s0 of
+    []     -> go [] (feedEndOfInputH parser)
+    (s:ss) -> go ss (parser `feedChunkH` s)
   where
     go ss (DoneH hdr p)    = Right (hdr, go2 ss p)
     go ss (FailH rest err) = Left $ err ++ " at " ++

From bd7b4bad94b55f4ed0f936fc290d2957d44fe56e Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 14:46:04 +0400
Subject: [PATCH 15/21] Add documentation

---
 Data/Csv.hs             | 25 +++++++++++++++++++++----
 Data/Csv/Encoding.hs    | 17 ++++++++++++++++-
 Data/Csv/Incremental.hs |  3 +++
 Data/Csv/Parser.hs      | 31 +++++++++++++++++++++++--------
 Data/Csv/Streaming.hs   |  5 +++--
 cassava.cabal           |  2 +-
 6 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/Data/Csv.hs b/Data/Csv.hs
index d7d1ec7..903b952 100644
--- a/Data/Csv.hs
+++ b/Data/Csv.hs
@@ -1,5 +1,9 @@
--- | This module implements encoding and decoding of CSV data. The
--- implementation is RFC 4180 compliant, with the following
+-- | This module implements encoding and decoding of CSV and
+-- space-delimited data. APIs for both format are essentially the
+-- same. They only differ in function names and space-delimited data
+-- doesn't have encoding\/decoding options.
+--
+-- The CSV implementation is RFC 4180 compliant, with the following
 -- extensions:
 --
 --  * Empty lines are ignored.
@@ -9,6 +13,16 @@
 --
 --  * Escaped fields may contain any characters (but double-quotes
 --    need to be escaped).
+--
+-- Space-delimited data don't have specification so following format
+-- is assumed:
+--
+--  * Fields are delimited by one or more tabs and spaces at the
+--    beginning and end of line are ignored.
+--
+--  * Empty lines are ignored.
+--
+--  * Escaping rules are same as with CSV
 module Data.Csv
     (
     -- * Usage example
@@ -20,7 +34,7 @@ module Data.Csv
     -- * Custom type conversions
     -- $customtypeconversions
 
-    -- * Encoding and decoding
+    -- * Encoding and decoding CSV
     -- $encoding
       decode
     , decodeByName
@@ -38,8 +52,11 @@ module Data.Csv
     , encodeWith
     , encodeByNameWith
 
-    -- * Space-delimited files
+    -- * Space-delimited data
     , decodeTable
+    , decodeTableByName
+    , encodeTable
+    , encodeTableByName
 
     -- * Core CSV types
     , Csv
diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 4008133..02322ff 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -261,15 +261,27 @@ decodeWithP p to s =
 ------------------------------------------------------------------------
 -- * Space-delimited files
 
-decodeTable :: FromRecord a => Bool -> L.ByteString -> Either String (Vector a)
+-- | Efficiently deserialize space-delimited records from a lazy
+-- ByteString. If this fails due to incomplete or invalid input,
+-- @'Left' msg@ is returned.
+decodeTable :: FromRecord a
+            => Bool             -- ^ Data contains header that should be skipped
+            -> L.ByteString     -- ^ Raw data
+            -> Either String (Vector a)
 decodeTable =
     decodeWithC tableHeader table (runParser . parseCsv)
 
+-- | Efficiently deserialize space-delimited records from a lazy
+-- ByteString. If this fails due to incomplete or invalid input,
+-- @'Left' msg@ is returned. The data is assumed to be preceeded by a
+-- header.
 decodeTableByName :: FromNamedRecord a => L.ByteString -> Either String (Header, Vector a)
 decodeTableByName =
     decodeWithP tableWithHeader
     (\(hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs))
 
+-- | Efficiently serialize space-delimited records as a lazy
+-- ByteString. Single tab is used as separator.
 encodeTable :: ToRecord a => V.Vector a -> L.ByteString
 encodeTable = toLazyByteString
             . unlines
@@ -277,6 +289,9 @@ encodeTable = toLazyByteString
             . V.toList
 {-# INLINE encodeTable #-}
 
+-- | Efficiently serialize space-delimited records as a lazy
+-- ByteString. The header is written before any records and dictates
+-- the field order. Single tab is used as separator.
 encodeTableByName :: ToNamedRecord a => Header -> V.Vector a -> L.ByteString
 encodeTableByName hdr v =
     toLazyByteString (  encodeTableRow hdr
diff --git a/Data/Csv/Incremental.hs b/Data/Csv/Incremental.hs
index 89ec17b..1155aac 100644
--- a/Data/Csv/Incremental.hs
+++ b/Data/Csv/Incremental.hs
@@ -283,6 +283,9 @@ decodeByNameWith !opts = parseNamed (header d) (record d)
   where
     d = decDelimiter opts
 
+-- | Efficiently deserialize space-delimited data in incremental
+-- fashion. The data is assumed to be preceeded by a header. Have same
+-- semantics as 'decodeByName'.
 decodeTableByName :: FromNamedRecord a
                   => HeaderParser (Parser a)
 decodeTableByName = undefined
diff --git a/Data/Csv/Parser.hs b/Data/Csv/Parser.hs
index 595234b..08e4a73 100644
--- a/Data/Csv/Parser.hs
+++ b/Data/Csv/Parser.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE BangPatterns, CPP #-}
 
--- | A CSV parser. The parser defined here is RFC 4180 compliant, with
--- the following extensions:
+-- | Parsers for CSV and space-delimited data. The CSV parser defined
+-- here is RFC 4180 compliant, with the following extensions:
 --
 --  * Empty lines are ignored.
 --
@@ -11,6 +11,16 @@
 --  * Escaped fields may contain any characters (but double-quotes
 --    need to be escaped).
 --
+-- Space-delimited data don't have specification so following format
+-- is assumed:
+--
+--  * Fields are delimited by one or more tabs and spaces at the
+--    beginning and end of line are ignored.
+--
+--  * Empty lines are ignored.
+--
+--  * Escaping rules are same as with CSV
+--
 -- The functions in this module can be used to implement e.g. a
 -- resumable parser that is fed input incrementally.
 module Data.Csv.Parser
@@ -23,7 +33,7 @@ module Data.Csv.Parser
     , record
     , name
     , field
-      -- * Tables
+      -- * Space-delimited data
     , table
     , tableWithHeader
     , tableHeader
@@ -51,7 +61,7 @@ import Data.Word (Word8)
 import Data.Csv.Types
 import Data.Csv.Util ((<$!>))
 
--- | Options that controls how data is decoded. These options can be
+-- | Options that controls how CSV data is decoded. These options can be
 -- used to e.g. decode tab-separated data instead of comma-separated
 -- data.
 --
@@ -173,7 +183,7 @@ unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
                                             c /= delim &&
                                             c /= cr)
 
--- |
+-- | Parse space-delimited data which doesn't include header.
 table :: AL.Parser Csv
 table = do
   vals <- tableRecord `AL.sepBy1` endOfLine
@@ -182,6 +192,7 @@ table = do
   return $ V.fromList $ removeBlankLines vals
 {-# INLINE table #-}
 
+-- | Parse space-delimited data with header.
 tableWithHeader :: AL.Parser (Header, V.Vector NamedRecord)
 tableWithHeader = do
     hdr  <- tableHeader
@@ -191,15 +202,17 @@ tableWithHeader = do
     endOfInput
     return (hdr, V.fromList vals)
 
+-- | Parse header name.
 tableHeader :: AL.Parser Record
 tableHeader = tableRecord <* endOfLine
 
+-- | Parse header name. They have same format as regular 'tableField's.
 tableName :: AL.Parser Field
 tableName = tableField
 
--- | Parse record for space-separated files. It's more complicated
---   that CSV parser because we need to drop both leading and trailing
---   spaces.
+-- | Parse row for space-delimited data not including terminating line
+-- separator. It's more complicated that CSV parser because we need
+-- to drop both leading and trailing spaces.
 tableRecord :: AL.Parser Record
 tableRecord
   = V.fromList <$>
@@ -218,6 +231,8 @@ tableRecord
             _                                -> empty
 {-# INLINE tableRecord #-}
 
+-- | Parse field. It could be escaped or not escaped. The return value
+--   is escaped.
 tableField :: AL.Parser Field
 tableField = do
   mb <- A.peekWord8
diff --git a/Data/Csv/Streaming.hs b/Data/Csv/Streaming.hs
index 999fb0b..fcfbcd8 100644
--- a/Data/Csv/Streaming.hs
+++ b/Data/Csv/Streaming.hs
@@ -184,8 +184,9 @@ decodeByNameWith !opts =
   streamDataByName (I.decodeByNameWith opts)
 
 
--- | Like 'decodeByName', but lets you customize how the CSV data is
--- parsed.
+-- | Efficiently deserialize space-delimited in a streaming
+-- fashion. The data is assumed to be preceeded by a header. Returns
+-- @'Left' errMsg@ if parsing the header fails.
 decodeTableByName :: FromNamedRecord a
                   => BL.ByteString  -- ^ Space-delimited data
                   -> Either String (Header, Records a)
diff --git a/cassava.cabal b/cassava.cabal
index 2630456..c04810f 100644
--- a/cassava.cabal
+++ b/cassava.cabal
@@ -3,7 +3,7 @@ Version:             0.2.1.2
 Synopsis:            A CSV parsing and encoding library
 Description:
   A CSV parsing and encoding library optimized for ease of use and high
-  performance.
+  performance. It can encode/decode space-delimited data as well.
 Homepage:            https://github.com/tibbe/cassava
 License:             BSD3
 License-file:        LICENSE

From 7f6a544c0d617f7cea9424b7c3ec7e4be83df132 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 17:05:43 +0400
Subject: [PATCH 16/21] Add tests

---
 tests/UnitTests.hs | 53 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/tests/UnitTests.hs b/tests/UnitTests.hs
index 2231e0c..76647d3 100644
--- a/tests/UnitTests.hs
+++ b/tests/UnitTests.hs
@@ -26,8 +26,8 @@ import qualified Data.Csv.Streaming as S
 ------------------------------------------------------------------------
 -- Parse tests
 
-decodesAs :: BL.ByteString -> [[B.ByteString]] -> Assertion
-decodesAs input expected = assertResult input expected $ decode False input
+decodesAs :: (Bool -> BL.ByteString -> Either String Csv) -> BL.ByteString -> [[B.ByteString]] -> Assertion
+decodesAs dec input expected = assertResult input expected $ dec False input
 
 decodesWithAs :: DecodeOptions -> BL.ByteString -> [[B.ByteString]] -> Assertion
 decodesWithAs opts input expected =
@@ -41,9 +41,9 @@ assertResult input expected res = case res of
                 "      input: " ++ show (BL8.unpack input) ++ "\n" ++
                 "parse error: " ++ err
 
-encodesAs :: [[B.ByteString]] -> BL.ByteString -> Assertion
-encodesAs input expected =
-    encode (V.fromList (map V.fromList input)) @?= expected
+encodesAs :: (Csv -> BL.ByteString) -> [[B.ByteString]] -> BL.ByteString -> Assertion
+encodesAs enc input expected =
+    enc (V.fromList (map V.fromList input)) @?= expected
 
 encodesWithAs :: EncodeOptions -> [[B.ByteString]] -> BL.ByteString -> Assertion
 encodesWithAs opts input expected =
@@ -100,12 +100,14 @@ namedDecodesStreamingAs input ehdr expected = case S.decodeByName input of
 
 positionalTests :: [TF.Test]
 positionalTests =
+      -- Encode CSV
     [ testGroup "encode" $ map encodeTest
       [ ("simple",       [["abc"]],          "abc\r\n")
       , ("quoted",       [["\"abc\""]],      "\"\"\"abc\"\"\"\r\n")
       , ("quote",        [["a\"b"]],         "\"a\"\"b\"\r\n")
       , ("quotedQuote",  [["\"a\"b\""]],     "\"\"\"a\"\"b\"\"\"\r\n")
       , ("leadingSpace", [[" abc"]],         "\" abc\"\r\n")
+      , ("leadingTab",   [["\tabc"]],         "\"\tabc\"\r\n")
       , ("comma",        [["abc,def"]],      "\"abc,def\"\r\n")
       , ("twoFields",    [["abc","def"]],    "abc,def\r\n")
       , ("twoRecords",   [["abc"], ["def"]], "abc\r\ndef\r\n")
@@ -115,12 +117,29 @@ positionalTests =
       [ testCase "tab-delim" $ encodesWithAs (defEnc { encDelimiter = 9 })
         [["1", "2"]] "1\t2\r\n"
       ]
-    , testGroup "decode" $ map decodeTest decodeTests
+      -- Encode space-delimited data
+    , testGroup "encode table" $ map encodeTableTest
+      [ ("simple",       [["abc"]],          "abc\r\n")
+      , ("empty",        [[""]],             "\"\"\r\n")
+      , ("quoted",       [["\"abc\""]],      "\"\"\"abc\"\"\"\r\n")
+      , ("quote",        [["a\"b"]],         "\"a\"\"b\"\r\n")
+      , ("quotedQuote",  [["\"a\"b\""]],     "\"\"\"a\"\"b\"\"\"\r\n")
+      , ("leadingSpace", [[" abc"]],         "\" abc\"\r\n")
+      , ("leadingTab",   [["\tabc"]],         "\"\tabc\"\r\n")
+      , ("comma",        [["abc,def"]],      "\"abc,def\"\r\n")
+      , ("twoFields",    [["abc","def"]],    "abc\tdef\r\n")
+      , ("twoRecords",   [["abc"], ["def"]], "abc\r\ndef\r\n")
+      , ("newline",      [["abc\ndef"]],     "\"abc\ndef\"\r\n")
+      ]
+      -- Decode CSV
+    , testGroup "decode"     $ map decodeTest decodeTests
     , testGroup "decodeWith" $ map decodeWithTest decodeWithTests
     , testGroup "streaming"
-      [ testGroup "decode" $ map streamingDecodeTest decodeTests
+      [ testGroup "decode"     $ map streamingDecodeTest decodeTests
       , testGroup "decodeWith" $ map streamingDecodeWithTest decodeWithTests
       ]
+      -- Decode space-delimited data
+    , testGroup "decode table" $ map decodeTableTest decodeTableTests
     ]
   where
     rfc4180Input = BL8.pack $
@@ -145,11 +164,25 @@ positionalTests =
     decodeWithTests =
         [ ("tab-delim", defDec { decDelimiter = 9 }, "1\t2", [["1", "2"]])
         ]
+    decodeTableTests =
+        [ ("simple",       "a b c\n",        [["a", "b", "c"]])
+        , ("crlf",         "a b\r\nc d\r\n", [["a", "b"], ["c", "d"]])
+        , ("noEol",        "a b c",          [["a", "b", "c"]])
+        , ("blankLine",    "a b c\n\nd e f\n\n",
+           [["a", "b", "c"], ["d", "e", "f"]])
+        , ("leadingSpace",  " a  b  c\n",   [["a", "b", "c"]])
+        , ("trailingSpace", "a  b  c  \n",  [["a", "b", "c"]])
+        , ("emptyField",    "a \"\"\n",     [["a", ""]])
+        ]
 
     encodeTest (name, input, expected) =
-        testCase name $ input `encodesAs` expected
+        testCase name $ encodesAs encode input expected
+    encodeTableTest (name, input, expected) =
+        testCase name $ encodesAs encodeTable input expected
     decodeTest (name, input, expected) =
-        testCase name $ input `decodesAs` expected
+        testCase name $ decodesAs decode input expected
+    decodeTableTest (name, input, expected) =
+        testCase name $ decodesAs decodeTable input expected
     decodeWithTest (name, opts, input, expected) =
         testCase name $ decodesWithAs opts input expected
     streamingDecodeTest (name, input, expected) =
@@ -209,7 +242,7 @@ instance Arbitrary LT.Text where
 -- empty line (which we will ignore.) We therefore encode at least two
 -- columns.
 roundTrip :: (Eq a, FromField a, ToField a) => a -> Bool
-roundTrip x = Right record == decode False (encode record) 
+roundTrip x = Right record == decode False (encode record)
   where record = V.singleton (x, dummy)
         dummy = 'a'
 

From 371bdd296a36a6b59abca7834d6e431d255d2d59 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 17:10:41 +0400
Subject: [PATCH 17/21] Fix escaping bug for empty strings

---
 Data/Csv/Encoding.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 02322ff..f13d4e3 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -305,5 +305,12 @@ encodeTableByName hdr v =
 
 encodeTableRow :: Record -> Builder
 encodeTableRow = mconcat . intersperse (fromWord8 9)
-               . map fromByteString . map escape . V.toList
+               . map fromByteString . map escapeT . V.toList
+  where
+    -- We need to escape empty strings. Otherwise we'll get:
+    --  > encode ["a","","b"] = 'a  b'
+    -- instead of
+    --  > encode ["a","","b"] = 'a "" b'
+    escapeT b | B.null b  = "\"\""
+              | otherwise = escape b
 {-# INLINE encodeTableRow #-}

From 2ea024a362baaf33050e80d52ac8ea220bcdcc60 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 17:28:32 +0400
Subject: [PATCH 18/21] Test for streaming

---
 tests/UnitTests.hs | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tests/UnitTests.hs b/tests/UnitTests.hs
index 76647d3..ccfcd46 100644
--- a/tests/UnitTests.hs
+++ b/tests/UnitTests.hs
@@ -73,10 +73,10 @@ recordsToList (S.Cons (Right x) rs) = case recordsToList rs of
     l@(Left _) -> l
     (Right xs) -> Right (x : xs)
 
-decodesStreamingAs :: BL.ByteString -> [[B.ByteString]] -> Assertion
-decodesStreamingAs input expected =
+-- decodesStreamingAs :: BL.ByteString -> [[B.ByteString]] -> Assertion
+decodesStreamingAs dec input expected =
     assertResult input expected $ fmap (V.fromList . map V.fromList) $
-    recordsToList $ S.decode False input
+    recordsToList $ dec False input
 
 decodesWithStreamingAs :: DecodeOptions -> BL.ByteString -> [[B.ByteString]]
                        -> Assertion
@@ -131,15 +131,15 @@ positionalTests =
       , ("twoRecords",   [["abc"], ["def"]], "abc\r\ndef\r\n")
       , ("newline",      [["abc\ndef"]],     "\"abc\ndef\"\r\n")
       ]
-      -- Decode CSV
+      -- Decode CSV & space delimited data
     , testGroup "decode"     $ map decodeTest decodeTests
     , testGroup "decodeWith" $ map decodeWithTest decodeWithTests
+    , testGroup "decode table" $ map decodeTableTest decodeTableTests
     , testGroup "streaming"
-      [ testGroup "decode"     $ map streamingDecodeTest decodeTests
-      , testGroup "decodeWith" $ map streamingDecodeWithTest decodeWithTests
+      [ testGroup "decode"     $ map streamingDecodeTest      decodeTests
+      , testGroup "decodeWith" $ map streamingDecodeWithTest  decodeWithTests
+      , testGroup "table"      $ map streamingDecodeTableTest decodeTableTests
       ]
-      -- Decode space-delimited data
-    , testGroup "decode table" $ map decodeTableTest decodeTableTests
     ]
   where
     rfc4180Input = BL8.pack $
@@ -175,18 +175,23 @@ positionalTests =
         , ("emptyField",    "a \"\"\n",     [["a", ""]])
         ]
 
+    -- Encode
     encodeTest (name, input, expected) =
         testCase name $ encodesAs encode input expected
     encodeTableTest (name, input, expected) =
         testCase name $ encodesAs encodeTable input expected
+    -- Decode
     decodeTest (name, input, expected) =
         testCase name $ decodesAs decode input expected
     decodeTableTest (name, input, expected) =
         testCase name $ decodesAs decodeTable input expected
     decodeWithTest (name, opts, input, expected) =
         testCase name $ decodesWithAs opts input expected
+    -- Streaming
     streamingDecodeTest (name, input, expected) =
-        testCase name $ input `decodesStreamingAs` expected
+        testCase name $ decodesStreamingAs S.decode input expected
+    streamingDecodeTableTest (name, input, expected) =
+        testCase name $ decodesStreamingAs S.decodeTable input expected
     streamingDecodeWithTest (name, opts, input, expected) =
         testCase name $ decodesWithStreamingAs opts input expected
     defEnc = defaultEncodeOptions

From 6e4ecc565a1690ca4848e50ebc8a535724d5bfcc Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 17:35:48 +0400
Subject: [PATCH 19/21] Copy optimization from CSV functions

 * Add rule to rewrite to specialized function
 * Inline decoding functions
---
 Data/Csv/Encoding.hs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index f13d4e3..63ba498 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -270,6 +270,17 @@ decodeTable :: FromRecord a
             -> Either String (Vector a)
 decodeTable =
     decodeWithC tableHeader table (runParser . parseCsv)
+{-# INLINE decodeTable #-}
+
+-- | Same as 'decodeWith', but more efficient as no type
+-- conversion is performed.
+idDecodeTable :: Bool -> L.ByteString -> Either String (Vector (Vector B.ByteString))
+idDecodeTable = decodeWithC tableHeader table pure
+
+{-# RULES
+    "idDecodeTable" decodeTable = idDecodeTable
+ #-}
+
 
 -- | Efficiently deserialize space-delimited records from a lazy
 -- ByteString. If this fails due to incomplete or invalid input,
@@ -279,6 +290,7 @@ decodeTableByName :: FromNamedRecord a => L.ByteString -> Either String (Header,
 decodeTableByName =
     decodeWithP tableWithHeader
     (\(hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs))
+{-# INLINE decodeTableByName #-}
 
 -- | Efficiently serialize space-delimited records as a lazy
 -- ByteString. Single tab is used as separator.

From 53a1d23635860ed14f18cd38d1ea7800dea5ac3d Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 17:40:19 +0400
Subject: [PATCH 20/21] Move common functionality and helpers to the end of
 module

---
 Data/Csv/Encoding.hs | 144 ++++++++++++++++++++++---------------------
 1 file changed, 74 insertions(+), 70 deletions(-)

diff --git a/Data/Csv/Encoding.hs b/Data/Csv/Encoding.hs
index 63ba498..b1f9862 100644
--- a/Data/Csv/Encoding.hs
+++ b/Data/Csv/Encoding.hs
@@ -107,19 +107,6 @@ decodeWith !opt =
   decodeWithC (header $ decDelimiter opt) (csv opt) (runParser . parseCsv)
 {-# INLINE [1] decodeWith #-}
 
-parseCsv :: FromRecord a => Csv -> Parser (Vector a)
-parseCsv xs = V.fromList <$!> mapM' parseRecord (V.toList xs)
-
-mapM' :: Monad m => (a -> m b) -> [a] -> m [b]
-mapM' f = go
-  where
-    go [] = return []
-    go (x:xs) = do
-        !y <- f x
-        ys <- go xs
-        return (y : ys)
-{-# INLINE mapM' #-}
-
 {-# RULES
     "idDecodeWith" decodeWith = idDecodeWith
  #-}
@@ -150,9 +137,6 @@ decodeByNameWith !opts =
     decodeWithP (csvWithHeader opts)
     (\ (hdr, vs) -> (,) <$> pure hdr <*> (runParser $ parseNamedCsv vs))
 
-parseNamedCsv :: FromNamedRecord a => Vector NamedRecord -> Parser (Vector a)
-parseNamedCsv xs = V.fromList <$!> mapM' parseNamedRecord (V.toList xs)
-
 -- | Options that controls how data is encoded. These options can be
 -- used to e.g. encode data in a tab-separated format instead of in a
 -- comma-separated format.
@@ -189,24 +173,6 @@ encodeRecord delim = mconcat . intersperse (fromWord8 delim)
                      . map fromByteString . map escape . V.toList
 {-# INLINE encodeRecord #-}
 
--- TODO: Optimize
-escape :: B.ByteString -> B.ByteString
-escape s
-    | B.find (\ b -> b == dquote || b == comma || b == nl || b == cr ||
-                     b == sp     || b == tab) s == Nothing = s
-    | otherwise =
-        B.concat ["\"",
-                  B.concatMap
-                  (\ b -> if b == dquote then "\"\"" else B.singleton b) s,
-                  "\""]
-  where
-    dquote = 34
-    comma  = 44
-    nl     = 10
-    cr     = 13
-    sp     = 32
-    tab    =  9
-
 -- | Like 'encodeByName', but lets you customize how the CSV data is
 -- encoded.
 encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> V.Vector a
@@ -222,42 +188,6 @@ encodeByNameWith opts hdr v =
 {-# INLINE encodeByNameWith #-}
 
 
-namedRecordToRecord :: Header -> NamedRecord -> Record
-namedRecordToRecord hdr nr = V.map find hdr
-  where
-    find n = case HM.lookup n nr of
-        Nothing -> moduleError "namedRecordToRecord" $
-                   "header contains name " ++ show (B8.unpack n) ++
-                   " which is not present in the named record"
-        Just v  -> v
-
-moduleError :: String -> String -> a
-moduleError func msg = error $ "Data.Csv.Encoding." ++ func ++ ": " ++ msg
-{-# NOINLINE moduleError #-}
-
-unlines :: [Builder] -> Builder
-unlines [] = mempty
-unlines (b:bs) = b <> fromString "\r\n" <> unlines bs
-
-intersperse :: Builder -> [Builder] -> [Builder]
-intersperse _   []      = []
-intersperse sep (x:xs)  = x : prependToAll sep xs
-
-prependToAll :: Builder -> [Builder] -> [Builder]
-prependToAll _   []     = []
-prependToAll sep (x:xs) = sep <> x : prependToAll sep xs
-
-decodeWithP :: AL.Parser a -> (a -> Either String b) -> L.ByteString -> Either String b
-decodeWithP p to s =
-    case AL.parse p s of
-      AL.Done _ v     -> case to v of
-          Right a  -> Right a
-          Left msg -> Left $ "conversion error: " ++ msg
-      AL.Fail left _ msg -> Left $ "parse error (" ++ msg ++ ") at " ++
-                            show (BL8.unpack left)
-{-# INLINE decodeWithP #-}
-
-
 ------------------------------------------------------------------------
 -- * Space-delimited files
 
@@ -326,3 +256,77 @@ encodeTableRow = mconcat . intersperse (fromWord8 9)
     escapeT b | B.null b  = "\"\""
               | otherwise = escape b
 {-# INLINE encodeTableRow #-}
+
+
+------------------------------------------------------------------------
+-- * Common functionality and helpers
+
+parseCsv :: FromRecord a => Csv -> Parser (Vector a)
+parseCsv xs = V.fromList <$!> mapM' parseRecord (V.toList xs)
+
+parseNamedCsv :: FromNamedRecord a => Vector NamedRecord -> Parser (Vector a)
+parseNamedCsv xs = V.fromList <$!> mapM' parseNamedRecord (V.toList xs)
+
+-- TODO: Optimize
+escape :: B.ByteString -> B.ByteString
+escape s
+    | B.find (\ b -> b == dquote || b == comma || b == nl || b == cr ||
+                     b == sp     || b == tab) s == Nothing = s
+    | otherwise =
+        B.concat ["\"",
+                  B.concatMap
+                  (\ b -> if b == dquote then "\"\"" else B.singleton b) s,
+                  "\""]
+  where
+    dquote = 34
+    comma  = 44
+    nl     = 10
+    cr     = 13
+    sp     = 32
+    tab    =  9
+
+namedRecordToRecord :: Header -> NamedRecord -> Record
+namedRecordToRecord hdr nr = V.map find hdr
+  where
+    find n = case HM.lookup n nr of
+        Nothing -> moduleError "namedRecordToRecord" $
+                   "header contains name " ++ show (B8.unpack n) ++
+                   " which is not present in the named record"
+        Just v  -> v
+
+moduleError :: String -> String -> a
+moduleError func msg = error $ "Data.Csv.Encoding." ++ func ++ ": " ++ msg
+{-# NOINLINE moduleError #-}
+
+unlines :: [Builder] -> Builder
+unlines [] = mempty
+unlines (b:bs) = b <> fromString "\r\n" <> unlines bs
+
+intersperse :: Builder -> [Builder] -> [Builder]
+intersperse _   []      = []
+intersperse sep (x:xs)  = x : prependToAll sep xs
+
+prependToAll :: Builder -> [Builder] -> [Builder]
+prependToAll _   []     = []
+prependToAll sep (x:xs) = sep <> x : prependToAll sep xs
+
+decodeWithP :: AL.Parser a -> (a -> Either String b) -> L.ByteString -> Either String b
+decodeWithP p to s =
+    case AL.parse p s of
+      AL.Done _ v     -> case to v of
+          Right a  -> Right a
+          Left msg -> Left $ "conversion error: " ++ msg
+      AL.Fail left _ msg -> Left $ "parse error (" ++ msg ++ ") at " ++
+                            show (BL8.unpack left)
+{-# INLINE decodeWithP #-}
+
+mapM' :: Monad m => (a -> m b) -> [a] -> m [b]
+mapM' f = go
+  where
+    go [] = return []
+    go (x:xs) = do
+        !y <- f x
+        ys <- go xs
+        return (y : ys)
+{-# INLINE mapM' #-}
+

From 4f503c18f57445ad391ff2032490d1eea7200ee5 Mon Sep 17 00:00:00 2001
From: Alexey Khudyakov <alexey.skladnoy@gmail.com>
Date: Wed, 2 Jan 2013 17:57:54 +0400
Subject: [PATCH 21/21] Missing type signature

---
 tests/UnitTests.hs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/UnitTests.hs b/tests/UnitTests.hs
index ccfcd46..c47a2b3 100644
--- a/tests/UnitTests.hs
+++ b/tests/UnitTests.hs
@@ -74,6 +74,8 @@ recordsToList (S.Cons (Right x) rs) = case recordsToList rs of
     (Right xs) -> Right (x : xs)
 
 -- decodesStreamingAs :: BL.ByteString -> [[B.ByteString]] -> Assertion
+decodesStreamingAs :: (Bool -> BL8.ByteString -> S.Records [B.ByteString])
+                   -> BL8.ByteString -> [[B.ByteString]] -> Assertion
 decodesStreamingAs dec input expected =
     assertResult input expected $ fmap (V.fromList . map V.fromList) $
     recordsToList $ dec False input