Permalink
Browse files

Fix parsing bugs

  • Loading branch information...
1 parent c528fe9 commit 2c3454e1594be1976bc83a6952a059921236691c @tibbe tibbe committed Apr 13, 2012
Showing with 26 additions and 17 deletions.
  1. +11 −4 Data/Ceason/Parser/Internal.hs
  2. +10 −4 ceason.cabal
  3. +5 −9 tests/UnitTests.hs
View
15 Data/Ceason/Parser/Internal.hs
@@ -23,7 +23,7 @@ import Data.Ceason.Types
csv :: AL.Parser Csv
csv = do
- vals <- record `sepBy` endOfLine -- XXX: Eats one newline too many because record accepts empty input
+ vals <- record `sepBy1` endOfLine
_ <- optional endOfLine
endOfInput
return (V.fromList (filterLastIfEmpty vals))
@@ -34,10 +34,16 @@ csv = do
filterLastIfEmpty (v:vs) = v : filterLastIfEmpty vs
record :: AL.Parser Record
-record = V.fromList <$> field `sepBy` comma
+record = V.fromList <$> field `sepBy1` comma
field :: AL.Parser Field
-field = escapedField <|> unescapedField
+field = do
+ mb <- A.peek
+ -- We purposely don't use <|> as we want to commit to the first
+ -- choice if we see a double quote.
+ case mb of
+ Just b | b == doubleQuote -> escapedField
+ _ -> unescapedField
escapedField :: AL.Parser S.ByteString
escapedField = do
@@ -47,7 +53,8 @@ escapedField = do
s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote
then if s then Just False
else Just True
- else Just False)
+ else if s then Nothing
+ else Just False)
if doubleQuote `S.elem` s
then case Z.parse unescape s of
Right r -> return r
View
14 ceason.cabal
@@ -18,14 +18,17 @@ Cabal-version: >=1.8
Library
Exposed-modules: Data.Ceason
+ Data.Ceason.Encode
+ Data.Ceason.Parser
Data.Ceason.Types
- Other-modules: Data.Ceason.Encode
+ Other-modules: Data.Ceason.Encode.Internal
Data.Ceason.Parser.Internal
+ Data.Ceason.Types.Class
Data.Ceason.Types.Internal
Build-depends: array,
- attoparsec,
+ attoparsec >= 0.10.2,
base,
blaze-builder,
bytestring,
@@ -37,10 +40,13 @@ Library
Test-suite unit-tests
Type: exitcode-stdio-1.0
Main-is: UnitTests.hs
- Build-depends: base,
+ Build-depends: attoparsec,
+ base,
bytestring,
ceason,
HUnit,
test-framework,
- test-framework-hunit
+ test-framework-hunit,
+ vector
+
hs-source-dirs: tests
View
14 tests/UnitTests.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
module Main
( main
) where
@@ -36,10 +37,6 @@ testRfc4180 = readTest
["a,a", "b\"bb", "ccc"],
["zzz", "yyy", "xxx"]]
-testReadOddInputs :: Assertion
-testReadOddInputs = do
- readTest "" []
-
testReadEol :: Assertion
testReadEol = do
readTest "a,b" [["a","b"]]
@@ -48,10 +45,9 @@ testReadEol = do
allTests :: [TF.Test]
allTests =
- [ TF.testCase "readOddInputs" testReadOddInputs
- , TF.testCase "readEol" testReadEol
- , TF.testCase "rfc4180" testRfc4180
- ]
+ [ TF.testCase "readEol" testReadEol
+ , TF.testCase "rfc4180" testRfc4180
+ ]
main :: IO ()
main = defaultMain allTests
@@ -61,4 +57,4 @@ decode :: FromRecord a => BL.ByteString -> Either String (V.Vector a)
decode s =
case AL.parse csv s of
AL.Done _ v -> parseEither (traverse parseRecord) v
- AL.Fail _ _ err -> Left err
+ AL.Fail left _ err -> Left $ err ++ ", got " ++ show (BL8.unpack left)

0 comments on commit 2c3454e

Please sign in to comment.