-
Notifications
You must be signed in to change notification settings - Fork 106
/
Encoding.hs
182 lines (155 loc) · 6.3 KB
/
Encoding.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
{-# LANGUAGE BangPatterns, OverloadedStrings #-}
-- Module: Data.Ceason.Encode
-- Copyright: (c) 2011 MailRank, Inc.
-- (c) 2012 Johan Tibell
-- License: BSD3
-- Maintainer: Johan Tibell <johan.tibell@gmail.com>
-- Stability: experimental
-- Portability: portable
--
-- Encoding and decoding of data types into CSV.
module Data.Ceason.Encoding
(
-- * Encoding and decoding
decode
, decodeByName
, encode
, encodeByName
-- ** Encoding and decoding options
, DecodeOptions(..)
, defaultDecodeOptions
, decodeWith
, decodeByNameWith
, EncodeOptions(..)
, defaultEncodeOptions
, encodeWith
, encodeByNameWith
) where
import Blaze.ByteString.Builder
import Blaze.ByteString.Builder.Char8
import Control.Applicative
import qualified Data.Attoparsec.ByteString.Lazy as AL
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.HashMap.Strict as HM
import Data.Monoid
import Data.Traversable
import Data.Vector (Vector)
import qualified Data.Vector as V
import Data.Word (Word8)
import Prelude hiding (unlines)
import Data.Ceason.Conversion
import Data.Ceason.Parser
import Data.Ceason.Types
-- TODO: 'encode' isn't as efficient as it could be.
------------------------------------------------------------------------
-- * Encoding and decoding
-- | Efficiently deserialize CSV records from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, @'Left' msg@ is
-- returned. Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
decode :: FromRecord a => L.ByteString -> Either String (Vector a)
decode = decodeWith defaultDecodeOptions
{-# INLINE decode #-}
-- | Efficiently deserialize CSV records from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, @'Left' msg@ is
-- returned. The data is assumed to be preceeded by a header.
-- Equivalent to @'decodeByNameWith' 'defaultDecodeOptions'@.
decodeByName :: FromNamedRecord a => L.ByteString
-> Either String (Header, Vector a)
decodeByName = decodeByNameWith defaultDecodeOptions
{-# INLINE decodeByName #-}
-- | Efficiently serialize CVS records as a lazy 'L.ByteString'.
encode :: ToRecord a => V.Vector a -> L.ByteString
encode = encodeWith defaultEncodeOptions
-- | Efficiently serialize CVS records as a lazy 'L.ByteString'. The
-- header is written before any records and dictates the field order.
encodeByName :: ToNamedRecord a => Header -> V.Vector a -> L.ByteString
encodeByName = encodeByNameWith defaultEncodeOptions
------------------------------------------------------------------------
-- ** Encoding and decoding options
-- | Like 'decode', but lets you customize how the CSV data is parsed.
decodeWith :: FromRecord a => DecodeOptions -> L.ByteString
-> Either String (Vector a)
decodeWith !opts = decodeWithP (csv opts) (parse . traverse parseRecord)
{-# INLINE [1] decodeWith #-}
{-# RULES
"idDecodeWith" decodeWith = idDecodeWith
#-}
-- | Same as 'decodeWith', but more efficient as no type
-- conversion is performed.
idDecodeWith :: DecodeOptions -> L.ByteString
-> Either String (Vector (Vector B.ByteString))
idDecodeWith !opts = decodeWithP (csv opts) pure
-- | Like 'decodeByName', but lets you customize how the CSV data is
-- parsed.
decodeByNameWith :: FromNamedRecord a => DecodeOptions -> L.ByteString
-> Either String (Header, Vector a)
decodeByNameWith !opts =
decodeWithP (csvWithHeader opts)
(\ (hdr, vs) -> (,) <$> pure hdr <*> (parse $ traverse parseNamedRecord vs))
-- | Options that controls how data is encoded. These options can be
-- used to e.g. encode data in a tab-separated format instead of in a
-- comma-separated format.
data EncodeOptions = EncodeOptions
{ -- | Field delimiter.
encDelimiter :: {-# UNPACK #-} !Word8
}
-- | Encoding options for CSV files.
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions = EncodeOptions
{ encDelimiter = 44 -- comma
}
-- | Like 'encode', but lets you customize how the CSV data is
-- encoded.
encodeWith :: ToRecord a => EncodeOptions -> V.Vector a -> L.ByteString
encodeWith opts = toLazyByteString
. unlines
. map (encodeRecord (encDelimiter opts) . toRecord)
. V.toList
encodeRecord :: Word8 -> Record -> Builder
encodeRecord delim = mconcat . intersperse (fromWord8 delim)
. map fromByteString . V.toList
{-# INLINE encodeRecord #-}
-- | Like 'encodeByName', but lets you customize how the CSV data is
-- encoded.
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> V.Vector a
-> L.ByteString
encodeByNameWith opts hdr v =
toLazyByteString ((encodeRecord (encDelimiter opts) hdr) <>
fromByteString "\r\n" <> records)
where
records = unlines
. map (encodeRecord (encDelimiter opts)
. namedRecordToRecord hdr . toNamedRecord)
. V.toList $ v
namedRecordToRecord :: Header -> NamedRecord -> Record
namedRecordToRecord hdr nr = V.map find hdr
where
find n = case HM.lookup n nr of
Nothing -> moduleError "namedRecordToRecord" $
"header contains name " ++ show (B8.unpack n) ++
" which is not present in the named record"
Just v -> v
moduleError :: String -> String -> a
moduleError func msg = error $ "Data.Ceason.Encoding." ++ func ++ ": " ++ msg
{-# NOINLINE moduleError #-}
unlines :: [Builder] -> Builder
unlines [] = mempty
unlines (b:bs) = b <> fromString "\r\n" <> unlines bs
intersperse :: Builder -> [Builder] -> [Builder]
intersperse _ [] = []
intersperse sep (x:xs) = x : prependToAll sep xs
prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll _ [] = []
prependToAll sep (x:xs) = sep <> x : prependToAll sep xs
decodeWithP :: AL.Parser a -> (a -> Result b) -> L.ByteString -> Either String b
decodeWithP p to s =
case AL.parse p s of
AL.Done _ v -> case to v of
Success a -> Right a
Error msg -> Left $ "conversion error: " ++ msg
AL.Fail left _ msg -> Left $ "parse error (" ++ msg ++ ") at " ++
show (BL8.unpack left)
{-# INLINE decodeWithP #-}