Skip to content

Commit

Permalink
Merge pull request #172 from meiersi/feat-encode-via-bytestring-builder
Browse files Browse the repository at this point in the history
Feat encode via bytestring builder
  • Loading branch information
bos committed Jan 8, 2014
2 parents 555c7d1 + 7f92045 commit faa9936
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 48 deletions.
93 changes: 56 additions & 37 deletions Data/Aeson/Encode.hs
@@ -1,4 +1,4 @@
{-# LANGUAGE BangPatterns, OverloadedStrings #-}
{-# LANGUAGE CPP, BangPatterns, OverloadedStrings #-}

-- |
-- Module: Data.Aeson.Encode
Expand All @@ -14,49 +14,74 @@
-- Most frequently, you'll probably want to encode straight to UTF-8
-- (the standard JSON encoding) using 'encode'.
--
-- You can convert a 'Builder' (as returned by 'fromValue') to a
-- string using e.g. 'toLazyText'.

-- You can use the conversions to 'Builder's when embedding JSON messages as
-- parts of a protocol.
module Data.Aeson.Encode
(
fromValue
, encode
( encode

#if MIN_VERSION_bytestring(0,10,4)
-- * Encoding to Builders
, encodeToByteStringBuilder
, encodeToTextBuilder
#else
-- * Encoding to Text Builders
, encodeToTextBuilder
#endif

-- * Deprecated
, fromValue
) where

import Data.Aeson.Types (ToJSON(..), Value(..))
import Data.Aeson.Types (Value(..))
import Data.Monoid (mappend)
import Data.Scientific (Scientific, coefficient, base10Exponent, scientificBuilder)
import Data.Text.Lazy.Builder
import Data.Text.Lazy.Builder.Int (decimal)
import Data.Text.Lazy.Encoding (encodeUtf8)
import Numeric (showHex)
import qualified Data.ByteString.Lazy as L
import qualified Data.HashMap.Strict as H
import qualified Data.Text as T
import qualified Data.Vector as V

-- | Encode a JSON value to a 'Builder'. You can convert this to a
-- string using e.g. 'toLazyText', or encode straight to UTF-8 (the
-- standard JSON encoding) using 'encode'.
#if MIN_VERSION_bytestring(0,10,4)
import Data.Aeson.Encode.ByteString (encode, encodeToByteStringBuilder)
#else
import Data.Aeson.Types (ToJSON(toJSON))
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy.Encoding as TLE

-- | Encode a JSON 'Value' as a UTF-8 encoded 'BL.ByteString'.
encode :: ToJSON a => a -> BL.ByteString
encode = TLE.encodeUtf8 . TLB.toLazyText . encodeToTextBuilder . toJSON
#endif

-- | Encode a JSON 'Value' to a 'Builder', which can be embedded efficiently
-- in a text-based protocol.
encodeToTextBuilder :: Value -> Builder
encodeToTextBuilder =
go
where
go Null = {-# SCC "go/Null" #-} "null"
go (Bool b) = {-# SCC "go/Bool" #-} if b then "true" else "false"
go (Number s) = {-# SCC "go/Number" #-} fromScientific s
go (String s) = {-# SCC "go/String" #-} string s
go (Array v)
| V.null v = {-# SCC "go/Array" #-} "[]"
| otherwise = {-# SCC "go/Array" #-}
singleton '[' <>
go (V.unsafeHead v) <>
V.foldr f (singleton ']') (V.unsafeTail v)
where f a z = singleton ',' <> go a <> z
go (Object m) = {-# SCC "go/Object" #-}
case H.toList m of
(x:xs) -> singleton '{' <> one x <> foldr f (singleton '}') xs
_ -> "{}"
where f a z = singleton ',' <> one a <> z
one (k,v) = string k <> singleton ':' <> go v

{-# DEPRECATED fromValue "Use 'encodeToTextBuilder' instead" #-}
fromValue :: Value -> Builder
fromValue Null = {-# SCC "fromValue/Null" #-} "null"
fromValue (Bool b) = {-# SCC "fromValue/Bool" #-}
if b then "true" else "false"
fromValue (Number s) = {-# SCC "fromValue/Number" #-} fromScientific s
fromValue (String s) = {-# SCC "fromValue/String" #-} string s
fromValue (Array v)
| V.null v = {-# SCC "fromValue/Array" #-} "[]"
| otherwise = {-# SCC "fromValue/Array" #-}
singleton '[' <>
fromValue (V.unsafeHead v) <>
V.foldr f (singleton ']') (V.unsafeTail v)
where f a z = singleton ',' <> fromValue a <> z
fromValue (Object m) = {-# SCC "fromValue/Object" #-}
case H.toList m of
(x:xs) -> singleton '{' <> one x <> foldr f (singleton '}') xs
_ -> "{}"
where f a z = singleton ',' <> one a <> z
one (k,v) = string k <> singleton ':' <> fromValue v
fromValue = encodeToTextBuilder

string :: T.Text -> Builder
string s = {-# SCC "string" #-} singleton '"' <> quote s <> singleton '"'
Expand Down Expand Up @@ -93,12 +118,6 @@ fromScientific s
where
e = base10Exponent s

-- | Efficiently serialize a JSON value as a lazy 'L.ByteString'.
encode :: ToJSON a => a -> L.ByteString
encode = {-# SCC "encode" #-} encodeUtf8 . toLazyText . fromValue .
{-# SCC "toJSON" #-} toJSON
{-# INLINE encode #-}

(<>) :: Builder -> Builder -> Builder
(<>) = mappend
{-# INLINE (<>) #-}
Expand Down
116 changes: 116 additions & 0 deletions Data/Aeson/Encode/ByteString.hs
@@ -0,0 +1,116 @@
{-# LANGUAGE BangPatterns, OverloadedStrings #-}

-- |
-- Module: Data.Aeson.EncodeUtf8
-- Copyright: (c) 2011 MailRank, Inc.
-- (c) 2013 Simon Meier <iridcode@gmail.com>
-- License: Apache
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
-- Stability: experimental
-- Portability: portable
--
-- Efficiently serialize a JSON value using the UTF-8 encoding.

module Data.Aeson.Encode.ByteString
( encode
, encodeToByteStringBuilder
) where

import Prelude hiding (null)
import Data.Aeson.Types (ToJSON(..), Value(..))
import Data.Char (ord)
import Data.Scientific (Scientific, coefficient, base10Exponent, formatScientific, FPFormat(Generic))
import Data.Word (Word8)
import Data.Monoid (mappend)
import Data.ByteString.Builder as B
import Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as L
import qualified Data.HashMap.Strict as HMS
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V

(<>) :: Builder -> Builder -> Builder
(<>) = mappend
{-# INLINE (<>) #-}
infixr 6 <>

-- | Efficiently serialize a JSON value as a lazy 'L.ByteString'.
encode :: ToJSON a => a -> L.ByteString
encode = B.toLazyByteString . encodeToByteStringBuilder . toJSON

-- | Encode a JSON value to a ByteString 'B.Builder'. Use this function if you
-- must prepend or append further bytes to the encoded JSON value.
encodeToByteStringBuilder :: Value -> Builder
encodeToByteStringBuilder Null = null
encodeToByteStringBuilder (Bool b) = bool b
encodeToByteStringBuilder (Number n) = number n
encodeToByteStringBuilder (String s) = string s
encodeToByteStringBuilder (Array v) = array v
encodeToByteStringBuilder (Object m) = object m

null :: Builder
null = BP.primBounded (ascii4 ('n',('u',('l','l')))) ()

bool :: Bool -> Builder
bool = BP.primBounded (BP.condB id (ascii4 ('t',('r',('u','e'))))
(ascii5 ('f',('a',('l',('s','e'))))))

array :: V.Vector Value -> Builder
array v
| V.null v = B.char8 '[' <> B.char8 ']'
| otherwise = B.char8 '[' <>
encodeToByteStringBuilder (V.unsafeHead v) <>
V.foldr withComma (B.char8 ']') (V.unsafeTail v)
where
withComma a z = B.char8 ',' <> encodeToByteStringBuilder a <> z

object :: HMS.HashMap T.Text Value -> Builder
object m = case HMS.toList m of
(x:xs) -> B.char8 '{' <> one x <> foldr withComma (B.char8 '}') xs
_ -> B.char8 '{' <> B.char8 '}'
where
withComma a z = B.char8 ',' <> one a <> z
one (k,v) = string k <> B.char8 ':' <> encodeToByteStringBuilder v

string :: T.Text -> B.Builder
string t =
B.char8 '"' <> TE.encodeUtf8BuilderEscaped escapeAscii t <> B.char8 '"'
where
escapeAscii :: BP.BoundedPrim Word8
escapeAscii =
BP.condB (== c2w '\\' ) (ascii2 ('\\','\\')) $
BP.condB (== c2w '\"' ) (ascii2 ('\\','"' )) $
BP.condB (>= c2w '\x20') (BP.liftFixedToBounded BP.word8) $
BP.condB (== c2w '\n' ) (ascii2 ('\\','n' )) $
BP.condB (== c2w '\r' ) (ascii2 ('\\','r' )) $
BP.condB (== c2w '\t' ) (ascii2 ('\\','t' )) $
(BP.liftFixedToBounded hexEscape) -- fallback for chars < 0x20

c2w = fromIntegral . ord

hexEscape :: BP.FixedPrim Word8
hexEscape = (\c -> ('\\', ('u', fromIntegral c))) BP.>$<
BP.char8 BP.>*< BP.char8 BP.>*< BP.word16HexFixed

number :: Scientific -> Builder
number s
| e < 0 = B.string8 $ formatScientific Generic Nothing s
| otherwise = B.integerDec (coefficient s * 10 ^ e)
where
e = base10Exponent s


{-# INLINE ascii2 #-}
ascii2 :: (Char, Char) -> BP.BoundedPrim a
ascii2 cs = BP.liftFixedToBounded $ (const cs) BP.>$< BP.char7 BP.>*< BP.char7

{-# INLINE ascii4 #-}
ascii4 :: (Char, (Char, (Char, Char))) -> BP.BoundedPrim a
ascii4 cs = BP.liftFixedToBounded $ (const cs) >$<
BP.char7 BP.>*< BP.char7 BP.>*< BP.char7 BP.>*< BP.char7

{-# INLINE ascii5 #-}
ascii5 :: (Char, (Char, (Char, (Char, Char)))) -> BP.BoundedPrim a
ascii5 cs = BP.liftFixedToBounded $ (const cs) >$<
BP.char7 BP.>*< BP.char7 BP.>*< BP.char7 BP.>*< BP.char7 BP.>*< BP.char7
2 changes: 1 addition & 1 deletion Data/Aeson/Types/Internal.hs
Expand Up @@ -180,7 +180,7 @@ data Value = Object !Object
| Number !Scientific
| Bool !Bool
| Null
deriving (Eq, Show, Typeable, Data)
deriving (Eq, Show, Typeable)

-- | A newtype wrapper for 'UTCTime' that uses the same non-standard
-- serialization format as Microsoft .NET, whose @System.DateTime@
Expand Down
16 changes: 14 additions & 2 deletions aeson.cabal
Expand Up @@ -92,6 +92,10 @@ flag blaze-builder
description: Use blaze-builder instead of bytestring >= 0.10
default: False

flag new-bytestring-builder
description: Use the new bytestring builder available in bytestring >= 0.10.4.0
default: False

library
exposed-modules:
Data.Aeson
Expand All @@ -108,6 +112,14 @@ library
Data.Aeson.Types.Instances
Data.Aeson.Types.Internal

if flag(new-bytestring-builder)
other-modules: Data.Aeson.Encode.ByteString
build-depends: bytestring >= 0.10.4.0,
text >= 1.1.0.0
else
build-depends: bytestring < 0.10.4.0,
text >= 0.11.1.0

if impl(ghc >= 7.2.1)
cpp-options: -DGENERICS
build-depends: ghc-prim >= 0.2, dlist >= 0.2
Expand All @@ -117,15 +129,13 @@ library
build-depends:
attoparsec >= 0.11.1.0,
base == 4.*,
bytestring,
containers,
deepseq,
hashable >= 1.1.2.0,
mtl,
old-locale,
syb,
template-haskell >= 2.4,
text >= 0.11.1.0,
time,
unordered-containers >= 0.1.3.0,
vector >= 0.7.1,
Expand Down Expand Up @@ -172,6 +182,8 @@ test-suite tests
template-haskell,
test-framework,
test-framework-quickcheck2,
test-framework-hunit,
HUnit,
text,
time,
unordered-containers,
Expand Down
18 changes: 16 additions & 2 deletions benchmarks/CompareWithJSON.hs
Expand Up @@ -5,9 +5,13 @@ import Blaze.ByteString.Builder (toLazyByteString)
import Blaze.ByteString.Builder.Char.Utf8 (fromString)
import Control.DeepSeq (NFData(rnf))
import Criterion.Main
import qualified Data.Aeson.Encode as A
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import qualified Text.JSON as J
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy.Encoding as TLE

#if !MIN_VERSION_bytestring(0,10,0)
import qualified Data.ByteString.Lazy.Internal as BL
Expand Down Expand Up @@ -42,6 +46,12 @@ decodeA s = case A.decode s of
encodeJ :: J.JSValue -> BL.ByteString
encodeJ = toLazyByteString . fromString . J.encode

encodeToText :: A.Value -> TL.Text
encodeToText = TLB.toLazyText . A.encodeToTextBuilder . A.toJSON

encodeViaText :: A.Value -> BL.ByteString
encodeViaText = TLE.encodeUtf8 . encodeToText

main :: IO ()
main = do
let enFile = "json-data/twitter100.json"
Expand All @@ -63,11 +73,15 @@ main = do
]
, bgroup "encode" [
bgroup "en" [
bench "aeson" $ nf A.encode (decodeA enA)
bench "aeson-to-bytestring" $ nf A.encode (decodeA enA)
, bench "aeson-via-text-to-bytestring" $ nf encodeViaText (decodeA enA)
, bench "aeson-to-text" $ nf encodeToText (decodeA enA)
, bench "json" $ nf encodeJ (decodeJ enJ)
]
, bgroup "jp" [
bench "aeson" $ nf A.encode (decodeA jpA)
bench "aeson-to-bytestring" $ nf A.encode (decodeA jpA)
, bench "aeson-via-text-to-bytestring" $ nf encodeViaText (decodeA jpA)
, bench "aeson-to-text" $ nf encodeToText (decodeA jpA)
, bench "json" $ nf encodeJ (decodeJ jpJ)
]
]
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/aeson-benchmarks.cabal
Expand Up @@ -13,7 +13,8 @@ executable aeson-benchmark-compare-with-json
blaze-builder,
bytestring,
criterion,
json
json,
text

executable aeson-benchmark-aeson-encode
main-is: AesonEncode.hs
Expand Down

0 comments on commit faa9936

Please sign in to comment.