Skip to content

Commit

Permalink
Add lazy encoding/decoding support. Improve docs.
Browse files Browse the repository at this point in the history
  • Loading branch information
bos committed Jan 25, 2011
1 parent 2aa74de commit dd81a1e
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 2 deletions.
18 changes: 17 additions & 1 deletion Data/ByteString/Base16.hs
Expand Up @@ -31,9 +31,16 @@ digits :: ByteString
-- Lookup table of hexadecimal digit characters; the encoder indexes it
-- with a 4-bit value to obtain the corresponding output byte.
digits = "0123456789abcdef"
{-# NOINLINE digits #-}

-- | Encode a string into base16 form. The result will always be a
-- multiple of 2 bytes in length.
--
-- Example:
--
-- > encode "foo" == "666f6f"
encode :: ByteString -> ByteString
encode (PS sfp soff slen)
| slen > maxBound `div` 2 = error "Data.ByteString.Base16.encode: input too large"
| slen > maxBound `div` 2 =
error "Data.ByteString.Base16.encode: input too long"
| otherwise = unsafeCreate (slen*2) $ \dptr ->
withForeignPtr sfp $ \sptr ->
enc (sptr `plusPtr` soff) dptr
Expand All @@ -47,6 +54,15 @@ encode (PS sfp soff slen)
poke (d `plusPtr` 1) . unsafeIndex digits $ x .&. 0xf
go (s `plusPtr` 1) (d `plusPtr` 2)

-- | Decode a string from base16 form. The first element of the
-- returned tuple contains the decoded data. The second element starts
-- at the first invalid base16 sequence in the original string.
--
-- Examples:
--
-- > decode "666f6f" == ("foo", "")
-- > decode "66quux" == ("f", "quux")
-- > decode "666quux" == ("f", "6quux")
decode :: ByteString -> (ByteString, ByteString)
decode (PS sfp soff slen) =
unsafePerformIO . createAndTrim' (slen `div` 2) $ \dptr ->
Expand Down
64 changes: 64 additions & 0 deletions Data/ByteString/Base16/Lazy.hs
@@ -0,0 +1,64 @@
{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module : Data.ByteString.Base16.Lazy
-- Copyright : (c) 2011 MailRank, Inc.
--
-- License : BSD
-- Maintainer : bos@mailrank.com
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base16-encoded strings.

module Data.ByteString.Base16.Lazy
(
encode
, decode
) where

import Data.Word (Word8)
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import Data.ByteString.Lazy.Internal

-- | Base16-encode a lazy 'ByteString', one chunk at a time.  Each
-- input byte is rendered as two output bytes, so the length of the
-- result is always even.
--
-- Example:
--
-- > encode "foo" == "666f6f"
encode :: ByteString -> ByteString
encode = foldrChunks encodeChunk Empty
  where
    -- Encode one strict chunk and put it in front of the (lazily
    -- produced) encoding of the remaining chunks.
    encodeChunk c rest = Chunk (B16.encode c) rest

-- | Decode a string from base16 form. The first element of the
-- returned tuple contains the decoded data. The second element starts
-- at the first invalid base16 sequence in the original string.
--
-- The result does not depend on how the input is split into chunks:
-- it is the same as running the strict decoder over the concatenation
-- of all chunks. In particular, a pair of hex digits that straddles a
-- chunk boundary is decoded as a single byte, and nothing that follows
-- the first invalid sequence is ever decoded.
--
-- The decoded data is produced lazily, one input chunk at a time.
--
-- Examples:
--
-- > decode "666f6f" == ("foo", "")
-- > decode "66quux" == ("f", "quux")
-- > decode "666quux" == ("f", "6quux")
decode :: ByteString -> (ByteString, ByteString)
decode = go Nothing
  where
    -- The first argument is a hex digit left dangling at the end of the
    -- previous chunk, still waiting for its partner (if any).
    go Nothing  Empty = (Empty, Empty)
    -- Input ended on an unpaired digit: it cannot be decoded, so it is
    -- handed back as the remainder.
    go (Just w) Empty = (Empty, Chunk (B.singleton w) Empty)
    go carry (Chunk c cs) =
      case B.length t of
        -- The whole chunk was consumed; carry on with the next one.
        0 -> let (y, z) = go Nothing cs in (chunk h y, z)
        -- A single trailing hex digit: defer it so that it can be paired
        -- with the first byte of the next chunk.
        1 | isHex d -> let (y, z) = go (Just d) cs in (chunk h y, z)
        -- Invalid input: stop decoding and return everything from the
        -- offending position onwards untouched.
        _ -> (chunk h Empty, chunk t cs)
      where
        -- Re-attach the dangling digit from the previous chunk so the
        -- strict decoder sees the pair as contiguous bytes.
        c'     = maybe c (`B.cons` c) carry
        (h, t) = B16.decode c'
        d      = B.unsafeHead t

-- | Test whether a byte is an ASCII hexadecimal digit, i.e. one of
-- @0-9@, @a-f@ or @A-F@.
isHex :: Word8 -> Bool
isHex w = within 48 57 || within 97 102 || within 65 70
  where
    -- Inclusive range check against the byte under test.
    within lo hi = lo <= w && w <= hi
3 changes: 2 additions & 1 deletion base16-bytestring.cabal
@@ -1,5 +1,5 @@
name: base16-bytestring
version: 0.1.0.0
version: 0.1.1.0
synopsis: Fast base16 (hex) encoding and decoding for ByteStrings
description: Fast base16 (hex) encoding and decoding for ByteStrings
homepage: http://github.com/mailrank/base16-bytestring
Expand All @@ -19,6 +19,7 @@ Cabal-version: >=1.6
library
exposed-modules:
Data.ByteString.Base16
Data.ByteString.Base16.Lazy

build-depends:
base == 4.*,
Expand Down

0 comments on commit dd81a1e

Please sign in to comment.