Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 11 commits
  • 6 files changed
  • 0 commit comments
  • 1 contributor
Commits on Jun 15, 2012
@bos Bump dependency on bytestring c1b5863
@bos Bump version to 0.11.2.2 7f2801b
Commits on Jun 17, 2012
@bos A small improvement in Text generation efficiency.
The specialised Run tuple avoids boxing and indirection for both
the MArray and Int parameters in the predecessor code.

I think I can do better, though.
4e82935
Commits on Jun 19, 2012
@bos Another try at improving Text generation. d52fb2a
@bos Merge 9dd4488
@bos Merge d651007
@bos unstream: Fine-tune the continuation handling ae1cccb
@bos unstream: choose a winner - the continuation
For a simple benchmark filtering many very small strings, this
allocates 7.8% less memory and 2.4% less time than unstream in
0.11.2.1.  The difference slowly disappears for larger strings.

The "use a specialised pair" approach can't match the reduction in
memory use, and for reasons unknown to me, seems to be slower than
the code in 0.11.2.1.
eb0076e
Commits on Jun 24, 2012
@bos Simplify the unstream code 7366026
@bos Switch decodeUtf8 to runText
This improves performance by 10% on small strings, and reduces the
amount of memory allocated by 17%.
b131ab8
@bos Merge a8b2bd6
Showing with 49 additions and 38 deletions.
  1. +24 −23 Data/Text/Encoding.hs
  2. +8 −7 Data/Text/Fusion.hs
  3. +11 −2 Data/Text/Private.hs
  4. +1 −1 tests/benchmarks/text-benchmarks.cabal
  5. +2 −2 tests/tests/text-tests.cabal
  6. +3 −3 text.cabal
View
47 Data/Text/Encoding.hs
@@ -57,7 +57,8 @@ import Data.Bits ((.&.))
import Data.ByteString as B
import Data.ByteString.Internal as B
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
-import Data.Text.Internal (Text(..), textP)
+import Data.Text.Internal (Text(..))
+import Data.Text.Private (runText)
import Data.Text.UnsafeChar (ord, unsafeWrite)
import Data.Text.UnsafeShift (shiftL, shiftR)
import Data.Word (Word8)
@@ -96,30 +97,30 @@ decodeASCII = decodeUtf8
-- | Decode a 'ByteString' containing UTF-8 encoded text.
decodeUtf8With :: OnDecodeError -> ByteString -> Text
-decodeUtf8With onErr (PS fp off len) = textP (fst a) 0 (snd a)
+decodeUtf8With onErr (PS fp off len) = runText $ \done -> do
+ let go dest = withForeignPtr fp $ \ptr ->
+ with (0::CSize) $ \destOffPtr -> do
+ let end = ptr `plusPtr` (off + len)
+ loop curPtr = do
+ curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
+ if curPtr' == end
+ then do
+ n <- peek destOffPtr
+ unsafeSTToIO (done dest (fromIntegral n))
+ else do
+ x <- peek curPtr'
+ case onErr desc (Just x) of
+ Nothing -> loop $ curPtr' `plusPtr` 1
+ Just c -> do
+ destOff <- peek destOffPtr
+ w <- unsafeSTToIO $
+ unsafeWrite dest (fromIntegral destOff) c
+ poke destOffPtr (destOff + fromIntegral w)
+ loop $ curPtr' `plusPtr` 1
+ loop (ptr `plusPtr` off)
+ (unsafeIOToST . go) =<< A.new len
where
- a = A.run2 (A.new len >>= unsafeIOToST . go)
desc = "Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream"
- go dest = withForeignPtr fp $ \ptr ->
- with (0::CSize) $ \destOffPtr -> do
- let end = ptr `plusPtr` (off + len)
- loop curPtr = do
- curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
- if curPtr' == end
- then do
- n <- peek destOffPtr
- return (dest,fromIntegral n)
- else do
- x <- peek curPtr'
- case onErr desc (Just x) of
- Nothing -> loop $ curPtr' `plusPtr` 1
- Just c -> do
- destOff <- peek destOffPtr
- w <- unsafeSTToIO $
- unsafeWrite dest (fromIntegral destOff) c
- poke destOffPtr (destOff + fromIntegral w)
- loop $ curPtr' `plusPtr` 1
- loop (ptr `plusPtr` off)
{- INLINE[0] decodeUtf8With #-}
-- | Decode a 'ByteString' containing UTF-8 encoded text that is known
View
15 Data/Text/Fusion.hs
@@ -51,6 +51,7 @@ import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
fromIntegral, otherwise)
import Data.Bits ((.&.))
import Data.Text.Internal (Text(..))
+import Data.Text.Private (runText)
import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite)
import Data.Text.UnsafeShift (shiftL, shiftR)
import qualified Data.Text.Array as A
@@ -94,15 +95,14 @@ reverseStream (Text arr off len) = Stream next (off+len-1) (maxSize len)
-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
unstream :: Stream Char -> Text
-unstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
- where
- a = A.run2 (A.new mlen >>= \arr -> outer arr mlen s0 0)
- where mlen = upperBound 4 len
- outer arr top = loop
- where
+unstream (Stream next0 s0 len) = runText $ \done -> do
+ let mlen = upperBound 4 len
+ arr0 <- A.new mlen
+ let outer arr top = loop
+ where
loop !s !i =
case next0 s of
- Done -> return (arr, i)
+ Done -> done arr i
Skip s' -> loop s' i
Yield x s'
| j >= top -> {-# SCC "unstream/resize" #-} do
@@ -114,6 +114,7 @@ unstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
loop s' (i+d)
where j | ord x < 0x10000 = i
| otherwise = i + 1
+ outer arr0 mlen s0 0
{-# INLINE [0] unstream #-}
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
View
13 Data/Text/Private.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, Rank2Types, UnboxedTuples #-}
-- |
-- Module : Data.Text.Private
@@ -11,11 +11,14 @@
module Data.Text.Private
(
- span_
+ runText
+ , span_
) where
+import Control.Monad.ST (ST, runST)
import Data.Text.Internal (Text(..), textP)
import Data.Text.Unsafe (Iter(..), iter)
+import qualified Data.Text.Array as A
span_ :: (Char -> Bool) -> Text -> (# Text, Text #)
span_ p t@(Text arr off len) = (# hd,tl #)
@@ -26,3 +29,9 @@ span_ p t@(Text arr off len) = (# hd,tl #)
| otherwise = i
where Iter c d = iter t i
{-# INLINE span_ #-}
+
+runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text
+runText act = runST (act $ \ !marr !len -> do
+ arr <- A.unsafeFreeze marr
+ return $! textP arr 0 len)
+{-# INLINE runText #-}
View
2 tests/benchmarks/text-benchmarks.cabal
@@ -24,7 +24,7 @@ executable text-benchmarks
cpp-options: -DHAVE_DEEPSEQ
build-depends: base >= 4 && < 5,
criterion >= 0.5 && < 0.7,
- bytestring >= 0.9 && < 0.10,
+ bytestring >= 0.9,
deepseq >= 1.1 && < 1.2,
filepath >= 1.1 && < 1.3,
directory >= 1.1 && < 1.2,
View
4 tests/tests/text-tests.cabal
@@ -37,7 +37,7 @@ executable text-tests
build-depends:
text-tests,
base >= 4 && < 5,
- bytestring >= 0.9 && < 0.10,
+ bytestring >= 0.9,
deepseq >= 1.1,
directory >= 1.1 && < 1.2,
random >= 1.0 && < 1.1,
@@ -118,7 +118,7 @@ library
build-depends:
array,
base >= 4 && < 5,
- bytestring >= 0.9 && < 1.0,
+ bytestring >= 0.9,
deepseq >= 1.1,
integer-gmp >= 0.2 && < 0.3,
ghc-prim >= 0.2 && < 0.3
View
6 text.cabal
@@ -1,5 +1,5 @@
name: text
-version: 0.11.2.1
+version: 0.11.2.2
homepage: https://github.com/bos/text
bug-reports: https://github.com/bos/text/issues
synopsis: An efficient packed Unicode text type.
@@ -124,7 +124,7 @@ library
build-depends:
array,
base < 5,
- bytestring >= 0.9 && < 1.0
+ bytestring >= 0.9
if impl(ghc >= 6.10)
build-depends:
ghc-prim, base >= 4, deepseq >= 1.1.0.0
@@ -168,7 +168,7 @@ test-suite tests
build-depends:
base >= 4 && < 5,
- bytestring >= 0.9 && < 0.10,
+ bytestring >= 0.9,
deepseq >= 1.1,
ghc-prim,
directory >= 1.0 && < 1.2,

No commit comments for this range

Something went wrong with that request. Please try again.