Permalink
Browse files

Switch to native code for copying and comparison.

--HG--
rename : Data/Text/Unsafe.hs => Data/Text/Unsafe/Base.hs
  • Loading branch information...
bos committed Jun 27, 2011
1 parent c186479 commit a871416fb443db1c0f3400d90090781baa6f14a3
Showing with 125 additions and 148 deletions.
  1. +39 −101 Data/Text/Array.hs
  2. +2 −37 Data/Text/Unsafe.hs
  3. +55 −0 Data/Text/Unsafe/Base.hs
  4. +13 −0 cbits/cbits.c
  5. +1 −0 tests/benchmarks/text-benchmarks.cabal
  6. +4 −5 tests/tests/text-tests.cabal
  7. +11 −5 text.cabal
View
@@ -1,9 +1,9 @@
-{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, RecordWildCards,
- UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, MagicHash, Rank2Types,
+ RecordWildCards, UnboxedTuples, UnliftedFFITypes #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
-- |
-- Module : Data.Text.Array
--- Copyright : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
@@ -60,14 +60,16 @@ if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.Text.Array." ++ (_func_) ++ "
#if defined(ASSERTS)
import Control.Exception (assert)
#endif
+import Control.Monad.ST (unsafeIOToST)
import Data.Bits ((.&.), xor)
+import Data.Text.Unsafe.Base (inlinePerformIO)
import Data.Text.UnsafeShift (shiftL, shiftR)
+import Foreign.C.Types (CInt, CSize)
import GHC.Base (ByteArray#, MutableByteArray#, Int(..),
- indexWord16Array#, indexWordArray#, newByteArray#,
- readWord16Array#, readWordArray#, unsafeCoerce#,
- writeWord16Array#, writeWordArray#)
+ indexWord16Array#, newByteArray#,
+ unsafeCoerce#, writeWord16Array#)
import GHC.ST (ST(..), runST)
-import GHC.Word (Word16(..), Word(..))
+import GHC.Word (Word16(..))
import Prelude hiding (length, read)
-- | Immutable array type.
@@ -140,23 +142,6 @@ unsafeIndex Array{..} i@(I# i#) =
case indexWord16Array# aBA i# of r# -> (W16# r#)
{-# INLINE unsafeIndex #-}
--- | Unchecked read of an immutable array. May return garbage or
--- crash on an out-of-bounds access.
-unsafeIndexWord :: Array -> Int -> Word
-unsafeIndexWord Array{..} i@(I# i#) =
- CHECK_BOUNDS("unsafeIndexWord",aLen`div`wordFactor,i)
- case indexWordArray# aBA i# of r# -> (W# r#)
-{-# INLINE unsafeIndexWord #-}
-
--- | Unchecked read of a mutable array. May return garbage or
--- crash on an out-of-bounds access.
-unsafeRead :: MArray s -> Int -> ST s Word16
-unsafeRead MArray{..} i@(I# i#) = ST $ \s# ->
- CHECK_BOUNDS("unsafeRead",maLen,i)
- case readWord16Array# maBA i# s# of
- (# s2#, r# #) -> (# s2#, W16# r# #)
-{-# INLINE unsafeRead #-}
-
-- | Unchecked write of a mutable array. May return garbage or crash
-- on an out-of-bounds access.
unsafeWrite :: MArray s -> Int -> Word16 -> ST s ()
@@ -166,24 +151,6 @@ unsafeWrite MArray{..} i@(I# i#) (W16# e#) = ST $ \s1# ->
s2# -> (# s2#, () #)
{-# INLINE unsafeWrite #-}
--- | Unchecked read of a mutable array. May return garbage or
--- crash on an out-of-bounds access.
-unsafeReadWord :: MArray s -> Int -> ST s Word
-unsafeReadWord MArray{..} i@(I# i#) = ST $ \s# ->
- CHECK_BOUNDS("unsafeRead64",maLen`div`wordFactor,i)
- case readWordArray# maBA i# s# of
- (# s2#, r# #) -> (# s2#, W# r# #)
-{-# INLINE unsafeReadWord #-}
-
--- | Unchecked write of a mutable array. May return garbage or crash
--- on an out-of-bounds access.
-unsafeWriteWord :: MArray s -> Int -> Word -> ST s ()
-unsafeWriteWord MArray{..} i@(I# i#) (W# e#) = ST $ \s1# ->
- CHECK_BOUNDS("unsafeWriteWord",maLen`div`wordFactor,i)
- case writeWordArray# maBA i# e# s1# of
- s2# -> (# s2#, () #)
-{-# INLINE unsafeWriteWord #-}
-
-- | Convert an immutable array to a list.
toList :: Array -> Int -> Int -> [Word16]
toList ary off len = loop 0
@@ -207,65 +174,40 @@ run2 k = runST (do
arr <- unsafeFreeze marr
return (arr,b))
--- | The amount to divide or multiply by to switch between units of
--- 'Word16' and units of 'Word'.
-wordFactor :: Int
-wordFactor = SIZEOF_HSWORD `shiftR` 1
-
--- | Indicate whether an offset is word-aligned.
-wordAligned :: Int -> Bool
-wordAligned i = i .&. (wordFactor - 1) == 0
-
-- | Copy some elements of a mutable array.
copyM :: MArray s -- ^ Destination
-> Int -- ^ Destination offset
-> MArray s -- ^ Source
-> Int -- ^ Source offset
-> Int -- ^ Count
-> ST s ()
-copyM dest didx src sidx count =
+copyM dest didx src sidx count
+ | count <= 0 = return ()
+ | otherwise =
#if defined(ASSERTS)
assert (sidx + count <= length src) .
- assert (didx + count <= length dest) $
+ assert (didx + count <= length dest) .
#endif
- if srem == 0 && drem == 0
- then fast_loop 0
- else slow_loop 0
- where
- (swidx,srem) = sidx `divMod` wordFactor
- (dwidx,drem) = didx `divMod` wordFactor
- nwds = count `div` wordFactor
- fast_loop !i
- | i >= nwds = slow_loop (i * wordFactor)
- | otherwise = do w <- unsafeReadWord src (swidx+i)
- unsafeWriteWord dest (dwidx+i) w
- fast_loop (i+1)
- slow_loop !i
- | i >= count= return ()
- | otherwise = do unsafeRead src (sidx+i) >>= unsafeWrite dest (didx+i)
- slow_loop (i+1)
+ unsafeIOToST $ memcpyM (maBA dest) (fromIntegral didx)
+ (maBA src) (fromIntegral sidx)
+ (fromIntegral count)
+{-# INLINE copyM #-}
-- | Copy some elements of an immutable array.
copyI :: MArray s -- ^ Destination
-> Int -- ^ Destination offset
-> Array -- ^ Source
-> Int -- ^ Source offset
- -> Int -- ^ First offset in source /not/ to
+ -> Int -- ^ First offset in destination /not/ to
-- copy (i.e. /not/ length)
-> ST s ()
copyI dest i0 src j0 top
- | wordAligned i0 && wordAligned j0 = fast (i0 `div` wordFactor) (j0 `div` wordFactor)
- | otherwise = slow i0 j0
- where
- topwds = top `div` wordFactor
- fast !i !j
- | i >= topwds = slow (i * wordFactor) (j * wordFactor)
- | otherwise = do unsafeWriteWord dest i (src `unsafeIndexWord` j)
- fast (i+1) (j+1)
- slow !i !j
- | i >= top = return ()
- | otherwise = do unsafeWrite dest i (src `unsafeIndex` j)
- slow (i+1) (j+1)
+ | i0 >= top = return ()
+ | otherwise = unsafeIOToST $
+ memcpyI (maBA dest) (fromIntegral i0)
+ (aBA src) (fromIntegral j0)
+ (fromIntegral (top-i0))
+{-# INLINE copyI #-}
-- | Compare portions of two arrays for equality. No bounds checking
-- is performed.
@@ -275,22 +217,18 @@ equal :: Array -- ^ First
-> Int -- ^ Offset into second
-> Int -- ^ Count
-> Bool
-equal arrA offA arrB offB count
- | wordAligned offA && wordAligned offB = fast 0
- | otherwise = slow 0
- where
- countWords = count `div` wordFactor
- fast !i
- | i >= countWords = slow (i * wordFactor)
- | a /= b = False
- | otherwise = fast (i+1)
- where a = unsafeIndexWord arrA (offAW+i)
- b = unsafeIndexWord arrB (offBW+i)
- offAW = offA `div` wordFactor
- offBW = offB `div` wordFactor
- slow !i
- | i >= count = True
- | a /= b = False
- | otherwise = slow (i+1)
- where a = unsafeIndex arrA (offA+i)
- b = unsafeIndex arrB (offB+i)
+equal arrA offA arrB offB count = inlinePerformIO $ do
+ i <- memcmp (aBA arrA) (fromIntegral offA)
+ (aBA arrB) (fromIntegral offB) (fromIntegral count)
+ return $! i == 0
+{-# INLINE equal #-}
+
+foreign import ccall unsafe "_hs_text_memcpy" memcpyI
+ :: MutableByteArray# s -> CSize -> ByteArray# -> CSize -> CSize -> IO ()
+
+foreign import ccall unsafe "_hs_text_memcmp" memcmp
+ :: ByteArray# -> CSize -> ByteArray# -> CSize -> CSize -> IO CInt
+
+foreign import ccall unsafe "_hs_text_memcpy" memcpyM
+ :: MutableByteArray# s -> CSize -> MutableByteArray# s -> CSize -> CSize
+ -> IO ()
View
@@ -1,7 +1,7 @@
{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
-- |
-- Module : Data.Text.Unsafe
--- Copyright : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
-- duncan@haskell.org
@@ -30,17 +30,9 @@ import Control.Exception (assert)
#endif
import Data.Text.Encoding.Utf16 (chr2)
import Data.Text.Internal (Text(..))
+import Data.Text.Unsafe.Base (inlineInterleaveST, inlinePerformIO)
import Data.Text.UnsafeChar (unsafeChr)
-import GHC.ST (ST(..))
import qualified Data.Text.Array as A
-#if defined(__GLASGOW_HASKELL__)
-# if __GLASGOW_HASKELL__ >= 611
-import GHC.IO (IO(IO))
-# else
-import GHC.IOBase (IO(IO))
-# endif
-import GHC.Base (realWorld#)
-#endif
-- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
-- omits the check for the empty case, so there is an obligation on
@@ -101,33 +93,6 @@ reverseIter (Text arr off _len) i
k = j - 1
{-# INLINE reverseIter #-}
--- | Just like unsafePerformIO, but we inline it. Big performance gains as
--- it exposes lots of things to further inlining. /Very unsafe/. In
--- particular, you should do no memory allocation inside an
--- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
---
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-#if defined(__GLASGOW_HASKELL__)
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-#else
-inlinePerformIO = unsafePerformIO
-#endif
-
--- | Allow an 'ST' computation to be deferred lazily. When passed an
--- action of type 'ST' @s@ @a@, the action will only be performed when
--- the value of @a@ is demanded.
---
--- This function is identical to the normal unsafeInterleaveST, but is
--- inlined and hence faster.
---
--- /Note/: This operation is highly unsafe, as it can introduce
--- externally visible non-determinism into an 'ST' action.
-inlineInterleaveST :: ST s a -> ST s a
-inlineInterleaveST (ST m) = ST $ \ s ->
- let r = case m s of (# _, res #) -> res in (# s, r #)
-{-# INLINE inlineInterleaveST #-}
-
-- | /O(1)/ Return the length of a 'Text' in units of 'Word16'. This
-- is useful for sizing a target array appropriately before using
-- 'unsafeCopyToPtr'.
View
@@ -0,0 +1,55 @@
+{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
+-- |
+-- Module : Data.Text.Unsafe.Base
+-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
+-- License : BSD-style
+-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
+-- duncan@haskell.org
+-- Stability : experimental
+-- Portability : portable
+--
+-- A module containing unsafe operations, for very very careful use in
+-- heavily tested code.
+module Data.Text.Unsafe.Base
+ (
+ inlineInterleaveST
+ , inlinePerformIO
+ ) where
+
+import GHC.ST (ST(..))
+#if defined(__GLASGOW_HASKELL__)
+# if __GLASGOW_HASKELL__ >= 611
+import GHC.IO (IO(IO))
+# else
+import GHC.IOBase (IO(IO))
+# endif
+import GHC.Base (realWorld#)
+#endif
+
+
+-- | Just like unsafePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining. /Very unsafe/. In
+-- particular, you should do no memory allocation inside an
+-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
+--
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif
+
+-- | Allow an 'ST' computation to be deferred lazily. When passed an
+-- action of type 'ST' @s@ @a@, the action will only be performed when
+-- the value of @a@ is demanded.
+--
+-- This function is identical to the normal unsafeInterleaveST, but is
+-- inlined and hence faster.
+--
+-- /Note/: This operation is highly unsafe, as it can introduce
+-- externally visible non-determinism into an 'ST' action.
+inlineInterleaveST :: ST s a -> ST s a
+inlineInterleaveST (ST m) = ST $ \ s ->
+ let r = case m s of (# _, res #) -> res in (# s, r #)
+{-# INLINE inlineInterleaveST #-}
View
@@ -0,0 +1,13 @@
+#include <string.h>
+
+void _hs_text_memcpy(void *dest, size_t doff, const void *src, size_t soff,
+ size_t n)
+{
+ memcpy(dest + (doff<<1), src + (soff<<1), n<<1);
+}
+
+int _hs_text_memcmp(const void *a, size_t aoff, const void *b, size_t boff,
+ size_t n)
+{
+ return memcmp(a + (aoff<<1), b + (boff<<1), n<<1);
+}
@@ -17,6 +17,7 @@ cabal-version: >=1.2
executable text-benchmarks
hs-source-dirs: src ../..
+ c-sources: ../../cbits/cbits.c
main-is: Data/Text/Benchmarks.hs
ghc-options: -Wall -O2
cpp-options: -DHAVE_DEEPSEQ
@@ -24,9 +24,7 @@ executable text-tests
main-is: Data/Text/Tests.hs
ghc-options:
- -Wall
- -threaded
- -O0
+ -Wall -threaded -O0 -rtsopts
if flag(hpc)
ghc-options:
@@ -54,8 +52,7 @@ executable text-tests-stdio
main-is: Data/Text/Tests/IO.hs
ghc-options:
- -Wall
- -threaded
+ -Wall -threaded -rtsopts
-- Optional HPC support
if flag(hpc)
@@ -68,6 +65,7 @@ executable text-tests-stdio
library
hs-source-dirs: ../..
+ c-sources: ../../cbits/cbits.c
exposed-modules:
Data.Text
Data.Text.Array
@@ -103,6 +101,7 @@ library
Data.Text.Lazy.Search
Data.Text.Search
Data.Text.Unsafe
+ Data.Text.Unsafe.Base
Data.Text.UnsafeChar
Data.Text.UnsafeShift
Data.Text.Util
Oops, something went wrong.

0 comments on commit a871416

Please sign in to comment.