Skip to content
Browse files

Eliminate dependency on the array package

--HG--
extra : convert_revision : 1e1c2e7ca040f4eb6749385ad6d5e630c3a63dbc
  • Loading branch information...
1 parent cf427a5 commit a986b8cb34a2dbeae4f55b62c27383ab56040d2c @bos committed Jan 26, 2009
Showing with 286 additions and 61 deletions.
  1. +12 −13 Data/Text.hs
  2. +237 −0 Data/Text/Array.hs
  3. +19 −21 Data/Text/Fusion.hs
  4. +5 −6 Data/Text/Internal.hs
  5. +7 −17 tests/Bench.hs
  6. +3 −1 tests/FusionBench.hs
  7. +3 −3 text.cabal
View
25 Data/Text.hs
@@ -112,8 +112,7 @@ import Prelude (Char,Bool,Int,Maybe,String,
IO, FilePath)
import Data.Char (isSpace)
import Control.Monad.ST(ST)
-import Data.Array.Base(unsafeNewArray_,unsafeWrite,unsafeAt)
-import Data.Array.ST(STUArray, runSTUArray)
+import qualified Data.Text.Array as A
import qualified Data.ByteString as B
import Data.ByteString(ByteString)
import qualified Data.List as L
@@ -218,18 +217,18 @@ snoc t c = unstream (S.snoc (stream t) c)
-- | /O(n)/ Appends one 'Text' to the other by copying both of them
-- into a new 'Text'. Subject to array fusion.
append :: Text -> Text -> Text
-append (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (runSTUArray x) 0 len
+append (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (A.run x) 0 len
where
len = len1+len2
x = do
- arr <- unsafeNewArray_ (0,len-1) :: ST s (STUArray s Int Word16)
+ arr <- A.unsafeNew len :: ST s (A.MArray s Word16)
copy arr1 off1 (len1+off1) arr 0
copy arr2 off2 (len2+off2) arr len1
return arr
where
copy arr i max arr' j
| i >= max = return ()
- | otherwise = do unsafeWrite arr' j (arr `unsafeAt` i)
+ | otherwise = do A.unsafeWrite arr' j (arr `A.unsafeIndex` i)
copy arr (i+1) max arr' (j+1)
{-# INLINE append #-}
@@ -254,8 +253,8 @@ last (Text arr off len)
| n < 0xDC00 || n > 0xDFFF = unsafeChr n
| otherwise = U16.chr2 n0 n
where
- n = unsafeAt arr (off+len-1)
- n0 = unsafeAt arr (off+len-2)
+ n = A.unsafeIndex arr (off+len-1)
+ n0 = A.unsafeIndex arr (off+len-2)
{-# INLINE [1] last #-}
{-# RULES
@@ -274,7 +273,7 @@ tail (Text arr off len)
| n >= 0xD800 && n <= 0xDBFF = Text arr (off+2) (len-2)
| otherwise = Text arr (off+1) (len-1)
where
- n = unsafeAt arr off
+ n = A.unsafeIndex arr off
{-# INLINE [1] tail #-}
@@ -286,7 +285,7 @@ init (Text arr off len) | len <= 0 = errorEmptyList "init"
| n >= 0xDC00 && n <= 0xDFFF = Text arr off (len-2)
| otherwise = Text arr off (len-1)
where
- n = unsafeAt arr (off+len-1)
+ n = A.unsafeIndex arr (off+len-1)
{-# INLINE [1] init #-}
{-# RULES
@@ -305,7 +304,7 @@ null t = S.null (stream t)
-- | /O(n)/ Returns the number of characters in a 'Text'.
-- Subject to array fusion.
length :: Text -> Int
-length length t = S.length (stream t)
+length t = S.length (stream t)
{-# INLINE length #-}
-- -----------------------------------------------------------------------------
@@ -458,7 +457,7 @@ take n t@(Text arr off len)
| c < 0xD800 || c > 0xDBFF = loop (i+1) (count+1)
| otherwise = loop (i+2) (count+1)
where
- c = arr `unsafeAt` i
+ c = arr `A.unsafeIndex` i
{-# INLINE [1] take #-}
{-# RULES
@@ -484,7 +483,7 @@ drop n t@(Text arr off len)
| c < 0xD800 || c > 0xDBFF = loop (i+1) (count+1) (l-1)
| otherwise = loop (i+2) (count+1) (l-2)
where
- c = arr `unsafeAt` i
+ c = arr `A.unsafeIndex` i
{-# INLINE [1] drop #-}
{-# RULES
@@ -580,7 +579,7 @@ words (Text arr off len) = loop0 off off
then []
else [(Text arr start (n-start))]
where
- c = arr `unsafeAt` n
+ c = arr `A.unsafeIndex` n
{-# INLINE words #-}
errorEmptyList :: String -> a
View
237 Data/Text/Array.hs
@@ -0,0 +1,237 @@
+{-# LANGUAGE CPP, ExistentialQuantification, MagicHash, Rank2Types,
+ ScopedTypeVariables, UnboxedTuples #-}
+-- |
+-- Module : Data.Text.Array
+-- Copyright : (c) Bryan O'Sullivan 2009
+--
+-- License : BSD-style
+-- Maintainer : bos@serpentine.com,
+-- Stability : experimental
+-- Portability : portable
+--
+-- Packed, unboxed, heap-resident arrays. Suitable for performance
+-- critical use, both in terms of large data quantities and high
+-- speed.
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with "Prelude" functions, e.g.
+--
+-- > import qualified Data.Text.Array as A
+--
+-- The names in this module resemble those in the 'Data.Array' family
+-- of modules, but are shorter due to the assumption of qualifid
+-- naming.
+module Data.Text.Array
+ (
+ -- * Types
+ IArray(..)
+ , Elt(..)
+ , Array
+ , MArray
+
+ -- * Functions
+ , empty
+ , new
+ , unsafeNew
+ , unsafeFreeze
+ , run
+ ) where
+
+#if defined(__GLASGOW_HASKELL__)
+#include "MachDeps.h"
+
+import GHC.Base (ByteArray#, MutableByteArray#, indexWord16Array#,
+ newByteArray#, readWord16Array#, unsafeCoerce#,
+ writeWord16Array#, (+#), (*#))
+import GHC.Prim (Int#)
+import GHC.ST (ST(..), runST)
+import GHC.Types (Int(..))
+import GHC.Word (Word16(..))
+
+#elif defined(__HUGS__)
+
+import Hugs.ByteArray (ByteArray, MutableByteArray, readByteArray,
+ newMutableByteArray, readMutableByteArray,
+ unsafeFreezeMutableByteArray, writeMutableByteArray)
+import Foreign.Storable (Storable, sizeOf)
+import Hugs.ST (ST(..), runST)
+
+#else
+# error not implemented for this compiler
+#endif
+
+import Data.Typeable
+import Data.Word (Word16)
+import Prelude hiding (length, read)
+
+#include "Typeable.h"
+
+-- | Immutable array type.
+data Array e = Array
+ {-# UNPACK #-} !Int -- length (in units of e, not bytes)
+#if defined(__GLASGOW_HASKELL__)
+ ByteArray#
+#elif defined(__HUGS__)
+ !ByteArray
+#endif
+
+INSTANCE_TYPEABLE1(Array,arrayTc,"Array")
+
+-- | Mutable array type, for use in the ST monad.
+data MArray s e = MArray
+ {-# UNPACK #-} !Int -- length (in units of e, not bytes)
+#if defined(__GLASGOW_HASKELL__)
+ (MutableByteArray# s)
+#elif defined(__HUGS__)
+ !(MutableByteArray s)
+#endif
+
+INSTANCE_TYPEABLE2(MArray,mArrayTc,"MArray")
+
+-- | Operations supported by all arrays.
+class IArray a where
+ -- | Return the length of an array.
+ length :: a -> Int
+
+instance IArray (Array e) where
+ length (Array len _ba) = len
+ {-# INLINE length #-}
+
+instance IArray (MArray s e) where
+ length (MArray len _ba) = len
+ {-# INLINE length #-}
+
+check :: IArray a => String -> a -> Int -> (a -> Int -> b) -> b
+check func ary i f
+ | i >= 0 && i < length ary = f ary i
+ | otherwise = error ("Data.Array.Flat." ++ func ++ ": index out of bounds")
+{-# INLINE check #-}
+
+-- | Operations supported by all elements that can be stored in
+-- arrays.
+class Elt e where
+ -- | Indicate how many bytes would be used for an array of the
+ -- given size.
+ bytesInArray :: Int -> e -> Int
+ -- | Unchecked read of an immutable array. May return garbage or
+ -- crash on an out-of-bounds access.
+ unsafeIndex :: Array e -> Int -> e
+ -- | Unchecked read of a mutable array. May return garbage or
+ -- crash on an out-of-bounds access.
+ unsafeRead :: MArray s e -> Int -> ST s e
+ -- | Unchecked write of a mutable array. May return garbage or
+ -- crash on an out-of-bounds access.
+ unsafeWrite :: MArray s e -> Int -> e -> ST s ()
+
+ -- | Read an immutable array. An invalid index results in a
+ -- runtime error.
+ index :: Array e -> Int -> e
+ index ary i = check "index" ary i unsafeIndex
+ {-# INLINE index #-}
+
+ -- | Read a mutable array. An invalid index results in a runtime
+ -- error.
+ read :: Array e -> Int -> ST s e
+ read ary i = check "read" ary i read
+ {-# INLINE read #-}
+
+ -- | Write a mutable array. An invalid index results in a runtime
+ -- error.
+ write :: Array e -> Int -> ST s e
+ write ary i = check "write" ary i write
+ {-# INLINE write #-}
+
+-- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
+unsafeFreeze :: MArray s e -> ST s (Array e)
+
+#if defined(__GLASGOW_HASKELL__)
+
+wORD16_SCALE :: Int# -> Int#
+wORD16_SCALE n# = scale# *# n# where I# scale# = SIZEOF_WORD16
+
+-- | Create an uninitialized mutable array.
+unsafeNew :: forall s e. Elt e => Int -> ST s (MArray s e)
+unsafeNew n = ST $ \s1# ->
+ case bytesInArray n (undefined :: e) of
+ (I# len#) -> case newByteArray# len# s1# of
+ (# s2#, marr# #) -> (# s2#, MArray n marr# #)
+{-# INLINE unsafeNew #-}
+
+unsafeFreeze (MArray len mba#) = ST $ \s# ->
+ (# s#, Array len (unsafeCoerce# mba#) #)
+{-# INLINE unsafeFreeze #-}
+
+-- | Create a mutable array, with its elements initialized with the
+-- given value.
+new :: forall s e. Elt e => Int -> e -> ST s (MArray s e)
+
+#elif defined(__HUGS__)
+
+unsafeIndexArray :: Storable e => Array e -> Int -> e
+unsafeIndexArray (Array off _len arr) i = readByteArray arr (off + i)
+
+unsafeReadMArray :: Storable e => MArray s e -> Int -> ST s e
+unsafeReadMArray (MArray _len marr) = readMutableByteArray marr
+
+unsafeWriteMArray :: Storable e => MArray s e -> Int -> e -> ST s ()
+unsafeWriteMArray (MArray _len marr) = writeMutableByteArray marr
+
+-- | Create an uninitialized mutable array.
+unsafeNew :: (Storable e) => Int -> ST s (MArray s e)
+unsafeNew n = new undefined
+ where new :: (Storable e) => e -> ST s (MArray s e)
+ new unused = do
+ marr <- newMutableByteArray (n * sizeOf unused)
+ return (MArray n marr)
+
+unsafeFreeze (MArray len mba) = do
+ ba <- unsafeFreezeMutableByteArray mba
+ return (Array 0 len ba)
+
+-- | Create a mutable array, with its elements initialized with the
+-- given value.
+new :: (Storable e) => Int -> e -> ST s (MArray s e)
+#endif
+
+new len initVal = do
+ marr <- unsafeNew len
+ sequence_ [unsafeWrite marr i initVal | i <- [0..len-1]]
+ return marr
+
+instance Elt Word16 where
+#if defined(__GLASGOW_HASKELL__)
+
+ bytesInArray (I# i#) _ = I# (wORD16_SCALE i#)
+ {-# INLINE bytesInArray #-}
+
+ unsafeIndex (Array _len ba#) (I# i#) =
+ case indexWord16Array# ba# i# of r# -> (W16# r#)
+ {-# INLINE unsafeIndex #-}
+
+ unsafeRead (MArray _len mba#) (I# i#) = ST $ \s# ->
+ case readWord16Array# mba# i# s# of
+ (# s2#, r# #) -> (# s2#, W16# r# #)
+ {-# INLINE unsafeRead #-}
+
+ unsafeWrite (MArray _len marr#) (I# i#) (W16# e#) = ST $ \s1# ->
+ case writeWord16Array# marr# i# e# s1# of
+ s2# -> (# s2#, () #)
+ {-# INLINE unsafeWrite #-}
+
+#elif defined(__HUGS__)
+
+ bytesInArray n w = sizeOf w * n
+ unsafeIndex = unsafeIndexArray
+ unsafeRead = unsafeReadMArray
+ unsafeWrite = unsafeWriteMArray
+
+#endif
+
+-- | An empty immutable array.
+empty :: Elt e => Array e
+empty = runST (unsafeNew 0 >>= unsafeFreeze)
+
+-- | Run an action in the ST monad and return an immutable array of
+-- its result.
+run :: Elt e => (forall s. ST s (MArray s e)) -> Array e
+run k = runST (k >>= unsafeFreeze)
View
40 Data/Text/Fusion.hs
@@ -57,7 +57,6 @@ import Data.Char (ord)
import Control.Exception(assert)
import Control.Monad(liftM2)
import Control.Monad.ST(runST,ST)
-import Data.Array.Base
import Data.Bits (shiftL, shiftR, (.&.))
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
@@ -70,6 +69,7 @@ import GHC.Exts (Int(..), (+#))
import System.IO.Unsafe(unsafePerformIO)
import Data.Text.Internal(Text(..),empty)
import Data.Text.UnsafeChar(unsafeChr,unsafeChr8,unsafeChr32)
+import qualified Data.Text.Array as A
import qualified Data.Text.Utf8 as U8
import qualified Data.Text.Utf16 as U16
import qualified Data.Text.Utf32 as U32
@@ -103,32 +103,32 @@ stream (Text arr off len) = Stream next off len
| n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
| otherwise = Yield (unsafeChr n) (i + 1)
where
- n = unsafeAt arr i
- n2 = unsafeAt arr (i + 1)
+ n = A.unsafeIndex arr i
+ n2 = A.unsafeIndex arr (i + 1)
{-# INLINE [0] stream #-}
-- | /O(n)/ Convert a Stream Char into a Text.
unstream :: Stream Char -> Text
unstream (Stream next0 s0 len) = Text (fst a) 0 (snd a)
where
- a :: ((UArray Int Word16),Int)
- a = runST ((unsafeNewArray_ (0,len+1) :: ST s (STUArray s Int Word16))
- >>= (\arr -> loop arr 0 (len+1) s0))
+ a :: ((A.Array Word16),Int)
+ a = runST ((A.unsafeNew len :: ST s (A.MArray s Word16))
+ >>= (\arr -> loop arr 0 len s0))
loop arr !i !top !s
- | i + 1 > top = do arr' <- unsafeNewArray_ (0,top*2)
+ | i + 1 > top = do arr' <- A.unsafeNew (top*2)
case next0 s of
- Done -> liftM2 (,) (unsafeFreezeSTUArray arr) (return i)
+ Done -> liftM2 (,) (A.unsafeFreeze arr) (return i)
_ -> copy arr arr' >> loop arr' i (top*2) s
| otherwise = case next0 s of
- Done -> liftM2 (,) (unsafeFreezeSTUArray arr) (return i)
+ Done -> liftM2 (,) (A.unsafeFreeze arr) (return i)
Skip s' -> loop arr i top s'
Yield x s'
| n < 0x10000 -> do
- unsafeWrite arr i (fromIntegral n :: Word16)
+ A.unsafeWrite arr i (fromIntegral n :: Word16)
loop arr (i+1) top s'
| otherwise -> do
- unsafeWrite arr i l
- unsafeWrite arr (i + 1) r
+ A.unsafeWrite arr i l
+ A.unsafeWrite arr (i + 1) r
loop arr (i+2) top s'
where
n :: Int
@@ -142,16 +142,14 @@ unstream (Stream next0 s0 len) = Text (fst a) 0 (snd a)
{-# INLINE [0] unstream #-}
-copy :: STUArray s Int Word16 -> STUArray s Int Word16 -> ST s ()
-copy src dest = (do
- (_,top) <- getBounds src
- copy_loop 0 top)
+copy :: A.MArray s Word16 -> A.MArray s Word16 -> ST s ()
+copy src dest = copy_loop 0
where
- copy_loop i top
- | i > top = return ()
- | otherwise = do v <- unsafeRead src i
- unsafeWrite dest i v
- copy_loop (i+1) top
+ len = A.length src
+ copy_loop i
+ | i > len = return ()
+ | otherwise = do A.unsafeRead src i >>= A.unsafeWrite dest i
+ copy_loop (i+1)
-- | /O(n)/ Determines if two streams are equal.
eq :: Ord a => Stream a -> Stream a -> Bool
View
11 Data/Text/Internal.hs
@@ -23,18 +23,17 @@ module Data.Text.Internal
, empty
) where
-import Data.Array.ST (newArray_,runSTUArray)
-import Data.Array.Unboxed (UArray)
+import qualified Data.Text.Array as A
import Data.Typeable (Typeable)
import Data.Word (Word16)
-- | A space efficient, packed, unboxed Unicode text type.
-data Text = Text {-# UNPACK #-} !(UArray Int Word16) -- payload
- {-# UNPACK #-} !Int -- offset
- {-# UNPACK #-} !Int -- length
+data Text = Text {-# UNPACK #-} !(A.Array Word16) -- payload
+ {-# UNPACK #-} !Int -- offset
+ {-# UNPACK #-} !Int -- length
deriving (Typeable)
-- | /O(1)/ The empty 'Text'.
empty :: Text
-empty = Text (runSTUArray (newArray_ (0,0))) 0 0
+empty = Text A.empty 0 0
{-# INLINE [1] empty #-}
View
24 tests/Bench.hs
@@ -17,36 +17,25 @@ import qualified Data.Text.Fusion as S
import Data.Text.Fusion (Encoding(..))
import qualified Data.List as L
-import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString as B8
import Data.ByteString (ByteString)
import Data.Word
import qualified System.IO.UTF8 as UTF8
-main = do ascii_bs <- B.readFile "ascii.txt"
+main = do ascii_bs <- B.readFile "text/test/ascii.txt"
let ascii_txt = T.decode ASCII ascii_bs
let ascii_str = T.unpack ascii_txt
force (ascii_txt,ascii_str,ascii_bs)
printf " # Text\t\tString\tByteString\n"
run 1 (ascii_txt,ascii_str,ascii_bs) ascii_tests
- performGC
- bmp_txt <- T.readFile Utf8 "bmp.txt"
- let bmp_str = T.unpack bmp_txt
- force (bmp_txt,bmp_str)
- printf " # Text\t\tString\t\n"
- run 1 (bmp_txt, bmp_str, B.empty) bmp_tests
- performGC
- smp_sip_txt <- T.readFile Utf8 "smp_sip.txt"
- let smp_sip_str = T.unpack smp_sip_txt
- force (smp_sip_txt, smp_sip_str)
- printf " # Text\t\tString\t\n"
- run 1 (smp_sip_txt, smp_sip_str,B.empty) smp_sip_tests
ascii_tests = [
("cons",
[F (app1 (T.cons '\88')),
F (app2 ((:) '\88') ),
- F (app3 (B.cons 88) )]),
+ F (app3 (B8.cons 88) )]),
("head",
[F (app1 T.head),
F (app2 L.head),
@@ -78,7 +67,7 @@ ascii_tests = [
("filter",
[F $ app1 $ T.filter (/= '\101'),
Flist $ app2 $ L.filter (/= '\101'),
- F $ app3 $ B.filter (/= 101)]),
+ F $ app3 $ B8.filter (/= 101)]),
("foldl'",
[F (app1 $ T.foldl' (\a w -> a+1::Int) 0),
F (app2 $ L.foldl' (\a w -> a+1::Int) 0),
@@ -95,7 +84,8 @@ ascii_tests = [
F (app3 $ B.take 30000000)]),
("words",
[F (app1 $ T.words),
- Flist (app2 $ L.words)])
+ Flist (app2 $ L.words),
+ F (app3 $ B.words)])
]
bmp_tests = [
View
4 tests/FusionBench.hs
@@ -1,4 +1,4 @@
-import Prelude hiding (zip,zip3,fst,snd)
+import Prelude hiding (zip,zip3)
import BenchUtils
import Data.Char
@@ -19,6 +19,8 @@ main = do ascii_str <- readFile "ascii.txt"
printf " # Text\t\tString\tByteString\n"
run 1 (ascii_txt,ascii_str,ascii_bs) ascii_tests
+trd (a,b,c) = c
+
ascii_tests = [
("map/map",
[F $ T.map pred . T.map succ . fst,
View
6 text.cabal
@@ -17,6 +17,7 @@ cabal-version: >= 1.2
library
exposed-modules:
Data.Text
+ Data.Text.Array
Data.Text.UnsafeChar
Data.Text.Internal
Data.Text.Fusion
@@ -25,12 +26,11 @@ library
Data.Text.Utf16
build-depends:
- base >= 3 && < 4,
- array >= 0.1 && < 0.3,
+ base < 5,
bytestring >= 0.9 && < 1.0
if impl(ghc >= 6.10)
build-depends:
- ghc-prim
+ ghc-prim, base >= 4
-- gather extensive profiling data for now
ghc-prof-options: -auto-all

0 comments on commit a986b8c

Please sign in to comment.
Something went wrong with that request. Please try again.