From 0751c02d4b2af77acb14d5b2cfa7a81e4bda942b Mon Sep 17 00:00:00 2001 From: Teo Camarasu Date: Tue, 22 Aug 2023 17:20:55 +0100 Subject: [PATCH] Share empty Text values Try to use the same heap object to represent all empty Text values. There are already attempts to enforce something like this through the `text` smart constructor, and in various functions by special casing the empty case. This patch expands on these attempts and adds some tests to ensure that empty Text values produced by this library are represented by the same heap object. Despite these efforts, we cannot guarantee that this will be the case in all situations and users of the library shouldn't rely on this behaviour for the correctness of their code. Resolves #492. --- src/Data/Text.hs | 251 +++++---------------- src/Data/Text/Foreign.hs | 1 + src/Data/Text/Internal.hs | 12 +- src/Data/Text/Internal/Lazy.hs | 14 +- src/Data/Text/Internal/Reverse.hs | 18 +- src/Data/Text/Internal/Transformation.hs | 266 +++++++++++++++++++++++ src/Data/Text/Lazy.hs | 14 +- src/Data/Text/Show.hs | 4 +- tests/Tests.hs | 2 + tests/Tests/ShareEmpty.hs | 126 +++++++++++ text.cabal | 2 + 11 files changed, 493 insertions(+), 217 deletions(-) create mode 100644 src/Data/Text/Internal/Transformation.hs create mode 100644 tests/Tests/ShareEmpty.hs diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 60b1aeda..78ae7843 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -218,7 +218,7 @@ import Control.DeepSeq (NFData(rnf)) #if defined(ASSERTS) import Control.Exception (assert) #endif -import Data.Bits ((.&.), shiftR, shiftL) +import Data.Bits ((.&.)) import qualified Data.Char as Char import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex, Constr, mkConstr, DataType, mkDataType, Fixity(Prefix)) @@ -231,20 +231,20 @@ import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) import Data.Text.Internal.Reverse (reverse) -import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4) +import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, ord2, ord3, ord4) import qualified Data.Text.Internal.Fusion as S -import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping) import qualified Data.Text.Internal.Fusion.Common as S import Data.Text.Encoding (decodeUtf8', encodeUtf8) import Data.Text.Internal.Fusion (stream, reverseStream, unstream) import Data.Text.Internal.Private (span_) import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text, append, pack) -import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr8) +import Data.Text.Internal.Unsafe.Char (unsafeWrite) import Data.Text.Show (singleton, unpack, unpackCString#, unpackCStringAscii#) import qualified Prelude as P import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord8, reverseIter, reverseIter_, unsafeHead, unsafeTail, iterArray, reverseIterArray) import Data.Text.Internal.Search (indices) +import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_) #if defined(__HADDOCK__) import Data.ByteString (ByteString) import qualified Data.Text.Lazy as L @@ -253,7 +253,7 @@ import Data.Word (Word8) import Foreign.C.Types import GHC.Base (eqInt, neInt, gtInt, geInt, ltInt, leInt, ByteArray#) import qualified GHC.Exts as Exts -import GHC.Int (Int8, Int64(..)) +import GHC.Int (Int8) import GHC.Stack (HasCallStack) import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH @@ -418,10 +418,14 @@ instance TH.Lift Text where #if MIN_VERSION_template_haskell(2,16,0) lift txt = do let (ptr, len) = unsafePerformIO $ asForeignPtr txt - bytesQ = TH.litE . TH.bytesPrimL $ TH.mkBytes ptr 0 (P.fromIntegral len) - lenQ = liftInt (P.fromIntegral len) - liftInt n = (TH.appE (TH.conE 'Exts.I#) (TH.litE (TH.IntPrimL n))) - TH.varE 'unpackCStringLen# `TH.appE` bytesQ `TH.appE` lenQ + case len of + 0 -> TH.varE 'empty + _ -> + let + bytesQ = TH.litE . TH.bytesPrimL $ TH.mkBytes ptr 0 (P.fromIntegral len) + lenQ = liftInt (P.fromIntegral len) + liftInt n = (TH.appE (TH.conE 'Exts.I#) (TH.litE (TH.IntPrimL n))) + in TH.varE 'unpackCStringLen# `TH.appE` bytesQ `TH.appE` lenQ #else lift = TH.appE (TH.varE 'pack) . TH.stringE . unpack #endif @@ -499,7 +503,7 @@ uncons t@(Text arr off len) -- non-empty. This is a partial function, consider using 'unsnoc' instead. last :: HasCallStack => Text -> Char last t@(Text _ _ len) - | len <= 0 = emptyError "last" + | null t = emptyError "last" | otherwise = let Iter c _ = reverseIter t (len - 1) in c {-# INLINE [1] last #-} @@ -507,7 +511,7 @@ last t@(Text _ _ len) -- must be non-empty. This is a partial function, consider using 'uncons' instead. tail :: HasCallStack => Text -> Text tail t@(Text arr off len) - | len <= 0 = emptyError "tail" + | null t = emptyError "tail" | otherwise = text arr (off+d) (len-d) where d = iter_ t 0 {-# INLINE [1] tail #-} @@ -516,7 +520,7 @@ tail t@(Text arr off len) -- be non-empty. This is a partial function, consider using 'unsnoc' instead. init :: HasCallStack => Text -> Text init t@(Text arr off len) - | len <= 0 = emptyError "init" + | null t = emptyError "init" | otherwise = text arr off (len + reverseIter_ t (len - 1)) {-# INLINE [1] init #-} @@ -526,7 +530,7 @@ init t@(Text arr off len) -- @since 1.2.3.0 unsnoc :: Text -> Maybe (Text, Char) unsnoc t@(Text arr off len) - | len <= 0 = Nothing + | null t = Nothing | otherwise = Just (text arr off (len + d), c) where Iter c d = reverseIter t (len - 1) @@ -541,6 +545,10 @@ null (Text _arr _off len) = len <= 0 {-# INLINE [1] null #-} +{-# RULES + "TEXT null/empty -> True" null empty = True +#-} + -- | /O(1)/ Tests whether a 'Text' contains exactly one character. isSingleton :: Text -> Bool isSingleton = S.isSingleton . stream @@ -576,6 +584,8 @@ length = P.negate . measureOff P.maxBound length (intersperse c t) = max 0 (mul 2 (length t) - 1) "TEXT length/intercalate -> n*length" forall s ts. length (intercalate s ts) = let lenS = length s in max 0 (P.sum (P.map (\t -> length t + lenS) ts) - lenS) +"TEXT length/empty -> 0" + length empty = 0 #-} -- | /O(min(n,c))/ Compare the count of characters in a 'Text' to a number. @@ -639,28 +649,7 @@ compareLength t c = S.compareLengthI (stream t) c -- -- Performs replacement on invalid scalar values. map :: (Char -> Char) -> Text -> Text -map f = go - where - go (Text src o l) = runST $ do - marr <- A.new (l + 4) - outer marr (l + 4) o 0 - where - outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text - outer !dst !dstLen = inner - where - inner !srcOff !dstOff - | srcOff >= l + o = do - A.shrinkM dst dstOff - arr <- A.unsafeFreeze dst - return (Text arr 0 dstOff) - | dstOff + 4 > dstLen = do - let !dstLen' = dstLen + (l + o) - srcOff + 4 - dst' <- A.resizeM dst dstLen' - outer dst' dstLen' srcOff dstOff - | otherwise = do - let !(Iter c d) = iterArray src srcOff - d' <- unsafeWrite dst dstOff (safe (f c)) - inner (srcOff + d) (dstOff + d') +map f = \t -> if null t then empty else mapNonEmpty f t {-# INLINE [1] map #-} {-# RULES @@ -690,7 +679,7 @@ intercalate t = concat . L.intersperse t -- -- Performs replacement on invalid scalar values. intersperse :: Char -> Text -> Text -intersperse c t@(Text src o l) = if l == 0 then mempty else runST $ do +intersperse c t@(Text src o l) = if null t then empty else runST $ do let !cLen = utf8Length c dstLen = l + length t P.* cLen @@ -784,9 +773,9 @@ replace needle@(Text _ _ neeLen) (Text repArr repOff repLen) haystack@(Text hayArr hayOff hayLen) | neeLen == 0 = emptyError "replace" + | len == 0 = empty -- if also haystack is empty, we can't just return 'haystack' as worker/wrapper might duplicate it | L.null ixs = haystack - | len > 0 = Text (A.run x) 0 len - | otherwise = empty + | otherwise = Text (A.run x) 0 len where ixs = indices needle haystack len = hayLen - (neeLen - repLen) `mul` L.length ixs @@ -823,84 +812,6 @@ replace needle@(Text _ _ neeLen) -- sensitivity should use appropriate versions of the -- . -caseConvert :: (Word8 -> Word8) -> (Exts.Char# -> _ {- unboxed Int64 -}) -> Text -> Text -caseConvert ascii remap (Text src o l) = runST $ do - -- Case conversion a single code point may produce up to 3 code-points, - -- each up to 4 bytes, so 12 in total. - dst <- A.new (l + 12) - outer dst l o 0 - where - outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text - outer !dst !dstLen = inner - where - inner !srcOff !dstOff - | srcOff >= o + l = do - A.shrinkM dst dstOff - arr <- A.unsafeFreeze dst - return (Text arr 0 dstOff) - | dstOff + 12 > dstLen = do - -- Ensure to extend the buffer by at least 12 bytes. - let !dstLen' = dstLen + max 12 (l + o - srcOff) - dst' <- A.resizeM dst dstLen' - outer dst' dstLen' srcOff dstOff - -- If a character is to remain unchanged, no need to decode Char back into UTF8, - -- just copy bytes from input. - | otherwise = do - let m0 = A.unsafeIndex src srcOff - m1 = A.unsafeIndex src (srcOff + 1) - m2 = A.unsafeIndex src (srcOff + 2) - m3 = A.unsafeIndex src (srcOff + 3) - !d = utf8LengthByLeader m0 - case d of - 1 -> do - A.unsafeWrite dst dstOff (ascii m0) - inner (srcOff + 1) (dstOff + 1) - 2 -> do - let !(Exts.C# c) = chr2 m0 m1 - dstOff' <- case I64# (remap c) of - 0 -> do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - pure $ dstOff + 2 - i -> writeMapping i dstOff - inner (srcOff + 2) dstOff' - 3 -> do - let !(Exts.C# c) = chr3 m0 m1 m2 - dstOff' <- case I64# (remap c) of - 0 -> do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - pure $ dstOff + 3 - i -> writeMapping i dstOff - inner (srcOff + 3) dstOff' - _ -> do - let !(Exts.C# c) = chr4 m0 m1 m2 m3 - dstOff' <- case I64# (remap c) of - 0 -> do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - A.unsafeWrite dst (dstOff + 3) m3 - pure $ dstOff + 4 - i -> writeMapping i dstOff - inner (srcOff + 4) dstOff' - - writeMapping :: Int64 -> Int -> ST s Int - writeMapping 0 dstOff = pure dstOff - writeMapping i dstOff = do - let (ch, j) = chopOffChar i - d <- unsafeWrite dst dstOff ch - writeMapping j (dstOff + d) - - chopOffChar :: Int64 -> (Char, Int64) - chopOffChar ab = (chr a, ab `shiftR` 21) - where - chr (Exts.I# n) = Exts.C# (Exts.chr# n) - mask = (1 `shiftL` 21) - 1 - a = P.fromIntegral $ ab .&. mask -{-# INLINE caseConvert #-} - -- | /O(n)/ Convert a string to folded case. -- -- This function is mainly useful for performing caseless (also known @@ -918,7 +829,9 @@ caseConvert ascii remap (Text src o l) = runST $ do -- U+00B5) is case folded to \"μ\" (small letter mu, U+03BC) -- instead of itself. toCaseFold :: Text -> Text -toCaseFold = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) foldMapping xs +toCaseFold = \t -> + if null t then empty + else toCaseFoldNonEmpty t {-# INLINE toCaseFold #-} -- | /O(n)/ Convert a string to lower case, using simple case @@ -929,7 +842,9 @@ toCaseFold = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) foldM -- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069) -- followed by \" ̇\" (combining dot above, U+0307). toLower :: Text -> Text -toLower = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) lowerMapping xs +toLower = \t -> + if null t then empty + else toLowerNonEmpty t {-# INLINE toLower #-} -- | /O(n)/ Convert a string to upper case, using simple case @@ -939,7 +854,9 @@ toLower = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) lowerMap -- instance, the German \"ß\" (eszett, U+00DF) maps to the -- two-letter sequence \"SS\". toUpper :: Text -> Text -toUpper = \xs -> caseConvert (\w -> if w - 97 <= 25 then w - 32 else w) upperMapping xs +toUpper = \t -> + if null t then empty + else toUpperNonEmpty t {-# INLINE toUpper #-} -- | /O(n)/ Convert a string to title case, using simple case @@ -1370,8 +1287,10 @@ unfoldrN n f s = unstream (S.unfoldrN n (firstf safe . f) s) take :: Int -> Text -> Text take n t@(Text arr off len) | n <= 0 = empty - | n >= len = t - | otherwise = let m = measureOff n t in if m >= 0 then text arr off m else t + | n >= len || m >= len || m < 0 = t + | otherwise = Text arr off m + where + m = measureOff n t {-# INLINE [1] take #-} -- | /O(n)/ If @t@ is long enough to contain @n@ characters, 'measureOff' @n@ @t@ @@ -1423,8 +1342,8 @@ iterNEnd n t@(Text _arr _off len) = loop (len-1) n drop :: Int -> Text -> Text drop n t@(Text arr off len) | n <= 0 = t - | n >= len = empty - | otherwise = if m >= 0 then text arr (off+m) (len-m) else mempty + | n >= len || m >= len || m < 0 = empty + | otherwise = Text arr (off+m) (len-m) where m = measureOff n t {-# INLINE [1] drop #-} @@ -1532,9 +1451,10 @@ strip = dropAround Char.isSpace splitAt :: Int -> Text -> (Text, Text) splitAt n t@(Text arr off len) | n <= 0 = (empty, t) - | n >= len = (t, empty) - | otherwise = let m = measureOff n t in - if m >= 0 then (text arr off m, text arr (off+m) (len-m)) else (t, mempty) + | n >= len || m >= len || m < 0 = (t, empty) + | otherwise = (Text arr off m, Text arr (off+m) (len-m)) + where + m = measureOff n t -- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns -- a pair whose first element is the longest prefix (possibly empty) @@ -1632,9 +1552,11 @@ group = groupBy (==) -- | /O(n)/ Return all initial segments of the given 'Text', shortest -- first. inits :: Text -> [Text] -inits t@(Text arr off len) = loop 0 - where loop i | i >= len = [t] - | otherwise = Text arr off i : loop (i + iter_ t i) +inits t = empty : case t of + Text arr off len -> + let loop i | i >= len = [] + | otherwise = let !j = i + iter_ t i in Text arr off j : loop j + in loop 0 -- | /O(n)/ Return all final segments of the given 'Text', longest -- first. @@ -1704,8 +1626,9 @@ splitOn pat@(Text _ _ l) src@(Text arr off len) -- >>> split (=='a') "" -- [""] split :: (Char -> Bool) -> Text -> [Text] -split _ t@(Text _off _arr 0) = [t] -split p t = loop t +split p t + | null t = [empty] + | otherwise = loop t where loop s | null s' = [l] | otherwise = l : loop (unsafeTail s') where (# l, s' #) = span_ (not . p) s @@ -1761,67 +1684,7 @@ partition p t = (filter p t, filter (not . p) t) -- returns a 'Text' containing those characters that satisfy the -- predicate. filter :: (Char -> Bool) -> Text -> Text -filter p = go - where - go (Text src o l) = runST $ do - -- It's tempting to allocate l elements at once and avoid resizing. - -- However, this can be unacceptable in scenarios where a huge array - -- is filtered with a rare predicate, resulting in a much shorter buffer. - let !dstLen = min l 64 - dst <- A.new dstLen - outer dst dstLen o 0 - where - outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text - outer !dst !dstLen = inner - where - inner !srcOff !dstOff - | srcOff >= o + l = do - A.shrinkM dst dstOff - arr <- A.unsafeFreeze dst - return (Text arr 0 dstOff) - | dstOff + 4 > dstLen = do - -- Double size of the buffer, unless it becomes longer than - -- source string. Ensure to extend it by least 4 bytes. - let !dstLen' = dstLen + max 4 (min (l + o - srcOff) dstLen) - dst' <- A.resizeM dst dstLen' - outer dst' dstLen' srcOff dstOff - -- In case of success, filter writes exactly the same character - -- it just read (this is not a case for map, for example). - -- We leverage this fact below: no need to decode Char back into UTF8, - -- just copy bytes from input. - | otherwise = do - let m0 = A.unsafeIndex src srcOff - m1 = A.unsafeIndex src (srcOff + 1) - m2 = A.unsafeIndex src (srcOff + 2) - m3 = A.unsafeIndex src (srcOff + 3) - !d = utf8LengthByLeader m0 - case d of - 1 -> do - let !c = unsafeChr8 m0 - if not (p c) then inner (srcOff + 1) dstOff else do - A.unsafeWrite dst dstOff m0 - inner (srcOff + 1) (dstOff + 1) - 2 -> do - let !c = chr2 m0 m1 - if not (p c) then inner (srcOff + 2) dstOff else do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - inner (srcOff + 2) (dstOff + 2) - 3 -> do - let !c = chr3 m0 m1 m2 - if not (p c) then inner (srcOff + 3) dstOff else do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - inner (srcOff + 3) (dstOff + 3) - _ -> do - let !c = chr4 m0 m1 m2 m3 - if not (p c) then inner (srcOff + 4) dstOff else do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - A.unsafeWrite dst (dstOff + 3) m3 - inner (srcOff + 4) (dstOff + 4) +filter p = filter_ text p {-# INLINE [1] filter #-} {-# RULES @@ -2205,7 +2068,9 @@ overflowError fun = P.error $ "Data.Text." ++ fun ++ ": size overflow" -- copy \"breaks the link\" to the original array, allowing it to be -- garbage collected if there are no other live references to it. copy :: Text -> Text -copy (Text arr off len) = Text (A.run go) 0 len +copy t@(Text arr off len) + | null t = empty + | otherwise = Text (A.run go) 0 len where go :: ST s (A.MArray s) go = do diff --git a/src/Data/Text/Foreign.hs b/src/Data/Text/Foreign.hs index 975e777c..82f010e7 100644 --- a/src/Data/Text/Foreign.hs +++ b/src/Data/Text/Foreign.hs @@ -71,6 +71,7 @@ newtype I8 = I8 Int fromPtr :: Ptr Word8 -- ^ source array -> I8 -- ^ length of source array (in 'Word8' units) -> IO Text +fromPtr _ (I8 0) = pure empty fromPtr ptr (I8 len) = unsafeSTToIO $ do dst <- A.new len A.copyFromPointer dst 0 ptr len diff --git a/src/Data/Text/Internal.hs b/src/Data/Text/Internal.hs index 3fc271ea..4ccde712 100644 --- a/src/Data/Text/Internal.hs +++ b/src/Data/Text/Internal.hs @@ -36,7 +36,6 @@ module Data.Text.Internal , safe -- * Code that must be here for accessibility , empty - , empty_ , append -- * Utilities , firstf @@ -90,12 +89,7 @@ text_ arr off len = -- | /O(1)/ The empty 'Text'. empty :: Text empty = Text A.empty 0 0 -{-# INLINE [1] empty #-} - --- | A non-inlined version of 'empty'. -empty_ :: Text -empty_ = Text A.empty 0 0 -{-# NOINLINE empty_ #-} +{-# NOINLINE empty #-} -- | /O(n)/ Appends one 'Text' to the other by copying both of them -- into a new 'Text'. @@ -117,6 +111,7 @@ append a@(Text arr1 off1 len1) b@(Text arr2 off2 len2) -- | Construct a 'Text' without invisibly pinning its byte array in -- memory if its length has dwindled to zero. +-- It ensures that empty 'Text' values are shared. text :: #if defined(ASSERTS) HasCallStack => @@ -127,7 +122,7 @@ text :: -> Text text arr off len | len == 0 = empty | otherwise = text_ arr off len -{-# INLINE text #-} +{-# INLINE [0] text #-} textP :: A.Array -> Int -> Int -> Text {-# DEPRECATED textP "Use text instead" #-} @@ -247,6 +242,7 @@ int64ToInt32 = fromIntegral -- >>> Data.Text.unpack (pack "\55555") -- "\65533" pack :: String -> Text +pack [] = empty pack xs = runST $ do -- It's tempting to allocate a buffer of 4 * length xs bytes, -- but not only it's wasteful for predominantly ASCII arguments, diff --git a/src/Data/Text/Internal/Lazy.hs b/src/Data/Text/Internal/Lazy.hs index 928e5cfc..74c7ba90 100644 --- a/src/Data/Text/Internal/Lazy.hs +++ b/src/Data/Text/Internal/Lazy.hs @@ -46,6 +46,7 @@ import Data.Typeable (Typeable) import Foreign.Storable (sizeOf) import qualified Data.Text.Array as A import qualified Data.Text.Internal as T +import qualified Data.Text as T data Text = Empty | Chunk {-# UNPACK #-} !T.Text Text @@ -82,9 +83,16 @@ showStructure (Chunk t ts) = -- | Smart constructor for 'Chunk'. Guarantees the data type invariant. chunk :: T.Text -> Text -> Text -{-# INLINE chunk #-} -chunk t@(T.Text _ _ len) ts | len == 0 = ts - | otherwise = Chunk t ts +{-# INLINE [0] chunk #-} +chunk t ts | T.null t = ts + | otherwise = Chunk t ts + +{-# RULES +"TEXT chunk/text" forall arr off len. + chunk (T.text arr off len) = chunk (T.Text arr off len) +"TEXT chunk/empty" forall ts. + chunk T.empty ts = ts +#-} -- | Smart constructor for 'Empty'. empty :: Text diff --git a/src/Data/Text/Internal/Reverse.hs b/src/Data/Text/Internal/Reverse.hs index f3b91b46..0759acac 100644 --- a/src/Data/Text/Internal/Reverse.hs +++ b/src/Data/Text/Internal/Reverse.hs @@ -11,7 +11,7 @@ {-# OPTIONS_GHC -ddump-to-file -ddump-simpl -dsuppress-all -dno-suppress-type-signatures #-} -- | Implements 'reverse', using efficient C routines by default. -module Data.Text.Internal.Reverse (reverse) where +module Data.Text.Internal.Reverse (reverse, reverseNonEmpty) where #if !defined(PURE_HASKELL) import GHC.Exts as Exts @@ -25,7 +25,7 @@ import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader) import GHC.Stack (HasCallStack) #endif import Prelude hiding (reverse) -import Data.Text.Internal (Text(..)) +import Data.Text.Internal (Text(..), empty) import Control.Monad.ST (runST) import qualified Data.Text.Array as A @@ -41,8 +41,16 @@ reverse :: HasCallStack => #endif Text -> Text +reverse (Text _ _ 0) = empty +reverse t = reverseNonEmpty t +{-# INLINE reverse #-} + +-- | /O(n)/ Reverse the characters of a string. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +reverseNonEmpty :: + Text -> Text #if defined(PURE_HASKELL) -reverse (Text src off len) = runST $ do +reverseNonEmtpy (Text src off len) = runST $ do dest <- A.new len _ <- reversePoints src off dest len result <- A.unsafeFreeze dest @@ -82,13 +90,13 @@ reversePoints src xx dest yy = go xx yy where A.copyI pLen dest yNext src x go (x + pLen) yNext #else -reverse (Text (A.ByteArray ba) off len) = runST $ do +reverseNonEmpty (Text (A.ByteArray ba) off len) = runST $ do marr@(A.MutableByteArray mba) <- A.new len unsafeIOToST $ c_reverse mba ba (fromIntegral off) (fromIntegral len) brr <- A.unsafeFreeze marr return $ Text brr 0 len #endif -{-# INLINE reverse #-} +{-# INLINE reverseNonEmpty #-} #if !defined(PURE_HASKELL) -- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize) diff --git a/src/Data/Text/Internal/Transformation.hs b/src/Data/Text/Internal/Transformation.hs new file mode 100644 index 00000000..026c3272 --- /dev/null +++ b/src/Data/Text/Internal/Transformation.hs @@ -0,0 +1,266 @@ +{-# LANGUAGE BangPatterns, CPP, MagicHash #-} +{-# LANGUAGE Trustworthy #-} +{-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE PartialTypeSignatures #-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# OPTIONS_GHC -Wno-partial-type-signatures #-} + +-- | +-- Module : Data.Text.Internal.Transformation +-- Copyright : (c) 2008, 2009 Tom Harper, +-- (c) 2009, 2010 Bryan O'Sullivan, +-- (c) 2009 Duncan Coutts +-- +-- License : BSD-style +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : GHC +-- +-- This module holds functions shared between the strict and lazy implementations of @Text@ transformations. + +module Data.Text.Internal.Transformation + ( mapNonEmpty + , reverseNonEmpty + , toCaseFoldNonEmpty + , toLowerNonEmpty + , toUpperNonEmpty + , filter_ + ) where + +import Prelude (Char, Bool(..), Int, + Ord(..), + Monad(..), pure, + (+), (-), ($), + not, return, otherwise, IO) +#if defined(ASSERTS) +import Control.Exception (assert) +#endif +import Data.Bits ((.&.), shiftR, shiftL) +import Control.Monad.ST (ST, runST) +import Control.Monad.ST.Unsafe (unsafeIOToST) +import qualified Data.Text.Array as A +import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader, chr2, chr3, chr4) +import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping) +import Data.Text.Internal (Text(..), safe) +import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr8) +import qualified Prelude as P +import Data.Text.Unsafe (Iter(..), iterArray) +import Data.Word (Word8) +import Foreign.C.Types +import GHC.Base (ByteArray#) +import qualified GHC.Exts as Exts +import GHC.Int (Int64(..)) + +-- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to +-- each element of @t@. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +mapNonEmpty :: (Char -> Char) -> Text -> Text +mapNonEmpty f = go + where + go (Text src o l) = runST $ do + marr <- A.new (l + 4) + outer marr (l + 4) o 0 + where + outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text + outer !dst !dstLen = inner + where + inner !srcOff !dstOff + | srcOff >= l + o = do + A.shrinkM dst dstOff + arr <- A.unsafeFreeze dst + return (Text arr 0 dstOff) + | dstOff + 4 > dstLen = do + let !dstLen' = dstLen + (l + o) - srcOff + 4 + dst' <- A.resizeM dst dstLen' + outer dst' dstLen' srcOff dstOff + | otherwise = do + let !(Iter c d) = iterArray src srcOff + d' <- unsafeWrite dst dstOff (safe (f c)) + inner (srcOff + d) (dstOff + d') +{-# INLINE mapNonEmpty #-} + +-- | /O(n)/ Reverse the characters of a string. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +reverseNonEmpty :: + Text -> Text +reverseNonEmpty (Text (A.ByteArray ba) off len) = runST $ do + marr@(A.MutableByteArray mba) <- A.new len + unsafeIOToST $ c_reverse mba ba (intToCSize off) (intToCSize len) + brr <- A.unsafeFreeze marr + return $ Text brr 0 len +{-# INLINE reverseNonEmpty #-} + +-- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize) +-- must specify a valid UTF-8 sequence, this condition is not checked. +foreign import ccall unsafe "_hs_text_reverse" c_reverse + :: Exts.MutableByteArray# s -> ByteArray# -> CSize -> CSize -> IO () + +intToCSize :: Int -> CSize +intToCSize = P.fromIntegral + +caseConvert :: (Word8 -> Word8) -> (Exts.Char# -> _ {- unboxed Int64 -}) -> Text -> Text +caseConvert ascii remap (Text src o l) = runST $ do + -- Case conversion a single code point may produce up to 3 code-points, + -- each up to 4 bytes, so 12 in total. + dst <- A.new (l + 12) + outer dst l o 0 + where + outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text + outer !dst !dstLen = inner + where + inner !srcOff !dstOff + | srcOff >= o + l = do + A.shrinkM dst dstOff + arr <- A.unsafeFreeze dst + return (Text arr 0 dstOff) + | dstOff + 12 > dstLen = do + -- Ensure to extend the buffer by at least 12 bytes. + let !dstLen' = dstLen + max 12 (l + o - srcOff) + dst' <- A.resizeM dst dstLen' + outer dst' dstLen' srcOff dstOff + -- If a character is to remain unchanged, no need to decode Char back into UTF8, + -- just copy bytes from input. + | otherwise = do + let m0 = A.unsafeIndex src srcOff + m1 = A.unsafeIndex src (srcOff + 1) + m2 = A.unsafeIndex src (srcOff + 2) + m3 = A.unsafeIndex src (srcOff + 3) + !d = utf8LengthByLeader m0 + case d of + 1 -> do + A.unsafeWrite dst dstOff (ascii m0) + inner (srcOff + 1) (dstOff + 1) + 2 -> do + let !(Exts.C# c) = chr2 m0 m1 + dstOff' <- case I64# (remap c) of + 0 -> do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + pure $ dstOff + 2 + i -> writeMapping i dstOff + inner (srcOff + 2) dstOff' + 3 -> do + let !(Exts.C# c) = chr3 m0 m1 m2 + dstOff' <- case I64# (remap c) of + 0 -> do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + pure $ dstOff + 3 + i -> writeMapping i dstOff + inner (srcOff + 3) dstOff' + _ -> do + let !(Exts.C# c) = chr4 m0 m1 m2 m3 + dstOff' <- case I64# (remap c) of + 0 -> do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + A.unsafeWrite dst (dstOff + 3) m3 + pure $ dstOff + 4 + i -> writeMapping i dstOff + inner (srcOff + 4) dstOff' + + writeMapping :: Int64 -> Int -> ST s Int + writeMapping 0 dstOff = pure dstOff + writeMapping i dstOff = do + let (ch, j) = chopOffChar i + d <- unsafeWrite dst dstOff ch + writeMapping j (dstOff + d) + + chopOffChar :: Int64 -> (Char, Int64) + chopOffChar ab = (chr a, ab `shiftR` 21) + where + chr (Exts.I# n) = Exts.C# (Exts.chr# n) + mask = (1 `shiftL` 21) - 1 + a = P.fromIntegral $ ab .&. mask +{-# INLINE caseConvert #-} + + +-- | /O(n)/ Convert a string to folded case. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +toCaseFoldNonEmpty :: Text -> Text +toCaseFoldNonEmpty = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) foldMapping xs +{-# INLINE toCaseFoldNonEmpty #-} + +-- | /O(n)/ Convert a string to lower case, using simple case +-- conversion. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +toLowerNonEmpty :: Text -> Text +toLowerNonEmpty = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) lowerMapping xs +{-# INLINE toLowerNonEmpty #-} + +-- | /O(n)/ Convert a string to upper case, using simple case +-- conversion. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +toUpperNonEmpty :: Text -> Text +toUpperNonEmpty = \xs -> caseConvert (\w -> if w - 97 <= 25 then w - 32 else w) upperMapping xs +{-# INLINE toUpperNonEmpty #-} + +-- | /O(n)/ 'filter_', applied to a continuation, a predicate and a @Text@, +-- calls the continuation with the @Text@ containing only the characters satisfying the predicate. +filter_ :: forall a. (A.Array -> Int -> Int -> a) -> (Char -> Bool) -> Text -> a +filter_ mkText p = go + where + go (Text src o l) = runST $ do + -- It's tempting to allocate l elements at once and avoid resizing. + -- However, this can be unacceptable in scenarios where a huge array + -- is filtered with a rare predicate, resulting in a much shorter buffer. + let !dstLen = min l 64 + dst <- A.new dstLen + outer dst dstLen o 0 + where + outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s a + outer !dst !dstLen = inner + where + inner !srcOff !dstOff + | srcOff >= o + l = do + A.shrinkM dst dstOff + arr <- A.unsafeFreeze dst + return $ mkText arr 0 dstOff + | dstOff + 4 > dstLen = do + -- Double size of the buffer, unless it becomes longer than + -- source string. Ensure to extend it by least 4 bytes. + let !dstLen' = dstLen + max 4 (min (l + o - srcOff) dstLen) + dst' <- A.resizeM dst dstLen' + outer dst' dstLen' srcOff dstOff + -- In case of success, filter writes exactly the same character + -- it just read (this is not a case for map, for example). + -- We leverage this fact below: no need to decode Char back into UTF8, + -- just copy bytes from input. + | otherwise = do + let m0 = A.unsafeIndex src srcOff + m1 = A.unsafeIndex src (srcOff + 1) + m2 = A.unsafeIndex src (srcOff + 2) + m3 = A.unsafeIndex src (srcOff + 3) + !d = utf8LengthByLeader m0 + case d of + 1 -> do + let !c = unsafeChr8 m0 + if not (p c) then inner (srcOff + 1) dstOff else do + A.unsafeWrite dst dstOff m0 + inner (srcOff + 1) (dstOff + 1) + 2 -> do + let !c = chr2 m0 m1 + if not (p c) then inner (srcOff + 2) dstOff else do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + inner (srcOff + 2) (dstOff + 2) + 3 -> do + let !c = chr3 m0 m1 m2 + if not (p c) then inner (srcOff + 3) dstOff else do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + inner (srcOff + 3) (dstOff + 3) + _ -> do + let !c = chr4 m0 m1 m2 m3 + if not (p c) then inner (srcOff + 4) dstOff else do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + A.unsafeWrite dst (dstOff + 3) m3 + inner (srcOff + 4) (dstOff + 4) +{-# INLINE filter_ #-} diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 2195793a..0ba70d93 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -232,6 +232,8 @@ import Data.Text.Internal.Lazy.Fusion (stream, unstream) import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldlChunks, foldrChunks, smallChunkSize, defaultChunkSize, equal) import Data.Text.Internal (firstf, safe, text) +import Data.Text.Internal.Reverse (reverseNonEmpty) +import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_) import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8) import Data.Text.Internal.Lazy.Search (indices) import qualified GHC.CString as GHC @@ -577,7 +579,7 @@ compareLength t c = S.compareLengthI (stream t) c -- each element of @t@. Performs replacement on -- invalid scalar values. map :: (Char -> Char) -> Text -> Text -map f = foldrChunks (Chunk . T.map f) Empty +map f = foldrChunks (Chunk . mapNonEmpty f) Empty {-# INLINE [1] map #-} {-# RULES @@ -663,7 +665,7 @@ reverse :: Text -> Text reverse = rev Empty where rev a Empty = a - rev a (Chunk t ts) = rev (Chunk (T.reverse t) a) ts + rev a (Chunk t ts) = rev (Chunk (reverseNonEmpty t) a) ts -- | /O(m+n)/ Replace every non-overlapping occurrence of @needle@ in -- @haystack@ with @replacement@. @@ -728,7 +730,7 @@ replace s d = intercalate d . splitOn s -- case folded to the Greek small letter letter mu (U+03BC) instead of -- itself. toCaseFold :: Text -> Text -toCaseFold = foldrChunks (\chnk acc -> Chunk (T.toCaseFold chnk) acc) Empty +toCaseFold = foldrChunks (\chnk acc -> Chunk (toCaseFoldNonEmpty chnk) acc) Empty {-# INLINE toCaseFold #-} -- | /O(n)/ Convert a string to lower case, using simple case @@ -739,7 +741,7 @@ toCaseFold = foldrChunks (\chnk acc -> Chunk (T.toCaseFold chnk) acc) Empty -- to the sequence Latin small letter i (U+0069) followed by combining -- dot above (U+0307). toLower :: Text -> Text -toLower = foldrChunks (\chnk acc -> Chunk (T.toLower chnk) acc) Empty +toLower = foldrChunks (\chnk acc -> Chunk (toLowerNonEmpty chnk) acc) Empty {-# INLINE toLower #-} -- | /O(n)/ Convert a string to upper case, using simple case @@ -749,7 +751,7 @@ toLower = foldrChunks (\chnk acc -> Chunk (T.toLower chnk) acc) Empty -- instance, the German eszett (U+00DF) maps to the two-letter -- sequence SS. toUpper :: Text -> Text -toUpper = foldrChunks (\chnk acc -> Chunk (T.toUpper chnk) acc) Empty +toUpper = foldrChunks (\chnk acc -> Chunk (toUpperNonEmpty chnk) acc) Empty {-# INLINE toUpper #-} @@ -1682,7 +1684,7 @@ stripSuffix p t = reverse `fmap` stripPrefix (reverse p) (reverse t) -- returns a 'Text' containing those characters that satisfy the -- predicate. filter :: (Char -> Bool) -> Text -> Text -filter p = foldrChunks (chunk . T.filter p) Empty +filter p = foldrChunks (chunk . filter_ T.Text p) Empty {-# INLINE [1] filter #-} {-# RULES diff --git a/src/Data/Text/Show.hs b/src/Data/Text/Show.hs index 92aef473..f0996914 100644 --- a/src/Data/Text/Show.hs +++ b/src/Data/Text/Show.hs @@ -24,7 +24,7 @@ module Data.Text.Show ) where import Control.Monad.ST (ST, runST) -import Data.Text.Internal (Text(..), empty_, safe, pack) +import Data.Text.Internal (Text(..), empty, safe, pack) import Data.Text.Internal.Encoding.Utf8 (utf8Length) import Data.Text.Internal.Fusion (stream) import Data.Text.Internal.Unsafe.Char (unsafeWrite) @@ -123,7 +123,7 @@ foreign import capi unsafe "string.h strlen" c_strlen :: CString -> CSize pack (GHC.unpackCStringUtf8# a) = unpackCString# a #-} {-# RULES "TEXT empty literal" - pack [] = empty_ #-} + pack [] = empty #-} {-# RULES "TEXT singleton literal" forall a. pack [a] = singleton a #-} diff --git a/tests/Tests.hs b/tests/Tests.hs index 17e132e6..8ff6aca3 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -9,6 +9,7 @@ import Test.Tasty (defaultMain, testGroup) import qualified Tests.Lift as Lift import qualified Tests.Properties as Properties import qualified Tests.Regressions as Regressions +import qualified Tests.ShareEmpty as ShareEmpty import qualified Tests.RebindableSyntaxTest as RST main :: IO () @@ -16,5 +17,6 @@ main = defaultMain $ testGroup "All" [ Lift.tests , Properties.tests , Regressions.tests + , ShareEmpty.tests , RST.tests ] diff --git a/tests/Tests/ShareEmpty.hs b/tests/Tests/ShareEmpty.hs new file mode 100644 index 00000000..efeea75f --- /dev/null +++ b/tests/Tests/ShareEmpty.hs @@ -0,0 +1,126 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE BangPatterns #-} + +module Tests.ShareEmpty + ( tests + ) where + +import Control.Exception (evaluate) +import Data.Text +import Language.Haskell.TH.Syntax (lift) +import Test.Tasty.HUnit (testCase, assertFailure, assertEqual) +import Test.Tasty (TestTree, testGroup) +import GHC.Exts +import GHC.Stack +import qualified Data.List as L +import qualified Data.Text as T + + +-- | assert that a text value is represented by the same pointer +-- as the 'empty' value. +assertPtrEqEmpty :: HasCallStack => Text -> IO () +assertPtrEqEmpty t = do + t' <- evaluate t + empty' <- evaluate empty + assertEqual "" empty' t' + case reallyUnsafePtrEquality# empty' t' of + 1# -> pure () + _ -> assertFailure "Pointers are not equal" +{-# NOINLINE assertPtrEqEmpty #-} + +tests :: TestTree +tests = testGroup "empty Text values are shared" + [ testCase "empty = empty" $ assertPtrEqEmpty T.empty + , testCase "pack \"\" = empty" $ assertPtrEqEmpty $ T.pack "" + , testCase "fromString \"\" = empty" $ assertPtrEqEmpty $ fromString "" + , testCase "$(lift \"\") = empty" $ assertPtrEqEmpty $ $(lift (pack "")) + , testCase "tail of a singleton = empty" $ assertPtrEqEmpty $ T.tail "a" + , testCase "init of a singleton = empty" $ assertPtrEqEmpty $ T.init "b" + , testCase "map _ empty = empty" $ assertPtrEqEmpty $ T.map id empty + , testCase "intercalate _ [] = empty" $ assertPtrEqEmpty $ T.intercalate ", " [] + , testCase "intersperse _ empty = empty" $ assertPtrEqEmpty $ T.intersperse ',' "" + , testCase "reverse empty = empty" $ assertPtrEqEmpty $ + T.reverse empty + , testCase "replace _ _ empty = empty" $ assertPtrEqEmpty $ + T.replace "needle" "replacement" empty + , testCase "toCaseFold empty = empty" $ assertPtrEqEmpty $ T.toCaseFold "" + , testCase "toLower empty = empty" $ assertPtrEqEmpty $ T.toLower "" + , testCase "toUpper empty = empty" $ assertPtrEqEmpty $ T.toUpper "" + , testCase "toTitle empty = empty" $ assertPtrEqEmpty $ T.toTitle "" + , testCase "justifyLeft 0 _ empty = empty" $ assertPtrEqEmpty $ + justifyLeft 0 ' ' empty + , testCase "justifyRight 0 _ empty = empty" $ assertPtrEqEmpty $ + justifyRight 0 ' ' empty + , testCase "center 0 _ empty = empty" $ assertPtrEqEmpty $ + T.center 0 ' ' empty + , testCase "transpose [empty] = [empty]" $ mapM_ assertPtrEqEmpty $ + T.transpose [empty] + , testCase "concat [] = empty" $ assertPtrEqEmpty $ T.concat [] + , testCase "concat [empty] = empty" $ assertPtrEqEmpty $ T.concat [empty] + , testCase "replicate 0 _ = empty" $ assertPtrEqEmpty $ T.replicate 0 "x" + , testCase "replicate _ empty = empty" $ assertPtrEqEmpty $ T.replicate 10 empty + , testCase "unfoldr (const Nothing) _ = empty" $ assertPtrEqEmpty $ + T.unfoldr (const Nothing) () + , testCase "take 0 _ = empty" $ assertPtrEqEmpty $ + T.take 0 "xyz" + , testCase "takeEnd 0 _ = empty" $ assertPtrEqEmpty $ + T.takeEnd 0 "xyz" + , testCase "takeWhile (const False) _ = empty" $ assertPtrEqEmpty $ + T.takeWhile (const False) "xyz" + , testCase "takeWhileEnd (const False) _ = empty" $ assertPtrEqEmpty $ + T.takeWhileEnd (const False) "xyz" + , testCase "drop n x = empty where n > len x" $ assertPtrEqEmpty $ + T.drop 5 "xyz" + , testCase "dropEnd n x = empty where n > len x" $ assertPtrEqEmpty $ + T.dropEnd 5 "xyz" + , testCase "dropWhile (const True) x = empty" $ assertPtrEqEmpty $ + T.dropWhile (const True) "xyz" + , testCase "dropWhileEnd (const True) x = empty" $ assertPtrEqEmpty $ + dropWhileEnd (const True) "xyz" + , testCase "dropAround _ empty = empty" $ assertPtrEqEmpty $ + dropAround (const True) empty + , testCase "stripStart empty = empty" $ assertPtrEqEmpty $ T.stripStart empty + , testCase "stripEnd empty = empty" $ assertPtrEqEmpty $ T.stripEnd empty + , testCase "strip empty = empty" $ assertPtrEqEmpty $ T.strip empty + , testCase "fst (splitAt 0 _) = empty" $ assertPtrEqEmpty $ fst $ T.splitAt 0 "123" + , testCase "snd (splitAt n x) = empty where n > len x" $ assertPtrEqEmpty $ + snd $ T.splitAt 5 "123" + , testCase "fst (span (const False) _) = empty" $ assertPtrEqEmpty $ + fst $ T.span (const False) "123" + , testCase "snd (span (const True) _) = empty" $ assertPtrEqEmpty $ + snd $ T.span (const True) "123" + , testCase "fst (break (const False) _) = empty" $ assertPtrEqEmpty $ + fst $ T.span (const False) "123" + , testCase "snd (break (const True) _) = empty" $ assertPtrEqEmpty $ + snd $ T.span (const True) "123" + , testCase "fst (spanM (const $ pure False) _) = empty" $ + assertPtrEqEmpty . fst =<< T.spanM (const $ pure False) "123" + , testCase "snd (spanM (const $ pure True) _) = empty" $ + assertPtrEqEmpty . snd =<< T.spanM (const $ pure True) "123" + , testCase "fst (spanEndM (const $ pure True) _) = empty" $ + assertPtrEqEmpty . fst =<< T.spanEndM (const $ pure True) "123" + , testCase "snd (spanEndM (const $ pure False) _) = empty" $ + assertPtrEqEmpty . snd =<< T.spanEndM (const $ pure False) "123" + , testCase "groupBy _ empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.groupBy (==) empty + , testCase "inits empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.inits empty + , testCase "inits _ = [empty, ...]" $ assertPtrEqEmpty $ L.head $ T.inits "123" + , testCase "tails empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.tails empty + , testCase "tails _ = [..., empty]" $ assertPtrEqEmpty $ L.last $ T.tails "123" + , testCase "tails _ = [..., empty]" $ assertPtrEqEmpty $ L.last $ T.tails "123" + , testCase "split _ empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.split (== 'a') "" + , testCase "filter (const False) _ = empty" $ assertPtrEqEmpty $ T.filter (const False) "1234" + , testCase "zipWith const empty empty = empty" $ assertPtrEqEmpty $ T.zipWith const "" "" + , testCase "unlines [] = empty" $ assertPtrEqEmpty $ T.unlines [] + , testCase "unwords [] = empty" $ assertPtrEqEmpty $ T.unwords [] + , testCase "stripPrefix empty empty = Just empty" $ mapM_ assertPtrEqEmpty $ + T.stripPrefix empty empty + , testCase "stripSuffix empty empty = Just empty" $ mapM_ assertPtrEqEmpty $ + T.stripSuffix empty empty + , testCase "commonPrefixes \"xyz\" \"123\" = Just (_, empty, _)" $ + mapM_ (assertPtrEqEmpty . (\(_, x, _) -> x)) $ T.commonPrefixes "xyz" "123" + , testCase "commonPrefixes \"xyz\" \"xyz\" = Just (_, _, empty)" $ + mapM_ (assertPtrEqEmpty . (\(_, _, x) -> x)) $ T.commonPrefixes "xyz" "xyz" + , testCase "copy empty = empty" $ assertPtrEqEmpty $ T.copy "" + ] diff --git a/text.cabal b/text.cabal index bad26bb4..b73c498d 100644 --- a/text.cabal +++ b/text.cabal @@ -207,6 +207,7 @@ library other-modules: Data.Text.Show Data.Text.Internal.Reverse + Data.Text.Internal.Transformation build-depends: array >= 0.3 && < 0.6, @@ -277,6 +278,7 @@ test-suite tests Tests.RebindableSyntaxTest Tests.Regressions Tests.SlowFunctions + Tests.ShareEmpty Tests.Utils build-depends: